├── ai_challenge ├── pig_chase │ ├── checkpoints │ │ └── .gitkeep │ ├── pig-chase-overview.png │ ├── pc_optim.py │ ├── pig_chase_eval_sample.py │ ├── pc_memory.py │ ├── pc_model.py │ ├── pc_utils.py │ ├── pc_environment.py │ ├── common.py │ ├── pc_test.py │ ├── pc_main.py │ ├── README.md │ ├── pig_chase_human_vs_agent.py │ ├── evaluation.py │ ├── pig_chase_baseline.py │ ├── pig_chase_dqn.py │ ├── pig_chase_dqn_top_down.py │ └── pig_chase.xml └── README.md ├── docker ├── malmo │ ├── run.sh │ ├── Dockerfile │ └── options.txt ├── malmopy-ai-challenge │ └── docker-compose.yml ├── malmopy-chainer-cpu │ └── Dockerfile ├── malmopy-chainer-gpu │ └── Dockerfile ├── malmopy-cntk-cpu-py27 │ └── Dockerfile ├── malmopy-cntk-gpu-py27 │ └── Dockerfile └── README.md ├── setup.py ├── LICENSE ├── malmopy ├── __init__.py ├── version.py ├── model │ ├── __init__.py │ ├── chainer │ │ ├── __init__.py │ │ └── qlearning.py │ ├── cntk │ │ ├── __init__.py │ │ ├── base.py │ │ └── qlearning.py │ └── model.py ├── environment │ ├── gym │ │ ├── __init__.py │ │ └── gym.py │ ├── __init__.py │ ├── malmo │ │ └── __init__.py │ └── environment.py ├── visualization │ ├── tensorboard │ │ ├── cntk │ │ │ ├── __init__.py │ │ │ └── cntk.py │ │ ├── __init__.py │ │ └── tensorboard.py │ ├── __init__.py │ └── visualizer.py ├── util │ ├── __init__.py │ ├── util.py │ └── images.py ├── agent │ ├── __init__.py │ ├── astar.py │ ├── explorer.py │ └── gui.py └── README.md ├── .gitattributes ├── .gitignore ├── samples └── atari │ └── gym_atari_dqn.py └── README.md /ai_challenge/pig_chase/checkpoints/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docker/malmo/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | xvfb-run -a -e /dev/stdout -s '-screen 0 1400x900x24' $* 
import os
import sys

# `setup` must come from setuptools, not distutils: setuptools-only keywords
# such as `extras_require` are not understood by distutils' setup() and would
# be silently dropped. `find_packages` was already setuptools, so this adds
# no new dependency.
from setuptools import find_packages, setup

# Make malmopy/version.py importable without installing the package first.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'malmopy'))
from version import VERSION

# Optional dependency groups, installable as e.g. `pip install malmopy[chainer]`.
extras = {
    'chainer': ['chainer>=1.21.0'],
    'gym': ['gym[atari]>=0.7.0'],
    'tensorflow': ['tensorflow'],
}

# Meta dependency group: `malmopy[all]` pulls in every optional backend.
extras['all'] = [dep for group in extras.values() for dep in group]

setup(
    name='malmopy',
    version=VERSION,

    packages=[package for package in find_packages()
              if package.startswith('malmopy')],

    url='https://github.com/Microsoft/malmo-challenge',
    license='MIT',
    author='Microsoft Research Cambridge',
    author_email='',
    description='Malmo Collaborative AI Challenge task and example code',
    install_requires=['future', 'numpy>=1.11.0', 'six>=0.10.0', 'pandas', 'Pillow'],
    extras_require=extras
)
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /malmopy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | -------------------------------------------------------------------------------- /malmopy/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | VERSION = '0.1.0' 19 | -------------------------------------------------------------------------------- /malmopy/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from .model import * 21 | 22 | -------------------------------------------------------------------------------- /malmopy/model/chainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from .qlearning import * 21 | -------------------------------------------------------------------------------- /malmopy/environment/gym/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from .gym import GymEnvironment 21 | -------------------------------------------------------------------------------- /malmopy/model/cntk/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from .base import * 21 | from .qlearning import * 22 | -------------------------------------------------------------------------------- /malmopy/visualization/tensorboard/cntk/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from .cntk import CntkConverter 21 | -------------------------------------------------------------------------------- /malmopy/environment/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from .environment import BaseEnvironment, VideoCapableEnvironment 21 | -------------------------------------------------------------------------------- /malmopy/util/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from .images import resize, rgb2gray 21 | from .util import * 22 | 23 | 24 | -------------------------------------------------------------------------------- /malmopy/visualization/tensorboard/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from .tensorboard import TensorboardVisualizer, TensorflowConverter 21 | -------------------------------------------------------------------------------- /malmopy/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from .visualizer import BaseVisualizer, ConsoleVisualizer, EmptyVisualizer, Visualizable 21 | -------------------------------------------------------------------------------- /malmopy/environment/malmo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from .malmo import MalmoEnvironment, allocate_remotes 21 | from .malmo import MalmoStateBuilder, MalmoRGBStateBuilder, MalmoALEStateBuilder 22 | -------------------------------------------------------------------------------- /malmopy/agent/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
# -*- coding: utf-8 -*-
from torch import optim


class SharedRMSprop(optim.RMSprop):
    """Non-centered RMSprop with shared statistics (no momentum).

    Optimiser state (``step`` counter and ``square_avg``) is created eagerly
    as tensors so it can be moved into shared memory and updated concurrently
    by several worker processes (Hogwild!-style training).
    """

    def __init__(self, params, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0):
        super(SharedRMSprop, self).__init__(params, lr=lr, alpha=alpha, eps=eps,
                                            weight_decay=weight_decay, momentum=0, centered=False)

        # State initialisation (must be done before step, else it will not be
        # shared between threads). `new_zeros` replaces the deprecated
        # `new().resize_(...).zero_()` chain; the step counter is a tensor
        # (not an int) so that it can live in shared memory.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = p.data.new_zeros(1)
                state['square_avg'] = p.data.new_zeros(p.data.size())

    def share_memory(self):
        """Move optimiser state into shared memory (call before forking workers)."""
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['square_avg'].share_memory_()

    def step(self, closure=None):
        """Perform a single optimisation step.

        Args:
            closure (callable, optional): re-evaluates the model and returns the loss.

        Returns:
            The loss returned by `closure`, or None.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]

                square_avg = state['square_avg']
                alpha = group['alpha']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    # L2 penalty: grad <- grad + weight_decay * theta.
                    # Keyword `alpha=` form: the positional-scalar overloads of
                    # add/addcmul_/addcdiv_ were deprecated and later removed
                    # from PyTorch, which broke this code on torch >= 1.13.
                    grad = grad.add(p.data, alpha=group['weight_decay'])

                # g = alpha * g + (1 - alpha) * grad^2
                square_avg.mul_(alpha).addcmul_(grad, grad, value=1 - alpha)
                # theta <- theta - lr * grad / (sqrt(g) + eps)
                avg = square_avg.sqrt().add_(group['eps'])
                p.data.addcdiv_(grad, avg, value=-group['lr'])

        return loss
from common import ENV_AGENT_NAMES
from evaluation import PigChaseEvaluator
from environment import PigChaseTopDownStateBuilder
from malmopy.agent import RandomAgent


if __name__ == '__main__':
    # NOTE: the evaluated agent must be registered under the expected agent
    # name (ENV_AGENT_NAMES[1]) or the evaluation will not attribute scores
    # correctly.

    clients = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
    agent = RandomAgent(ENV_AGENT_NAMES[1], 3)

    # Named `evaluator` rather than `eval` so the builtin eval() is not shadowed.
    evaluator = PigChaseEvaluator(clients, agent, agent, PigChaseTopDownStateBuilder())
    evaluator.run()

    evaluator.save('My Exp 1', 'pig_chase_results.json')
# -*- coding: utf-8 -*-
import random
from collections import deque, namedtuple

# One step of experience: s_i, a_i, r_{i+1}, the behaviour policy mu(.|s_i)
# and the environment class the step came from. A `None` action marks a
# terminal transition.
Transition = namedtuple('Transition', ('state', 'action', 'reward', 'policy', 'env_cls'))


class EpisodicReplayMemory():
    """Replay memory that stores whole episodes and samples trajectories."""

    def __init__(self, capacity, max_episode_length):
        # Upper bound on the number of stored episodes; the true number of
        # transitions held can be well below `capacity`.
        self.num_episodes = capacity // max_episode_length
        self.memory = deque(maxlen=self.num_episodes)
        self.memory.append([])  # list that will hold the first episode
        self.position = 0

    def append(self, state, action, reward, policy, env_cls):
        """Store one transition; a `None` action ends the current episode."""
        self.memory[self.position].append(
            Transition(state, action, reward, policy, env_cls))
        if action is None:
            # Episode finished: open a fresh list for the next one. Once the
            # deque is full the oldest episode gets evicted automatically and
            # the write index stays pinned at the last slot.
            self.memory.append([])
            self.position = min(self.position + 1, self.num_episodes - 1)

    def sample(self, maxlen=0):
        """Return a random non-empty trajectory.

        If `maxlen` > 0 and the episode is longer, a random window of
        `maxlen` transitions plus the one that follows it is returned;
        otherwise the full episode is returned.
        """
        while True:
            episode = self.memory[random.randrange(len(self.memory))]
            length = len(episode)
            if not length:
                continue  # skip the (possibly empty) episode being written
            if maxlen > 0 and length > maxlen + 1:
                start = random.randrange(length - maxlen - 1)
                return episode[start:start + maxlen + 1]
            return episode

    def sample_batch(self, batch_size, maxlen=0):
        """Sample `batch_size` trajectories truncated to a common length,
        transposed so each entry groups the transitions of one timestep."""
        trajectories = [self.sample(maxlen=maxlen) for _ in range(batch_size)]
        horizon = min(len(trajectory) for trajectory in trajectories)
        clipped = [trajectory[:horizon] for trajectory in trajectories]
        return list(map(list, zip(*clipped)))

    def __len__(self):
        # Total number of stored transitions across all episodes.
        return sum(len(episode) for episode in self.memory)
class BaseModel(object):
    """Abstract base for trainable models.

    Holds the input/output shapes and declares the interface
    (train / evaluate / load / save) that concrete backends implement.
    """

    def __init__(self, in_shape, output_shape):
        # Shapes are kept private and exposed via read-only properties.
        self._input_shape = in_shape
        self._output_shape = output_shape

    @property
    def input_shape(self):
        """Shape of the model's input."""
        return self._input_shape

    @property
    def output_shape(self):
        """Shape of the model's output."""
        return self._output_shape

    @property
    def loss_val(self):
        """Most recent training loss; subclasses must provide it."""
        raise NotImplementedError()

    def evaluate(self, environment):
        """Run the model forward on the given environment state."""
        raise NotImplementedError()

    def train(self, x, y):
        """Fit the model on a minibatch (x, y)."""
        raise NotImplementedError()

    def load(self, input_file):
        """Restore model parameters from input_file."""
        raise NotImplementedError()

    def save(self, output_file):
        """Persist model parameters to output_file."""
        raise NotImplementedError()


class QModel(BaseModel):
    """Base class for Q-value models with an online and a target network."""

    # Bit flags selecting which network a call addresses.
    ACTION_VALUE_NETWORK = 1 << 0
    TARGET_NETWORK = 1 << 1

    def evaluate(self, environment, model=ACTION_VALUE_NETWORK):
        """Evaluate the selected network (online network by default)."""
        raise NotImplementedError()

    def train(self, x, y, actions=None):
        """Fit Q-values, optionally restricted to the given actions."""
        raise NotImplementedError()
from pc_utils import ACTION_SIZE, STATE_SIZE

class ActorCritic(nn.Module):
    """Recurrent actor-critic with two policy/value heads (one per possible
    opponent behaviour) plus a binary classifier head predicting which
    behaviour is active."""

    def __init__(self, hidden_size):
        super(ActorCritic, self).__init__()
        # Flattened state length; STATE_SIZE is (channels, height, width) = (3, 18, 18)
        self.state_size = STATE_SIZE[0] * STATE_SIZE[1] * STATE_SIZE[2]

        self.elu = nn.ELU(inplace=True)
        self.softmax = nn.Softmax()  # NOTE(review): no dim argument (old PyTorch API)
        self.sigmoid = nn.Sigmoid()

        # Pass state into model body
        self.conv1 = nn.Conv2d(STATE_SIZE[0], 32, 4, stride=2)  # 18x18 -> 8x8
        self.conv2 = nn.Conv2d(32, 32, 3)  # 8x8 -> 6x6
        self.fc1 = nn.Linear(1152, hidden_size)  # 1152 = 32 * 6 * 6
        # Pass previous action, reward and timestep directly into LSTM
        self.lstm = nn.LSTMCell(hidden_size + ACTION_SIZE + 2, hidden_size)
        self.fc_actor1 = nn.Linear(hidden_size, ACTION_SIZE)
        self.fc_critic1 = nn.Linear(hidden_size, ACTION_SIZE)
        self.fc_actor2 = nn.Linear(hidden_size, ACTION_SIZE)
        self.fc_critic2 = nn.Linear(hidden_size, ACTION_SIZE)
        self.fc_class = nn.Linear(hidden_size, 1)  # Binary opponent-class logit head

        # Orthogonal weight initialisation
        for name, p in self.named_parameters():
            if 'weight' in name:
                init.orthogonal(p)
            elif 'bias' in name:
                init.constant(p, 0)
        # Set LSTM forget gate bias to 1
        # nn.LSTMCell packs gate biases in order (input, forget, cell, output),
        # so the forget gate occupies the second quarter of the bias vector
        for name, p in self.lstm.named_parameters():
            if 'bias' in name:
                n = p.size(0)
                forget_start_idx, forget_end_idx = n // 4, n // 2
                init.constant(p[forget_start_idx:forget_end_idx], 1)

    def forward(self, x, h):
        """One recurrent step.

        x: batch of [flattened state | one-hot previous action | reward |
        timestep] concatenated along dim 1; h: (hidden, cell) LSTM state.
        Returns (policy1, Q1, V1, policy2, Q2, V2, cls, h).
        """
        state, extra = x.narrow(1, 0, self.state_size).contiguous(), x.narrow(1, self.state_size, ACTION_SIZE + 2)
        state = state.view(state.size(0), STATE_SIZE[0], STATE_SIZE[1], STATE_SIZE[2]).contiguous()  # Restore spatial structure
        x = self.elu(self.conv1(state))
        x = self.elu(self.conv2(x))
        x = x.view(x.size(0), -1)  # Flatten conv features for the fully-connected layer
        x = self.elu(self.fc1(x))
        h = self.lstm(torch.cat((x, extra), 1), h)  # h is (hidden state, cell state)
        x = h[0]
        policy1 = self.softmax(self.fc_actor1(x)).clamp(max=1 - 1e-20)  # Prevent 1s and hence NaNs
        Q1 = self.fc_critic1(x)
        V1 = (Q1 * policy1).sum(1)  # V is expectation of Q under π
        policy2 = self.softmax(self.fc_actor2(x)).clamp(max=1 - 1e-20)
        Q2 = self.fc_critic2(x)
        V2 = (Q2 * policy2).sum(1)
        cls = self.sigmoid(self.fc_class(x))  # P(opponent class = 1)
        return policy1, Q1, V1, policy2, Q2, V2, cls, h
16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | import os 21 | from math import sqrt 22 | 23 | import numpy as np 24 | 25 | 26 | def euclidean(a, b): 27 | assert len(a) == len(b), 'cannot compute distance when a and b have different shapes' 28 | return sqrt(sum([(a - b) ** 2 for a, b in zip(a, b)])) 29 | 30 | 31 | def get_rank(x): 32 | """ Get a shape's rank """ 33 | if isinstance(x, np.ndarray): 34 | return len(x.shape) 35 | elif isinstance(x, tuple): 36 | return len(x) 37 | else: 38 | return ValueError('Unable to determine rank of type: %s' % str(type(x))) 39 | 40 | 41 | def check_rank(shape, required_rank): 42 | """ Check if the shape's rank equals the expected rank """ 43 | if isinstance(shape, tuple): 44 | return len(shape) == required_rank 45 | else: 46 | return False 47 | 48 | 49 | def isclose(a, b, atol=1e-01): 50 | """ Check if a and b are closer than tolerance level atol 51 | 52 | return abs(a - b) < atol 53 | """ 54 | return abs(a - b) < atol 55 | 56 | 57 | def ensure_path_exists(path): 58 | """ Ensure that the specified path exists on the filesystem """ 59 | if not os.path.isabs(path): 60 | path = os.path.abspath(path) 61 | if not os.path.exists(path): 62 | os.makedirs(path) 63 | -------------------------------------------------------------------------------- /docker/malmo/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
# -*- coding: utf-8 -*-
import plotly
from plotly.graph_objs import Scatter, Line
import torch
from torch import multiprocessing as mp

# Constants
STATE_SIZE = (3, 18, 18)  # (channels, height, width) of the state tensor
ACTION_SIZE = 3  # Number of discrete actions


# Global counter
class GlobalVar():
    # Integer counter shared between worker processes; every access is
    # serialised by an explicit multiprocessing lock.
    def __init__(self):
        self.val = mp.Value('i', 0)  # Shared int backing store
        self.lock = mp.Lock()  # Guards read-modify-write across processes

    def increment(self):
        # Atomically add one
        with self.lock:
            self.val.value += 1

    def set(self, value):
        # Atomically overwrite the counter
        with self.lock:
            self.val.value = value

    def value(self):
        # Atomically read the current value
        with self.lock:
            return self.val.value


# Converts an action index and action space size into a one-hot batch tensor
def action_to_one_hot(action_index, action_size):
    action = torch.zeros(1, action_size)  # Batch dimension of 1
    action[0, action_index] = 1
    return action


# Creates an extended input (state + previous action + reward + timestep)
def extend_input(state, action, reward, timestep):
    # Promote the scalars to 1x1 tensors so everything concatenates on dim 1
    reward = torch.Tensor([reward]).unsqueeze(0)
    timestep = torch.Tensor([timestep]).unsqueeze(0)
    return torch.cat((state.view(1, -1), action, reward, timestep), 1)


# Plots min, max and mean + standard deviation bars of a population over time
def plot_line(xs, ys_population, filename, y_title=''):
    # ys_population: one row per step, one column per population member
    max_colour = 'rgb(0, 132, 180)'
    mean_colour = 'rgb(0, 172, 237)'
    std_colour = 'rgba(29, 202, 255, 0.2)'

    # Per-step statistics over the population (dim 1)
    ys = torch.Tensor(ys_population)
    ys_min = ys.min(1)[0].squeeze()
    ys_max = ys.max(1)[0].squeeze()
    ys_mean = ys.mean(1).squeeze()
    ys_std = ys.std(1).squeeze()
    ys_upper, ys_lower = ys_mean + ys_std, ys_mean - ys_std

    # Upper/lower traces are drawn transparent; the 'tonexty' fill between
    # them shades the ±1 std-dev band around the mean
    trace_max = Scatter(x=xs, y=ys_max.numpy(), line=Line(color=max_colour, dash='dash'), name='Max')
    trace_upper = Scatter(x=xs, y=ys_upper.numpy(), line=Line(color='transparent'), name='+1 Std. Dev.', showlegend=False)
    trace_mean = Scatter(x=xs, y=ys_mean.numpy(), fill='tonexty', fillcolor=std_colour, line=Line(color=mean_colour), name='Mean')
    trace_lower = Scatter(x=xs, y=ys_lower.numpy(), fill='tonexty', fillcolor=std_colour, line=Line(color='transparent'), name='-1 Std. Dev.', showlegend=False)
    trace_min = Scatter(x=xs, y=ys_min.numpy(), line=Line(color=max_colour, dash='dash'), name='Min')

    # Write the figure to 'filename' without opening a browser window
    plotly.offline.plot({
        'data': [trace_upper, trace_mean, trace_lower, trace_min, trace_max],
        'layout': dict(xaxis={'title': 'Step'},
                       yaxis={'title': y_title})
    }, filename=filename, auto_open=False)
key_key.sneak:42 64 | key_key.drop:16 65 | key_key.inventory:18 66 | key_key.chat:20 67 | key_key.playerlist:15 68 | key_key.pickItem:-98 69 | key_key.command:53 70 | key_key.screenshot:60 71 | key_key.togglePerspective:63 72 | key_key.smoothCamera:0 73 | key_key.sprint:29 74 | key_key.streamStartStop:64 75 | key_key.streamPauseUnpause:65 76 | key_key.streamCommercial:0 77 | key_key.streamToggleMic:0 78 | key_key.fullscreen:87 79 | key_key.spectatorOutlines:0 80 | key_key.hotbar.1:2 81 | key_key.hotbar.2:3 82 | key_key.hotbar.3:4 83 | key_key.hotbar.4:5 84 | key_key.hotbar.5:6 85 | key_key.hotbar.6:7 86 | key_key.hotbar.7:8 87 | key_key.hotbar.8:9 88 | key_key.hotbar.9:10 89 | key_key.toggleMalmo:28 90 | key_key.handyTestHook:22 91 | soundCategory_master:0.0 92 | soundCategory_music:1.0 93 | soundCategory_record:1.0 94 | soundCategory_weather:1.0 95 | soundCategory_block:1.0 96 | soundCategory_hostile:1.0 97 | soundCategory_neutral:1.0 98 | soundCategory_player:1.0 99 | soundCategory_ambient:1.0 100 | modelPart_cape:true 101 | modelPart_jacket:true 102 | modelPart_left_sleeve:true 103 | modelPart_right_sleeve:true 104 | modelPart_left_pants_leg:true 105 | modelPart_right_pants_leg:true 106 | modelPart_hat:true 107 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 
11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /malmopy/util/images.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
# ===================================================================================================================

import sys
import numpy as np

# Backend availability flags; OpenCV is preferred when both libraries exist.
OPENCV_AVAILABLE = False
PILLOW_AVAILABLE = False

try:
    import cv2

    OPENCV_AVAILABLE = True
    print('OpenCV found, setting as default backend.')
except ImportError:
    pass

try:
    import PIL

    PILLOW_AVAILABLE = True

    if not OPENCV_AVAILABLE:
        print('Pillow found, setting as default backend.')
except ImportError:
    pass


# Fail fast at import time if no image backend can be used at all
if not (OPENCV_AVAILABLE or PILLOW_AVAILABLE):
    raise ValueError('No image library backend found.'' Install either '
                     'OpenCV or Pillow to support image processing.')


def resize(img, shape):
    """
    Resize the specified image
    :param img: Image to reshape
    :param shape: New image shape
    :return: the resized image as produced by the active backend
    """
    # NOTE(review): both cv2.resize and PIL's Image.resize interpret the size
    # as (width, height) -- callers passing (rows, cols) get transposed output.
    if OPENCV_AVAILABLE:
        from cv2 import resize
        return resize(img, shape)
    elif PILLOW_AVAILABLE:
        from PIL import Image
        return np.array(Image.fromarray(img).resize(shape))


def rgb2gray(img):
    """
    Convert an RGB image to grayscale
    :param img: image to convert
    :return: single-channel grayscale image
    """
    if OPENCV_AVAILABLE:
        from cv2 import cvtColor, COLOR_RGB2GRAY
        return cvtColor(img, COLOR_RGB2GRAY)
    elif PILLOW_AVAILABLE:
        from PIL import Image
        return np.array(Image.fromarray(img).convert('L'))
To be able to use CNTK from the framework, you will first need to install CNTK from the official repository [release page](https://github.com/Microsoft/CNTK/releases).
# Replace range with xrange if running Python 2
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
16 | # =================================================================================================================== 17 | 18 | FROM ubuntu:16.04 19 | 20 | # Version variables 21 | ENV MALMO_VERSION 0.21.0 22 | ENV MALMOPY_VERSION 0.1.0 23 | 24 | RUN apt-get update -y && \ 25 | apt-get install -y --no-install-recommends \ 26 | build-essential \ 27 | python-dev \ 28 | python-pip \ 29 | python-setuptools \ 30 | cmake \ 31 | ssh \ 32 | git-all \ 33 | zlib1g-dev \ 34 | 35 | # install Malmo dependencies 36 | libpython2.7 \ 37 | lua5.1 \ 38 | libxerces-c3.1 \ 39 | liblua5.1-0-dev \ 40 | libav-tools \ 41 | python-tk \ 42 | python-imaging-tk \ 43 | wget \ 44 | unzip && \ 45 | rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* 46 | 47 | RUN pip install -U pip setuptools && pip install wheel && pip install chainer==1.21.0 48 | 49 | # download and unpack Malmo 50 | WORKDIR /root 51 | RUN wget https://github.com/Microsoft/malmo/releases/download/$MALMO_VERSION/Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 52 | unzip Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 53 | rm Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 54 | mv Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost Malmo 55 | 56 | ENV MALMO_XSD_PATH /root/Malmo/Schemas 57 | ENV PYTHONPATH /root/Malmo/Python_Examples 58 | 59 | # add and install malmopy, malmo challenge task and samples 60 | WORKDIR /root 61 | RUN git clone https://github.com/Microsoft/malmo-challenge.git && \ 62 | cd malmo-challenge && \ 63 | git checkout tags/$MALMOPY_VERSION -b latest 64 | WORKDIR /root/malmo-challenge 65 | RUN pip install -e '.[all]' 66 | -------------------------------------------------------------------------------- /docker/malmopy-chainer-gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
16 | # =================================================================================================================== 17 | 18 | FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04 19 | 20 | 21 | # Version variables 22 | ENV MALMO_VERSION 0.21.0 23 | ENV MALMOPY_VERSION 0.1.0 24 | 25 | RUN apt-get update -y && \ 26 | apt-get install -y --no-install-recommends \ 27 | build-essential \ 28 | python-dev \ 29 | python-pip \ 30 | python-setuptools \ 31 | cmake \ 32 | ssh \ 33 | git-all \ 34 | zlib1g-dev \ 35 | 36 | # install Malmo dependencies 37 | libpython2.7 \ 38 | lua5.1 \ 39 | libxerces-c3.1 \ 40 | liblua5.1-0-dev \ 41 | libav-tools \ 42 | python-tk \ 43 | python-imaging-tk \ 44 | wget \ 45 | unzip && \ 46 | rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* 47 | 48 | RUN pip install -U pip setuptools && pip install wheel && pip install chainer==1.21.0 49 | 50 | # download and unpack Malmo 51 | WORKDIR /root 52 | RUN wget https://github.com/Microsoft/malmo/releases/download/$MALMO_VERSION/Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 53 | unzip Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 54 | rm Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 55 | mv Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost Malmo 56 | 57 | ENV MALMO_XSD_PATH /root/Malmo/Schemas 58 | ENV PYTHONPATH /root/Malmo/Python_Examples 59 | 60 | # add and install malmopy, malmo challenge task and samples 61 | WORKDIR /root 62 | RUN git clone https://github.com/Microsoft/malmo-challenge.git && \ 63 | cd malmo-challenge && \ 64 | git checkout tags/$MALMOPY_VERSION -b latest 65 | WORKDIR /root/malmo-challenge 66 | RUN pip install -e '.[all]' 67 | -------------------------------------------------------------------------------- /docker/malmopy-cntk-cpu-py27/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
16 | # =================================================================================================================== 17 | 18 | FROM microsoft/cntk:2.0.beta15.0-cpu-python2.7 19 | 20 | # Version variables 21 | ENV MALMO_VERSION 0.21.0 22 | ENV MALMOPY_VERSION 0.1.0 23 | 24 | RUN apt-get update -y && \ 25 | apt-get install -y --no-install-recommends \ 26 | build-essential \ 27 | cmake \ 28 | ssh \ 29 | git-all \ 30 | zlib1g-dev \ 31 | python-dev \ 32 | python-pip \ 33 | 34 | # install Malmo dependencies 35 | libpython2.7 \ 36 | openjdk-7-jdk \ 37 | lua5.1 \ 38 | libxerces-c3.1 \ 39 | liblua5.1-0-dev \ 40 | libav-tools \ 41 | python-tk \ 42 | python-imaging-tk \ 43 | wget \ 44 | unzip && \ 45 | rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* 46 | 47 | # Set CNTK Python PATH at first position to be picked automatically 48 | ENV PATH=/root/anaconda3/envs/cntk-py27/bin:$PATH 49 | 50 | # Update pip 51 | RUN /root/anaconda3/envs/cntk-py27/bin/pip install --upgrade pip 52 | 53 | # download and unpack Malmo 54 | WORKDIR /root 55 | RUN wget https://github.com/Microsoft/malmo/releases/download/$MALMO_VERSION/Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 56 | unzip Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 57 | rm Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 58 | mv Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost Malmo 59 | 60 | ENV MALMO_XSD_PATH /root/Malmo/Schemas 61 | ENV PYTHONPATH /root/Malmo/Python_Examples 62 | 63 | # add and install malmopy, malmo challenge task and samples 64 | WORKDIR /root 65 | RUN git clone https://github.com/Microsoft/malmo-challenge.git && \ 66 | cd malmo-challenge && \ 67 | git checkout tags/$MALMOPY_VERSION -b latest 68 | WORKDIR /root/malmo-challenge 69 | RUN pip install -e '.[all]' 70 | -------------------------------------------------------------------------------- /docker/malmopy-cntk-gpu-py27/Dockerfile: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
16 | # =================================================================================================================== 17 | 18 | FROM microsoft/cntk:2.0.beta15.0-gpu-python2.7-cuda8.0-cudnn5.1 19 | 20 | # Version variables 21 | ENV MALMO_VERSION 0.21.0 22 | ENV MALMOPY_VERSION 0.1.0 23 | 24 | RUN apt-get update -y && \ 25 | apt-get install -y --no-install-recommends \ 26 | build-essential \ 27 | cmake \ 28 | ssh \ 29 | git-all \ 30 | zlib1g-dev \ 31 | python-dev \ 32 | python-pip \ 33 | 34 | # install Malmo dependencies 35 | libpython2.7 \ 36 | openjdk-7-jdk \ 37 | lua5.1 \ 38 | libxerces-c3.1 \ 39 | liblua5.1-0-dev \ 40 | libav-tools \ 41 | python-tk \ 42 | python-imaging-tk \ 43 | wget \ 44 | unzip && \ 45 | rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* 46 | 47 | # Set CNTK Python PATH at first position to be picked automatically 48 | ENV PATH=/root/anaconda3/envs/cntk-py27/bin:$PATH 49 | 50 | # Update pip 51 | RUN /root/anaconda3/envs/cntk-py27/bin/pip install --upgrade pip 52 | 53 | # download and unpack Malmo 54 | WORKDIR /root 55 | RUN wget https://github.com/Microsoft/malmo/releases/download/$MALMO_VERSION/Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 56 | unzip Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 57 | rm Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost.zip && \ 58 | mv Malmo-$MALMO_VERSION-Linux-Ubuntu-16.04-64bit_withBoost Malmo 59 | 60 | ENV MALMO_XSD_PATH /root/Malmo/Schemas 61 | ENV PYTHONPATH /root/Malmo/Python_Examples 62 | 63 | # add and install malmopy, malmo challenge task and samples 64 | WORKDIR /root 65 | RUN git clone https://github.com/Microsoft/malmo-challenge.git && \ 66 | cd malmo-challenge && \ 67 | git checkout tags/$MALMOPY_VERSION -b latest 68 | WORKDIR /root/malmo-challenge 69 | RUN pip install -e '.[all]' 70 | -------------------------------------------------------------------------------- /malmopy/agent/astar.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from heapq import heapify, heappop, heappush 21 | from collections import deque 22 | 23 | from . 
import BaseAgent 24 | 25 | 26 | class AStarAgent(BaseAgent): 27 | def __init__(self, name, nb_actions, visualizer=None): 28 | super(AStarAgent, self).__init__(name, nb_actions, visualizer) 29 | 30 | def _find_shortest_path(self, start, end, **kwargs): 31 | came_from, cost_so_far = {}, {} 32 | explorer = [] 33 | heapify(explorer) 34 | 35 | heappush(explorer, (0, start)) 36 | came_from[start] = None 37 | cost_so_far[start] = 0 38 | current = None 39 | 40 | while len(explorer) > 0: 41 | _, current = heappop(explorer) 42 | 43 | if self.matches(current, end): 44 | break 45 | 46 | for nb in self.neighbors(current, **kwargs): 47 | cost = nb.cost if hasattr(nb, "cost") else 1 48 | new_cost = cost_so_far[current] + cost 49 | 50 | if nb not in cost_so_far or new_cost < cost_so_far[nb]: 51 | cost_so_far[nb] = new_cost 52 | priority = new_cost + self.heuristic(end, nb, **kwargs) 53 | heappush(explorer, (priority, nb)) 54 | came_from[nb] = current 55 | 56 | # build path: 57 | path = deque() 58 | while current is not start: 59 | path.appendleft(current) 60 | current = came_from[current] 61 | return path, cost_so_far 62 | 63 | def neighbors(self, pos, **kwargs): 64 | raise NotImplementedError() 65 | 66 | def heuristic(self, a, b, **kwargs): 67 | raise NotImplementedError() 68 | 69 | def matches(self, a, b): 70 | return a == b 71 | -------------------------------------------------------------------------------- /malmopy/agent/explorer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | import numpy as np 21 | 22 | 23 | class BaseExplorer: 24 | """ Explore/exploit logic wrapper""" 25 | 26 | def __call__(self, step, nb_actions): 27 | return self.explore(step, nb_actions) 28 | 29 | def is_exploring(self, step): 30 | """ Returns True when exploring, False when exploiting """ 31 | raise NotImplementedError() 32 | 33 | def explore(self, step, nb_actions): 34 | """ Generate an exploratory action """ 35 | raise NotImplementedError() 36 | 37 | 38 | class LinearEpsilonGreedyExplorer(BaseExplorer): 39 | """ Explore/exploit logic wrapper 40 | 41 | 42 | This implementation uses linear interpolation between 43 | epsilon_max and epsilon_min to linearly anneal epsilon as a function of the current episode. 44 | 45 | 3 cases exists: 46 | - If 0 <= episode < eps_min_time then epsilon = interpolator(episode) 47 | - If episode >= eps_min_time then epsilon then epsilon = eps_min 48 | - Otherwise epsilon = eps_max 49 | """ 50 | 51 | def __init__(self, eps_max, eps_min, eps_min_time): 52 | assert eps_max > eps_min 53 | assert eps_min_time > 0 54 | 55 | self._eps_min_time = eps_min_time 56 | self._eps_min = eps_min 57 | self._eps_max = eps_max 58 | 59 | self._a = -(eps_max - eps_min) / eps_min_time 60 | 61 | def _epsilon(self, step): 62 | if step < 0: 63 | return self._eps_max 64 | elif step > self._eps_min_time: 65 | return self._eps_min 66 | else: 67 | return self._a * step + self._eps_max 68 | 69 | def is_exploring(self, step): 70 | return np.random.rand() < self._epsilon(step) 71 | 72 | def explore(self, step, nb_actions): 73 | return np.random.randint(0, nb_actions) 74 | -------------------------------------------------------------------------------- /malmopy/visualization/tensorboard/tensorboard.py: -------------------------------------------------------------------------------- 1 | # 
Copyright (c) 2017 Microsoft Corporation. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | import six 21 | import tensorflow as tf 22 | from tensorflow.core.framework.summary_pb2 import Summary 23 | 24 | from ..visualizer import BaseVisualizer 25 | 26 | 27 | class TensorboardVisualizer(BaseVisualizer): 28 | """ 29 | Visualize the generated results in Tensorboard 30 | """ 31 | 32 | def __init__(self): 33 | super(TensorboardVisualizer, self).__init__() 34 | 35 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.01) 36 | self._session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 37 | self._train_writer = None 38 | 39 | def initialize(self, logdir, model, converter=None): 40 | assert logdir is not None, "logdir cannot be None" 41 | assert isinstance(logdir, six.string_types), "logdir should be a string" 42 | 43 | if converter is not None: 44 | assert isinstance(converter, TensorflowConverter), \ 45 | "converter should derive from TensorflowConverter" 46 | converter.convert(model, self._session.graph) 47 | 48 | self._train_writer = tf.summary.FileWriter(logdir=logdir, 49 | graph=self._session.graph, 50 | flush_secs=30) 51 | 52 | def add_entry(self, index, tag, value, **kwargs): 53 | if "image" in kwargs and value is not None: 54 | image_string = tf.image.encode_jpeg(value, optimize_size=True, quality=80) 55 | summary_value = Summary.Image(width=value.shape[1], 56 | height=value.shape[0], 57 | colorspace=value.shape[2], 58 | encoded_image_string=image_string) 59 | else: 60 | summary_value = Summary.Value(tag=tag, simple_value=value) 61 | 62 | if summary_value is not None: 63 | entry = Summary(value=[summary_value]) 64 | self._train_writer.add_summary(entry, index) 65 | 66 | def close(self): 67 | if self._train_writer is not None: 68 | self._train_writer.close() 69 | 70 | def __enter__(self): 71 | return self 72 | 73 | def __exit__(self, exc_type, 
exc_val, exc_tb): 74 | self.close() 75 | 76 | 77 | class TensorflowConverter(object): 78 | def convert(self, network, graph): 79 | raise NotImplementedError() 80 | -------------------------------------------------------------------------------- /malmopy/agent/gui.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | import six 21 | if six.PY2: 22 | from Tkinter import Tk 23 | else: 24 | from tkinter import Tk 25 | 26 | from . 
import BaseAgent 27 | from ..environment import VideoCapableEnvironment 28 | 29 | FPS_KEYS_MAPPING = {'w': 'move 1', 'a': 'strafe -1', 's': 'move -1', 'd': 'strafe 1', ' ': 'jump 1', 30 | 'q': 'strafe -1', 'z': 'move 1'} 31 | 32 | ARROW_KEYS_MAPPING = {'Left': 'turn -1', 'Right': 'turn 1', 'Up': 'move 1', 'Down': 'move -1'} 33 | 34 | CONTINUOUS_KEYS_MAPPING = {'Shift_L': 'crouch 1', 'Shift_R': 'crouch 1', 35 | '1': 'hotbar.1 1', '2': 'hotbar.2 1', '3': 'hotbar.3 1', '4': 'hotbar.4 1', 36 | '5': 'hotbar.5 1', 37 | '6': 'hotbar.6 1', '7': 'hotbar.7 1', '8': 'hotbar.8 1', '9': 'hotbar.9 1'} \ 38 | .update(ARROW_KEYS_MAPPING) 39 | 40 | DISCRETE_KEYS_MAPPING = {'Left': 'turn -1', 'Right': 'turn 1', 'Up': 'move 1', 'Down': 'move -1', 41 | '1': 'hotbar.1 1', '2': 'hotbar.2 1', '3': 'hotbar.3 1', '4': 'hotbar.4 1', '5': 'hotbar.5 1', 42 | '6': 'hotbar.6 1', '7': 'hotbar.7 1', '8': 'hotbar.8 1', '9': 'hotbar.9 1'} 43 | 44 | 45 | class GuiAgent(BaseAgent): 46 | def __init__(self, name, environment, keymap, win_name="Gui Agent", size=(640, 480), visualizer=None): 47 | assert isinstance(keymap, list), 'keymap should be a list[character]' 48 | assert isinstance(environment, VideoCapableEnvironment), 'environment should inherit from BaseEnvironment' 49 | 50 | super(GuiAgent, self).__init__(name, environment.available_actions, visualizer) 51 | 52 | if not environment.recording: 53 | environment.recording = True 54 | 55 | self._env = environment 56 | self._keymap = keymap 57 | self._tick = 20 58 | 59 | self._root = Tk() 60 | self._root.wm_title = win_name 61 | self._root.resizable(width=False, height=False) 62 | self._root.geometry = "%dx%d" % size 63 | 64 | self._build_layout(self._root) 65 | 66 | def act(self, new_state, reward, done, is_training=False): 67 | pass 68 | 69 | def show(self): 70 | self._root.mainloop() 71 | 72 | def _build_layout(self, root): 73 | """ 74 | Build the window layout 75 | :param root: 76 | :return: 77 | """ 78 | raise NotImplementedError() 79 | 80 | 
def _get_keymapping_help(self): 81 | return self._keymap 82 | -------------------------------------------------------------------------------- /ai_challenge/pig_chase/pc_environment.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import time 3 | import docker 4 | import torch 5 | from torch import multiprocessing as mp 6 | from common import ENV_AGENT_NAMES 7 | from agent import PigChaseChallengeAgent, RandomAgent 8 | from environment import PigChaseEnvironment, PigChaseSymbolicStateBuilder, PigChaseTopDownStateBuilder 9 | 10 | from pc_utils import GlobalVar 11 | 12 | 13 | # Taken from Minecraft/launch_minecraft_in_background.py 14 | def _port_has_listener(port): 15 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 16 | result = sock.connect_ex(('127.0.0.1', port)) 17 | sock.close() 18 | return result == 0 19 | 20 | 21 | # Return state in C H W format (as a batch) 22 | def _map_to_observation(observation): 23 | observation = torch.Tensor(observation) 24 | return observation.permute(2, 1, 0).contiguous().unsqueeze(0) 25 | 26 | 27 | class Env(): 28 | def __init__(self, rank): 29 | docker_client = docker.from_env() 30 | agent_port, partner_port = 10000 + rank, 20000 + rank 31 | clients = [('127.0.0.1', agent_port), ('127.0.0.1', partner_port)] 32 | self.agent_type = GlobalVar() 33 | 34 | # Assume Minecraft launched if port has listener, launch otherwise 35 | if not _port_has_listener(agent_port): 36 | self._launch_malmo(docker_client, agent_port) 37 | print('Malmo running on port ' + str(agent_port)) 38 | if not _port_has_listener(partner_port): 39 | self._launch_malmo(docker_client, partner_port) 40 | print('Malmo running on port ' + str(partner_port)) 41 | 42 | # Set up partner agent env in separate process 43 | p = mp.Process(target=self._run_partner, args=(clients, )) 44 | p.daemon = True 45 | p.start() 46 | time.sleep(3) 47 | 48 | # Set up agent env 49 | self.env = PigChaseEnvironment(clients, 
PigChaseTopDownStateBuilder(gray=False), role=1, randomize_positions=True) 50 | 51 | def get_class_label(self): 52 | return self.agent_type.value() - 1 53 | 54 | def reset(self): 55 | observation = self.env.reset() 56 | while observation is None: # May happen if episode ended with first action of other agent 57 | observation = self.env.reset() 58 | return _map_to_observation(observation) 59 | 60 | def step(self, action): 61 | observation, reward, done = self.env.do(action) 62 | return _map_to_observation(observation), reward, done, None # Do not return any extra info 63 | 64 | def close(self): 65 | return # TODO: Kill processes + Docker containers 66 | 67 | def _launch_malmo(self, client, port): 68 | # Launch Docker container 69 | client.containers.run('malmo', '-port ' + str(port), detach=True, network_mode='host') 70 | # Check for port to come up 71 | launched = False 72 | for _ in range(100): 73 | time.sleep(3) 74 | if _port_has_listener(port): 75 | launched = True 76 | break 77 | # Quit if Malmo could not be launched 78 | if not launched: 79 | exit(1) 80 | 81 | # Runs partner in separate env 82 | def _run_partner(self, clients): 83 | env = PigChaseEnvironment(clients, PigChaseSymbolicStateBuilder(), role=0, randomize_positions=True) 84 | agent = PigChaseChallengeAgent(ENV_AGENT_NAMES[0]) 85 | self.agent_type.set(type(agent.current_agent) == RandomAgent and PigChaseEnvironment.AGENT_TYPE_1 or PigChaseEnvironment.AGENT_TYPE_2) 86 | obs = env.reset(self.agent_type) 87 | reward = 0 88 | agent_done = False 89 | while True: 90 | # Select an action 91 | action = agent.act(obs, reward, agent_done, is_training=True) 92 | # Reset if needed 93 | if env.done: 94 | self.agent_type.set(type(agent.current_agent) == RandomAgent and PigChaseEnvironment.AGENT_TYPE_1 or PigChaseEnvironment.AGENT_TYPE_2) 95 | obs = env.reset(self.agent_type) 96 | # Take a step 97 | obs, reward, agent_done = env.do(action) 98 | 
-------------------------------------------------------------------------------- /malmopy/environment/gym/gym.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | import gym 21 | import numpy as np 22 | import six 23 | from PIL import Image 24 | from gym.wrappers import Monitor 25 | 26 | from ..environment import VideoCapableEnvironment, StateBuilder, ALEStateBuilder 27 | 28 | 29 | def need_record(episode_id): 30 | return episode_id % 1000 == 0 31 | 32 | 33 | class GymEnvironment(VideoCapableEnvironment): 34 | """ 35 | Wraps an Open AI Gym environment 36 | """ 37 | 38 | def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None): 39 | assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder' 40 | assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple' 41 | if isinstance(repeat_action, int): 42 | assert repeat_action >= 1, "repeat_action should be >= 1" 43 | elif isinstance(repeat_action, tuple): 44 | assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)' 45 | assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]' 46 | 47 | super(GymEnvironment, self).__init__() 48 | 49 | self._state_builder = state_builder 50 | self._env = gym.make(env_name) 51 | self._env.env.frameskip = repeat_action 52 | self._no_op = max(0, no_op) 53 | self._done = True 54 | 55 | if monitoring_path is not None: 56 | self._env = Monitor(self._env, monitoring_path, video_callable=need_record) 57 | 58 | @property 59 | def available_actions(self): 60 | return self._env.action_space.n 61 | 62 | @property 63 | def state(self): 64 | return None if self._state is None else self._state_builder(self._state) 65 | 66 | @property 67 | def lives(self): 68 | return self._env.env.ale.lives() 69 | 70 | @property 71 | def frame(self): 72 | return Image.fromarray(self._state) 73 | 74 | 
def do(self, action): 75 | self._state, self._reward, self._done, _ = self._env.step(action) 76 | self._score += self._reward 77 | return self.state, self._reward, self._done 78 | 79 | def reset(self): 80 | super(GymEnvironment, self).reset() 81 | 82 | self._state = self._env.reset() 83 | 84 | # Random number of initial no-op to introduce stochasticity 85 | if self._no_op > 0: 86 | for _ in six.moves.range(np.random.randint(1, self._no_op)): 87 | self._state, _, _, _ = self._env.step(0) 88 | 89 | return self.state 90 | -------------------------------------------------------------------------------- /ai_challenge/pig_chase/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
16 | # =================================================================================================================== 17 | 18 | ENV_AGENT_NAMES = ['Agent_1', 'Agent_2'] 19 | ENV_TARGET_NAMES = ['Pig'] 20 | ENV_ENTITIES_NAME = ENV_AGENT_NAMES + ENV_TARGET_NAMES 21 | ENV_ACTIONS = ["move 1", "turn -1", "turn 1"] 22 | ENV_ENTITIES = 'entities' 23 | ENV_BOARD = 'board' 24 | ENV_BOARD_SHAPE = (9, 9) 25 | ENV_INDIVIDUAL_REWARD = 5 26 | ENV_CAUGHT_REWARD = 25 27 | 28 | 29 | def parse_clients_args(args_clients): 30 | """ 31 | Return an array of tuples (ip, port) extracted from ip:port string 32 | :param args_clients: 33 | :return: 34 | """ 35 | return [str.split(str(client), ':') for client in args_clients] 36 | 37 | def visualize_training(visualizer, step, rewards, tag='Training'): 38 | visualizer.add_entry(step, '%s/reward per episode' % tag, sum(rewards)) 39 | visualizer.add_entry(step, '%s/max.reward' % tag, max(rewards)) 40 | visualizer.add_entry(step, '%s/min.reward' % tag, min(rewards)) 41 | visualizer.add_entry(step, '%s/actions per episode' % tag, len(rewards)-1) 42 | 43 | class Entity(object): 44 | """ Wrap entity attributes """ 45 | 46 | def __init__(self, x, y, z, yaw, pitch, name=''): 47 | self._name = name 48 | self._x = int(x) 49 | self._y = int(y) 50 | self._z = int(z) 51 | self._yaw = int(yaw) % 360 52 | self._pitch = int(pitch) 53 | 54 | @property 55 | def name(self): 56 | return self._name 57 | 58 | @property 59 | def x(self): 60 | return self._x 61 | 62 | @x.setter 63 | def x(self, value): 64 | self._x = int(value) 65 | 66 | @property 67 | def y(self): 68 | return self._y 69 | 70 | @y.setter 71 | def y(self, value): 72 | self._y = int(value) 73 | 74 | @property 75 | def z(self): 76 | return self._z 77 | 78 | @z.setter 79 | def z(self, value): 80 | self._z = int(value) 81 | 82 | @property 83 | def yaw(self): 84 | return self._yaw 85 | 86 | @yaw.setter 87 | def yaw(self, value): 88 | self._yaw = int(value) % 360 89 | 90 | @property 91 | def 
pitch(self): 92 | return self._pitch 93 | 94 | @pitch.setter 95 | def pitch(self, value): 96 | self._pitch = int(value) 97 | 98 | @property 99 | def position(self): 100 | return self._x, self._y, self._z 101 | 102 | def __eq__(self, other): 103 | if isinstance(other, tuple): 104 | return self.position == other 105 | 106 | def __getitem__(self, item): 107 | return getattr(self, item) 108 | 109 | @classmethod 110 | def create(cls, obj): 111 | return cls(obj['x'], obj['y'], obj['z'], obj['yaw'], obj['pitch']) 112 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Run experiments in docker 4 | 5 | [Docker](https://www.docker.com/) is a container solution that makes it easy to build and deploy 6 | software in a virtual environment. The examples in this folder use docker to easily deploy an experiment 7 | with all its dependencies, either on a local machine or on the cloud. 8 | 9 | ## Prerequisites 10 | 11 | Install docker on your local machine by following the installation instructions for 12 | [Windows](https://docs.docker.com/docker-for-windows/install/), 13 | [Linux](https://docs.docker.com/engine/installation/), 14 | [MacOS](https://docs.docker.com/docker-for-mac/install/). 15 | 16 | Prepare a docker machine on Azure, follow the local installation steps above, then run: 17 | ``` 18 | docker-machine create --driver azure --azure-size Standard_D12 --azure-subscription-id 19 | ``` 20 | Replace `` with your Azure subsciption id - you can find this on the Azure dashboard after 21 | logging on to https://portal.azure.com. The `` is arbitrary. 
22 | 23 | Additional `docker-machine` options are listed here: https://docs.docker.com/machine/drivers/azure/ 24 | Azure machine sizes are detailed on: https://docs.microsoft.com/en-us/azure/virtual-machines/virtual-machines-linux-sizes (we recommend to use at least size Standard_D12) 25 | 26 | Configure docker to deploy to ``. Run: 27 | ``` 28 | docker-machine env 29 | ``` 30 | This will provide a script / instructions on how to prepare your environment to work with . 31 | 32 | ## Build the docker images 33 | 34 | Build the required docker images: 35 | ``` 36 | cd docker 37 | docker build malmo -t malmo:latest 38 | docker build malmopy-cntk-cpu-py27 -t malmopy-cntk-cpu-py27:latest 39 | 40 | ``` 41 | 42 | Check to make sure that the images have been compiled: 43 | ``` 44 | docker images 45 | ``` 46 | You should see a list that includes the compiled images, e.g., 47 | ``` 48 | REPOSITORY TAG IMAGE ID CREATED SIZE 49 | malmopy-cntk-cpu-py27 latest 0161af81632d 29 minutes ago 5.62 GB 50 | malmo latest 1b67b8e2cfa8 41 minutes ago 1.04 GB 51 | ... 52 | ``` 53 | 54 | ## Run the experiment 55 | 56 | Run the challenge task with an example agent: 57 | ``` 58 | cd malmopy-ai-challenge 59 | docker-compose up 60 | ``` 61 | 62 | The experiment is set up to start a tensorboard process alongside the experiment. 63 | You can view it by pointing your browser to http://127.0.0.1:6006. 64 | 65 | ## Write your own 66 | 67 | The provided docker files load malmopy and sample code directly from the 68 | `malmo-challenge` git repository. To include your own code, create a file 69 | called `Dockerfile` with the following content: 70 | 71 | ``` 72 | FROM malmopy-cntk-cpu-py27:latest 73 | 74 | # add your own experiment code here 75 | # ADD copies content from your local machine into the docker image 76 | ADD ai_challenge/pig_chase /local/malmo-challenge/ai_challenge/pig_chase 77 | ``` 78 | 79 | Build this new image using: 80 | ``` 81 | docker build . 
-t my_malmo_experiment:latest 82 | ``` 83 | 84 | Point the `agents` service in `docker-compose.py` to the new image by replacing 85 | `image: malmopy-cntk-cpu-py27:latest` with the name of the image you have just 86 | built (e.g., `image:my_malmo_experiment:latest`). Also check if the working 87 | directory or command need to be changed. 88 | 89 | Then run the new experiment: 90 | ``` 91 | docker-compose up 92 | ``` 93 | 94 | ## Cleaning up 95 | 96 | If you are using a docker machine on Azure, make sure to shutdown and decomission 97 | the machine when your experiments have completed, to avoid incurring costs. 98 | 99 | To shut a machine down: 100 | ``` 101 | docker-machine stop 102 | ``` 103 | 104 | To remove (decomission) a machine: 105 | ``` 106 | docker-machine rm 107 | ``` 108 | 109 | ## Further reading 110 | 111 | - [docker documentation](https://docs.docker.com/) 112 | - [docker on Azure](https://docs.docker.com/machine/drivers/azure/) 113 | - [docker compose](https://docs.docker.com/compose/overview/) 114 | -------------------------------------------------------------------------------- /ai_challenge/pig_chase/pc_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from os import makedirs 3 | from os.path import exists, join, pardir, abspath 4 | from json import dump 5 | import time 6 | from datetime import datetime 7 | import torch 8 | from torch.autograd import Variable 9 | from numpy import mean, var 10 | 11 | from pc_environment import Env 12 | from pc_model import ActorCritic 13 | from pc_utils import ACTION_SIZE, action_to_one_hot, extend_input, plot_line 14 | 15 | 16 | def test(rank, args, T, shared_model): 17 | torch.manual_seed(args.seed + rank) 18 | 19 | env = Env(rank) 20 | model = ActorCritic(args.hidden_size) 21 | model.eval() 22 | 23 | can_test = True # Test flag 24 | t_start = 1 # Test step counter to check against global counter 25 | rewards, steps, accs = [], [], [] # 
Rewards and steps for plotting 26 | l = str(len(str(args.T_max))) # Max num. of digits for logging steps 27 | done = True # Start new episode 28 | 29 | while T.value() <= args.T_max: 30 | if can_test: 31 | t_start = T.value() # Reset counter 32 | 33 | # Evaluate over several episodes and average results 34 | avg_rewards, avg_episode_lengths, avg_accs = [], [], [] 35 | for _ in range(args.evaluation_episodes): 36 | while True: 37 | # Reset or pass on hidden state 38 | if done: 39 | # Sync with shared model every episode 40 | model.load_state_dict(shared_model.state_dict()) 41 | hx = Variable(torch.zeros(1, args.hidden_size), volatile=True) 42 | cx = Variable(torch.zeros(1, args.hidden_size), volatile=True) 43 | # Reset environment and done flag 44 | state = env.reset() 45 | action, reward, done, episode_length = 0, 0, False, 0 46 | reward_sum, class_acc = 0, 0 47 | 48 | # Optionally render validation states 49 | if args.render: 50 | env.render() 51 | 52 | # Get label from the environment 53 | cls_id = env.get_class_label() 54 | 55 | # Calculate policy 56 | input = extend_input(state, action_to_one_hot(action, ACTION_SIZE), reward, episode_length) 57 | policy1, _, _, policy2, _, _, cls, (hx, cx) = model(Variable(input, volatile=True), (hx.detach(), cx.detach())) 58 | cls = cls.data[0, 0] < 0.5 and 0 or 1 59 | policy = policy1 if cls == 0 else policy2 60 | 61 | # Choose action greedily 62 | action = policy.max(1)[1].data[0, 0] 63 | 64 | # Step 65 | state, reward, done, _ = env.step(action) 66 | reward_sum += reward 67 | class_acc += cls == cls_id and 1 or 0 68 | done = done or episode_length >= args.max_episode_length # Stop episodes at a max length 69 | episode_length += 1 # Increase episode counter 70 | 71 | # Log and reset statistics at the end of every episode 72 | if done: 73 | avg_rewards.append(reward_sum) 74 | avg_episode_lengths.append(episode_length) 75 | avg_accs.append(class_acc / episode_length) # Normalise accuracy by episode length 76 | break 77 | 78 | 
print(('[{}] Step: {:<' + l + '} Avg. Reward: {:<8} Avg. Episode Length: {:<8} Avg. Class Acc.: {:<8}').format( 79 | datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S,%f')[:-3], 80 | t_start, 81 | sum(avg_rewards) / args.evaluation_episodes, 82 | sum(avg_episode_lengths) / args.evaluation_episodes, 83 | sum(avg_accs) / args.evaluation_episodes)) 84 | rewards.append(avg_rewards) # Keep all evaluations 85 | accs.append(avg_accs) 86 | # Keep all evaluations 87 | steps.append(t_start) 88 | plot_line(steps, rewards, 'rewards.html', 'Average Reward') # Plot rewards 89 | plot_line(steps, accs, 'accs.html', 'Average Accuracy') # Plot accuracy 90 | torch.save(model.state_dict(), 'checkpoints/' + str(t_start) + '.pth') # Checkpoint model params 91 | can_test = False # Finish testing 92 | if args.evaluate: 93 | return 94 | else: 95 | if T.value() - t_start >= args.evaluation_interval: 96 | can_test = True 97 | 98 | time.sleep(0.001) # Check if available to test every millisecond 99 | 100 | env.close() 101 | -------------------------------------------------------------------------------- /malmopy/visualization/visualizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 
10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | from os import path 21 | 22 | 23 | class Visualizable(object): 24 | def __init__(self, visualizer=None): 25 | if visualizer is not None: 26 | assert isinstance(visualizer, BaseVisualizer), "visualizer should derive from BaseVisualizer" 27 | 28 | self._visualizer = visualizer 29 | 30 | def visualize(self, index, tag, value, **kwargs): 31 | if self._visualizer is not None: 32 | self._visualizer << (index, tag, value, kwargs) 33 | 34 | @property 35 | def can_visualize(self): 36 | return self._visualizer is not None 37 | 38 | 39 | class BaseVisualizer(object): 40 | """ Provide a unified interface for observing the training progress """ 41 | 42 | def add_entry(self, index, key, result, **kwargs): 43 | raise NotImplementedError() 44 | 45 | def __lshift__(self, other): 46 | if isinstance(other, tuple): 47 | if len(other) >= 3: 48 | self.add_entry(other[0], str(other[1]), other[2]) 49 | else: 50 | raise ValueError("Provided tuple should be of the form (key, value)") 51 | else: 52 | raise ValueError("Trying to use stream operator without a tuple (key, value)") 53 | 54 | 55 | class EmptyVisualizer(BaseVisualizer): 56 | """ A boilerplate visualizer that does nothing """ 57 | 58 | def add_entry(self, index, key, result, **kwargs): 59 | pass 60 | 61 | 62 | class ConsoleVisualizer(BaseVisualizer): 63 | """ 
Print visualization to stdout as: 64 | key -> value 65 | """ 66 | CONSOLE_DEFAULT_FORMAT = "[%s] %d : %s -> %.3f" 67 | 68 | def __init__(self, format=None, prefix=None): 69 | self._format = format or ConsoleVisualizer.CONSOLE_DEFAULT_FORMAT 70 | self._prefix = prefix or '-' 71 | 72 | def add_entry(self, index, key, result, **kwargs): 73 | print(self._format % (self._prefix, index, key, result)) 74 | 75 | 76 | class CsvVisualizer(BaseVisualizer): 77 | """ Write data to file. The following formats are supported: CSV, JSON, Excel. """ 78 | def __init__(self, output_file, override=False): 79 | if path.exists(output_file) and not override: 80 | raise Exception('%s already exists and override is False' % output_file) 81 | 82 | super(CsvVisualizer, self).__init__() 83 | self._file = output_file 84 | self._data = {} 85 | 86 | def add_entry(self, index, key, result, **kwargs): 87 | if key in self._data[index]: 88 | print('Warning: Found previous value for %s in visualizer' % key) 89 | 90 | self._data[index].update({key: result}) 91 | 92 | def close(self, format='csv'): 93 | import pandas as pd 94 | 95 | if format == 'csv': 96 | pd.DataFrame.from_dict(self._data, orient='index').to_csv(self._file) 97 | elif format == 'json': 98 | pd.DataFrame.from_dict(self._data, orient='index').to_json(self._file) 99 | else: 100 | writer = pd.ExcelWriter(self._file) 101 | pd.DataFrame.from_dict(self._data, orient='index').to_excel(writer) 102 | writer.save() 103 | 104 | def __enter__(self): 105 | return self 106 | 107 | def __exit__(self, exc_type, exc_val, exc_tb): 108 | self.close() 109 | return self 110 | -------------------------------------------------------------------------------- /ai_challenge/pig_chase/pc_main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import argparse 3 | import os 4 | import torch 5 | from torch import multiprocessing as mp 6 | 7 | from pc_model import ActorCritic 8 | from pc_optim import 
SharedRMSprop 9 | from pc_train import train 10 | from pc_test import test 11 | from pc_utils import GlobalVar 12 | 13 | 14 | parser = argparse.ArgumentParser(description='ACER') 15 | parser.add_argument('--seed', type=int, default=123, help='Random seed') 16 | parser.add_argument('--env', type=str, default='CartPole-v1', metavar='ENV', help='OpenAI Gym environment') 17 | parser.add_argument('--num-processes', type=int, default=1, metavar='N', help='Number of training async agents (does not include single validation agent)') 18 | parser.add_argument('--T-max', type=int, default=6e5, metavar='STEPS', help='Number of training steps') 19 | parser.add_argument('--t-max', type=int, default=30, metavar='STEPS', help='Max number of forward steps for A3C before update') 20 | parser.add_argument('--max-episode-length', type=int, default=30, metavar='LENGTH', help='Maximum episode length') 21 | parser.add_argument('--hidden-size', type=int, default=256, metavar='SIZE', help='Hidden size of LSTM cell') 22 | parser.add_argument('--model', type=str, metavar='PARAMS', help='Pretrained model (state dict)') 23 | parser.add_argument('--memory-capacity', type=int, default=100000, metavar='CAPACITY', help='Experience replay memory capacity') 24 | parser.add_argument('--replay-ratio', type=int, default=4, metavar='r', help='Ratio of off-policy to on-policy updates') 25 | parser.add_argument('--replay-start', type=int, default=100, metavar='STEPS', help='Number of transitions to save before starting off-policy training') 26 | parser.add_argument('--discount', type=float, default=0.99, metavar='γ', help='Discount factor') 27 | parser.add_argument('--trace-decay', type=float, default=1, metavar='λ', help='Eligibility trace decay factor') 28 | parser.add_argument('--trace-max', type=float, default=10, metavar='c', help='Importance weight truncation (max) value') 29 | parser.add_argument('--trust-region', action='store_true', help='Use trust region') 30 | 
parser.add_argument('--trust-region-decay', type=float, default=0.99, metavar='α', help='Average model weight decay rate') 31 | parser.add_argument('--trust-region-threshold', type=float, default=1, metavar='δ', help='Trust region threshold value') 32 | parser.add_argument('--reward-clip', action='store_true', help='Clip rewards to [-1, 1]') 33 | parser.add_argument('--lr', type=float, default=1e-3, metavar='η', help='Learning rate') 34 | parser.add_argument('--lr-decay', action='store_true', help='Linearly decay learning rate to 0') 35 | parser.add_argument('--rmsprop-decay', type=float, default=0.99, metavar='α', help='RMSprop decay factor') 36 | parser.add_argument('--batch-size', type=int, default=16, metavar='SIZE', help='Off-policy batch size') 37 | parser.add_argument('--entropy-weight', type=float, default=0.001, metavar='β', help='Entropy regularisation weight') 38 | parser.add_argument('--no-time-normalisation', action='store_true', help='Do not normalise loss by number of time steps') 39 | parser.add_argument('--max-gradient-norm', type=float, default=10, metavar='VALUE', help='Max value of gradient L1 norm for gradient clipping') 40 | parser.add_argument('--evaluate', action='store_true', help='Evaluate only') 41 | parser.add_argument('--evaluation-interval', type=int, default=1000, metavar='STEPS', help='Number of training steps between evaluations (roughly)') 42 | parser.add_argument('--evaluation-episodes', type=int, default=20, metavar='N', help='Number of evaluation episodes to average over') 43 | parser.add_argument('--render', action='store_true', help='Render evaluation agent') 44 | 45 | if __name__ == '__main__': 46 | # Setup 47 | args = parser.parse_args() 48 | print(' ' * 26 + 'Options') 49 | for k, v in vars(args).items(): 50 | print(' ' * 26 + k + ': ' + str(v)) 51 | torch.manual_seed(args.seed) 52 | T = GlobalVar() # Global shared counter 53 | 54 | # Create shared network 55 | shared_model = ActorCritic(args.hidden_size) 56 | if args.model 
and os.path.isfile(args.model): 57 | # Load pretrained weights 58 | shared_model.load_state_dict(torch.load(args.model)) 59 | # Create average network 60 | shared_average_model = ActorCritic(args.hidden_size) 61 | shared_average_model.load_state_dict(shared_model.state_dict()) 62 | shared_average_model.share_memory() 63 | for param in shared_average_model.parameters(): 64 | param.requires_grad = False 65 | # Create optimiser for shared network parameters with shared statistics 66 | optimiser = SharedRMSprop(shared_model.parameters(), lr=args.lr, alpha=args.rmsprop_decay) 67 | optimiser.share_memory() 68 | 69 | # Start validation agent 70 | processes = [] 71 | p = mp.Process(target=test, args=(0, args, T, shared_model)) 72 | p.start() 73 | processes.append(p) 74 | 75 | if not args.evaluate: 76 | # Start training agents 77 | for rank in range(1, args.num_processes + 1): 78 | p = mp.Process(target=train, args=(rank, args, T, shared_model, shared_average_model, optimiser)) 79 | p.start() 80 | processes.append(p) 81 | 82 | # Clean up 83 | for p in processes: 84 | p.join() 85 | -------------------------------------------------------------------------------- /ai_challenge/pig_chase/README.md: -------------------------------------------------------------------------------- 1 | # Malmo Collaborative AI Challenge - Pig Chase 2 | 3 | This repository contains Malmo Collaborative AI challenge task definition. The challenge task takes the form of a collaborative mini game, called Pig Chase. 4 | 5 | ![Screenshot of the pig chase game](pig-chase-overview.png?raw=true "Screenshot of the Pig Chase game") 6 | 7 | ## Overview of the game 8 | 9 | Two Minecraft agents and a pig are wandering a small meadow. 
The agents have two choices: 10 | 11 | - _Catch the pig_ (i.e., the agents pinch or corner the pig, and no escape path is available), and receive a high reward (25 points) 12 | - _Give up_ and leave the pig pen through the exits to the left and right of the pen, marked by blue squares, and receive a small reward (5 points) 13 | 14 | The pig chased is inspired by the variant of the _stag hunt_ presented in [Yoshida et al. 2008]. The [stag hunt](https://en.wikipedia.org/wiki/Stag_hunt) is a classical game theoretic game formulation that captures conflicts between collaboration and individual safety. 15 | 16 | [Yoshida et al. 2008] Yoshida, Wako, Ray J. Dolan, and Karl J. Friston. ["Game theory of mind."](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1000254) PLoS Comput Biol 4.12 (2008): e1000254. 17 | 18 | 19 | ## How to play (human players) 20 | 21 | To familiarize yourself with the game, we recommend that you play it yourself. The following instructions allow you to play the game with a "focused agent". A baseline agent that tries to move towards the pig whenever possible. 22 | 23 | ### Prerequisites 24 | 25 | * Install the [Malmo Platform](https://github.com/Microsoft/malmo) and the `malmopy` framework as described under [Installation](../../README.md#installation), and verify that you can run the Malmo platform and python example agents 26 | 27 | ### Steps 28 | 29 | * Start two instances of the Malmo Client on ports `10000` and `10001` 30 | * `cd malmo-challenge/ai_challenge/pig_chase` 31 | * `python pig_chase_human_vs_agent.py` 32 | 33 | Wait for a few seconds for the human player interface to appear. 34 | 35 | Note: the script assumes that two Malmo clients are running on the default ports on localhost. You can specify alternative clients on the command line. See the script's usage instructions (`python pig_chase_human_vs_agent.py -h`) for details. 36 | 37 | ### How to play 38 | 39 | * The game is played over 10 rounds at a time. 
The goal is to accumulate the highest score over these 10 rounds.
73 | 74 | PigChaseEvaluator takes 2 arguments: 75 | - agent_100k : Your agent trained with 100k steps (100k train calls) 76 | - agent_500k : Your agent trained with 500k steps (500k train calls) 77 | 78 | To evaluate your agent: 79 | 80 | ``` python 81 | # Creates an agent trained with 100k train calls 82 | my_agent_100k = MyCustomAgent() 83 | 84 | # Creates an agent trained with 500k train calls 85 | my_agent_500k = MyCustomAgent() 86 | 87 | # You can pass a custom StateBuilder for your agent. 88 | # It will be used by the environment to generate state for your agent 89 | eval = PigChaseEvaluator(my_agent_100k, my_agent_500k, MyStateBuilder()) 90 | 91 | # Run and save 92 | eval.run() 93 | eval.save('My experiment 1', 'path/to/save.json') 94 | ``` 95 | 96 | 97 | ## Next steps 98 | 99 | To participate in the Collaborative AI Challenge, implement and train an agent that can effectively collaborate with any collaborator. Your agent can use either the first-person visual view, or the symbolic view (as demonstrated in the `FocusedAgent`). You can use any AI/learning approach you like - originality of the chose approach is part of the criteria for the challenge prizes. Can you come up with an agent learns to outperform the A-star baseline agent? Can an agent learn to play with a copy of itself? Can it outperform your own (human) score? 100 | 101 | For more inspiration, you can look at more [code samples](../../samples/README.md) or learn how to [run experiments on Azure using docker](../../docker/README.md). 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | x64/ 19 | x86/ 20 | bld/ 21 | [Bb]in/ 22 | [Oo]bj/ 23 | [Ll]og/ 24 | 25 | # Visual Studio 2015 cache/options directory 26 | .vs/ 27 | # Uncomment if you have tasks that create the project's static files in wwwroot 28 | #wwwroot/ 29 | 30 | # MSTest test Results 31 | [Tt]est[Rr]esult*/ 32 | [Bb]uild[Ll]og.* 33 | 34 | # NUNIT 35 | *.VisualState.xml 36 | TestResult.xml 37 | 38 | # Build Results of an ATL Project 39 | [Dd]ebugPS/ 40 | [Rr]eleasePS/ 41 | dlldata.c 42 | 43 | # DNX 44 | project.lock.json 45 | project.fragment.lock.json 46 | artifacts/ 47 | 48 | *_i.c 49 | *_p.c 50 | *_i.h 51 | *.ilk 52 | *.meta 53 | *.obj 54 | *.pch 55 | *.pdb 56 | *.pgc 57 | *.pgd 58 | *.rsp 59 | *.sbr 60 | *.tlb 61 | *.tli 62 | *.tlh 63 | *.tmp 64 | *.tmp_proj 65 | *.log 66 | *.vspscc 67 | *.vssscc 68 | .builds 69 | *.pidb 70 | *.svclog 71 | *.scc 72 | 73 | # Chutzpah Test files 74 | _Chutzpah* 75 | 76 | # Visual C++ cache files 77 | ipch/ 78 | *.aps 79 | *.ncb 80 | *.opendb 81 | *.opensdf 82 | *.sdf 83 | *.cachefile 84 | *.VC.db 85 | *.VC.VC.opendb 86 | 87 | # Visual Studio profiler 88 | *.psess 89 | *.vsp 90 | *.vspx 91 | *.sap 92 | 93 | # TFS 2012 Local Workspace 94 | $tf/ 95 | 96 | # Guidance Automation Toolkit 97 | *.gpState 98 | 99 | # ReSharper is a .NET coding add-in 100 | _ReSharper*/ 101 | *.[Rr]e[Ss]harper 102 | *.DotSettings.user 103 | 104 | # JustCode is a .NET coding add-in 105 | .JustCode 106 | 107 | # TeamCity is a build add-in 108 | _TeamCity* 109 | 110 | # DotCover is a Code Coverage Tool 111 | *.dotCover 112 | 113 | # NCrunch 114 | _NCrunch_* 115 | .*crunch*.local.xml 116 | nCrunchTemp_* 117 | 118 | # MightyMoose 119 | *.mm.* 120 | AutoTest.Net/ 121 | 122 | # Web workbench (sass) 123 | .sass-cache/ 124 | 
125 | # Installshield output folder 126 | [Ee]xpress/ 127 | 128 | # DocProject is a documentation generator add-in 129 | DocProject/buildhelp/ 130 | DocProject/Help/*.HxT 131 | DocProject/Help/*.HxC 132 | DocProject/Help/*.hhc 133 | DocProject/Help/*.hhk 134 | DocProject/Help/*.hhp 135 | DocProject/Help/Html2 136 | DocProject/Help/html 137 | 138 | # Click-Once directory 139 | publish/ 140 | 141 | # Publish Web Output 142 | *.[Pp]ublish.xml 143 | *.azurePubxml 144 | # TODO: Comment the next line if you want to checkin your web deploy settings 145 | # but database connection strings (with potential passwords) will be unencrypted 146 | #*.pubxml 147 | *.publishproj 148 | 149 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 150 | # checkin your Azure Web App publish settings, but sensitive information contained 151 | # in these scripts will be unencrypted 152 | PublishScripts/ 153 | 154 | # NuGet Packages 155 | *.nupkg 156 | # The packages folder can be ignored because of Package Restore 157 | **/packages/* 158 | # except build/, which is used as an MSBuild target. 
159 | !**/packages/build/ 160 | # Uncomment if necessary however generally it will be regenerated when needed 161 | #!**/packages/repositories.config 162 | # NuGet v3's project.json files produces more ignoreable files 163 | *.nuget.props 164 | *.nuget.targets 165 | 166 | # Microsoft Azure Build Output 167 | csx/ 168 | *.build.csdef 169 | 170 | # Microsoft Azure Emulator 171 | ecf/ 172 | rcf/ 173 | 174 | # Windows Store app package directories and files 175 | AppPackages/ 176 | BundleArtifacts/ 177 | Package.StoreAssociation.xml 178 | _pkginfo.txt 179 | 180 | # Visual Studio cache files 181 | # files ending in .cache can be ignored 182 | *.[Cc]ache 183 | # but keep track of directories ending in .cache 184 | !*.[Cc]ache/ 185 | 186 | # Others 187 | ClientBin/ 188 | ~$* 189 | *~ 190 | *.dbmdl 191 | *.dbproj.schemaview 192 | *.jfm 193 | *.pfx 194 | *.publishsettings 195 | node_modules/ 196 | orleans.codegen.cs 197 | 198 | # Since there are multiple workflows, uncomment next line to ignore bower_components 199 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 200 | #bower_components/ 201 | 202 | # RIA/Silverlight projects 203 | Generated_Code/ 204 | 205 | # Backup & report files from converting an old project file 206 | # to a newer Visual Studio version. 
Backup files are not needed, 207 | # because we have git ;-) 208 | _UpgradeReport_Files/ 209 | Backup*/ 210 | UpgradeLog*.XML 211 | UpgradeLog*.htm 212 | 213 | # SQL Server files 214 | *.mdf 215 | *.ldf 216 | 217 | # Business Intelligence projects 218 | *.rdl.data 219 | *.bim.layout 220 | *.bim_*.settings 221 | 222 | # Microsoft Fakes 223 | FakesAssemblies/ 224 | 225 | # GhostDoc plugin setting file 226 | *.GhostDoc.xml 227 | 228 | # Node.js Tools for Visual Studio 229 | .ntvs_analysis.dat 230 | 231 | # Visual Studio 6 build log 232 | *.plg 233 | 234 | # Visual Studio 6 workspace options file 235 | *.opt 236 | 237 | # Visual Studio LightSwitch build output 238 | **/*.HTMLClient/GeneratedArtifacts 239 | **/*.DesktopClient/GeneratedArtifacts 240 | **/*.DesktopClient/ModelManifest.xml 241 | **/*.Server/GeneratedArtifacts 242 | **/*.Server/ModelManifest.xml 243 | _Pvt_Extensions 244 | 245 | # Paket dependency manager 246 | .paket/paket.exe 247 | paket-files/ 248 | 249 | # FAKE - F# Make 250 | .fake/ 251 | 252 | # JetBrains Rider 253 | .idea/ 254 | *.sln.iml 255 | 256 | # CodeRush 257 | .cr/ 258 | 259 | # Python Tools for Visual Studio (PTVS) 260 | __pycache__/ 261 | *.pyc 262 | 263 | # Tests cache 264 | */tests/.cache/v/cache/ 265 | 266 | # Library 267 | *.pyd 268 | 269 | # Submission 270 | malmopy.egg-info 271 | ai_challenge/pig_chase/src 272 | ai_challenge/pig_chase/checkpoints/*.pth 273 | ai_challenge/pig_chase/rewards.html 274 | ai_challenge/pig_chase/accs.html 275 | -------------------------------------------------------------------------------- /ai_challenge/pig_chase/pig_chase_human_vs_agent.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
# ===================================================================================================================

import os
import sys
from argparse import ArgumentParser
from datetime import datetime
from multiprocessing import Process, Event
from os import path
from time import sleep

from malmopy.agent import RandomAgent
from malmopy.agent.gui import ARROW_KEYS_MAPPING
from malmopy.visualization import ConsoleVisualizer

# Enforce path
sys.path.insert(0, os.getcwd())
sys.path.insert(1, os.path.join(os.path.pardir, os.getcwd()))

from common import parse_clients_args, ENV_AGENT_NAMES, ENV_ACTIONS
from agent import PigChaseChallengeAgent, PigChaseHumanAgent
from environment import PigChaseEnvironment, PigChaseSymbolicStateBuilder

EXPERIMENT_NAME = 'Pig_Chase_2xAStar'
MAX_ACTIONS = 25  # this should match the mission definition, used for display only


def _challenge_agent_type(agent):
    """Return the environment agent type matching the challenge agent's
    currently delegated policy (random policy -> TYPE_1, otherwise TYPE_2)."""
    if isinstance(agent.current_agent, RandomAgent):
        return PigChaseEnvironment.AGENT_TYPE_1
    return PigChaseEnvironment.AGENT_TYPE_2


def agent_factory(name, role, kind, clients, max_episodes, max_actions, logdir, quit):
    """Run one of the two mission participants in its own process.

    :param name: agent name as used by the environment
    :param role: 0 for the challenge (A*/random) agent, 1 for the human player
    :param kind: requested opponent kind ('astar'/'random'); currently unused here
    :param clients: list of Malmo client endpoints ('ip:port' strings)
    :param max_episodes: episode budget shown to the human player
    :param max_actions: per-episode action budget shown to the human player
    :param logdir: results directory; currently unused here
    :param quit: multiprocessing.Event used to signal shutdown
    """
    assert len(clients) >= 2, 'There are not enough Malmo clients in the pool (need at least 2)'

    clients = parse_clients_args(clients)
    visualizer = ConsoleVisualizer(prefix='Agent %d' % role)

    if role == 0:
        env = PigChaseEnvironment(clients, PigChaseSymbolicStateBuilder(),
                                  actions=ENV_ACTIONS, role=role,
                                  human_speed=True, randomize_positions=True)
        agent = PigChaseChallengeAgent(name)

        # the environment must be told which policy the challenge agent
        # delegates to on each reset
        obs = env.reset(_challenge_agent_type(agent))
        reward = 0
        rewards = []
        done = False
        episode = 0

        while True:

            # select an action
            action = agent.act(obs, reward, done, True)

            if done:
                visualizer << (episode + 1, 'Reward', sum(rewards))
                rewards = []
                episode += 1
                obs = env.reset(_challenge_agent_type(agent))

            # take a step
            obs, reward, done = env.do(action)
            rewards.append(reward)

    else:
        env = PigChaseEnvironment(clients, PigChaseSymbolicStateBuilder(),
                                  actions=list(ARROW_KEYS_MAPPING.values()),
                                  role=role, randomize_positions=True)
        env.reset(PigChaseEnvironment.AGENT_TYPE_3)

        agent = PigChaseHumanAgent(name, env, list(ARROW_KEYS_MAPPING.keys()),
                                   max_episodes, max_actions, visualizer, quit)
        # blocks in the GUI event loop until the player quits
        agent.show()


def run_mission(agents_def):
    """Spawn one daemon process per agent definition, then wait for the shared
    quit event and terminate both processes."""
    assert len(agents_def) == 2, 'Incompatible number of agents (required: 2, got: %d)' % len(agents_def)
    quit = Event()
    processes = []
    for agent in agents_def:
        agent['quit'] = quit
        p = Process(target=agent_factory, kwargs=agent)
        p.daemon = True
        p.start()

        if agent['role'] == 0:
            sleep(1)  # Just to let time for the server to start

        processes.append(p)
    quit.wait()
    for process in processes:
        process.terminate()


if __name__ == '__main__':
    arg_parser = ArgumentParser()
    arg_parser.add_argument('-e', '--episodes', type=int, default=10, help='Number of episodes to run.')
    arg_parser.add_argument('-k', '--kind', type=str, default='astar', choices=['astar', 'random'],
                            help='The kind of agent to play with (random or astar).')
    arg_parser.add_argument('clients', nargs='*',
                            default=['127.0.0.1:10000', '127.0.0.1:10001'],
                            help='Malmo clients (ip(:port)?)+')
    args = arg_parser.parse_args()

    logdir = path.join('results/pig-human', datetime.utcnow().isoformat())
    agents = [{'name': agent, 'role': role, 'kind': args.kind,
               'clients': args.clients, 'max_episodes': args.episodes,
               'max_actions': MAX_ACTIONS, 'logdir': logdir}
              for role, agent in enumerate(ENV_AGENT_NAMES)]

    run_mission(agents)

# -------------------------------------------------------------------------------
# ai_challenge/pig_chase/evaluation.py
# -------------------------------------------------------------------------------
# Copyright (c) 2017 Microsoft Corporation.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ===================================================================================================================

import os
import sys
from time import sleep

# Enforce path -- must run BEFORE the project-local imports below, otherwise
# it cannot help resolve them (the original inserted the paths after them).
sys.path.insert(0, os.getcwd())
sys.path.insert(1, os.path.join(os.path.pardir, os.getcwd()))

from common import parse_clients_args, ENV_AGENT_NAMES
from agent import PigChaseChallengeAgent
from environment import PigChaseEnvironment, PigChaseSymbolicStateBuilder


class PigChaseEvaluator(object):
    """Evaluates two trained agent checkpoints (100k and 500k steps) against
    the challenge agent and serializes the results for the leaderboard."""

    def __init__(self, clients, agent_100k, agent_500k, state_builder):
        assert len(clients) >= 2, 'Not enough clients provided'

        self._clients = clients
        self._agent_100k = agent_100k
        self._agent_500k = agent_500k
        self._state_builder = state_builder
        # per-checkpoint reward buffers filled by agent_loop()
        self._accumulators = {'100k': [], '500k': []}

    def save(self, experiment_name, filepath):
        """
        Save the evaluation results in a JSON file
        understandable by the leaderboard.

        Note: The leaderboard will not accept a submission if you already
        uploaded a file with the same experiment name.

        :param experiment_name: An identifier for the experiment
        :param filepath: Path where to store the results file
        :return:
        """

        assert experiment_name is not None, 'experiment_name cannot be None'

        from json import dump
        from os.path import exists, dirname, abspath
        from os import makedirs
        from numpy import mean, var

        # Compute metrics
        metrics = {key: {'mean': mean(rewards),
                         'var': var(rewards),
                         'count': len(rewards)}
                   for key, rewards in self._accumulators.items()}

        metrics['experimentname'] = experiment_name

        try:
            filepath = abspath(filepath)
            # Create the parent directory of the results file if needed.
            # (The original computed join(pardir, filepath), which is a no-op
            # for an absolute path and never pointed at the parent directory.)
            parent = dirname(filepath)
            if parent and not exists(parent):
                makedirs(parent)

            with open(filepath, 'w') as f_out:
                dump(metrics, f_out)

            print('==================================')
            print('Evaluation done, results written at %s' % filepath)

        except Exception as e:
            print('Unable to save the results: %s' % e)

    def run(self):
        """Run both evaluations sequentially; the challenge agent is hosted in
        a separate process for each run and terminated afterwards."""
        from multiprocessing import Process

        env = PigChaseEnvironment(self._clients, self._state_builder,
                                  role=1, randomize_positions=True)
        print('==================================')
        print('Starting evaluation of Agent @100k')

        p = Process(target=run_challenge_agent, args=(self._clients,))
        p.start()
        sleep(5)  # give the challenge agent time to connect
        agent_loop(self._agent_100k, env, self._accumulators['100k'])
        p.terminate()

        print('==================================')
        print('Starting evaluation of Agent @500k')

        p = Process(target=run_challenge_agent, args=(self._clients,))
        p.start()
        sleep(5)  # give the challenge agent time to connect
        agent_loop(self._agent_500k, env, self._accumulators['500k'])
        p.terminate()


def run_challenge_agent(clients):
    """Entry point of the subprocess hosting the challenge (role 0) agent."""
    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients, builder, role=0,
                              randomize_positions=True)
    agent = PigChaseChallengeAgent(ENV_AGENT_NAMES[0])
    agent_loop(agent, env, None)


def agent_loop(agent, env, metrics_acc):
    """Run `agent` in `env` for 100 episodes, appending every step reward to
    `metrics_acc` (pass None to discard rewards, e.g. for the opponent)."""
    EVAL_EPISODES = 100
    agent_done = False
    reward = 0
    episode = 0
    obs = env.reset()

    while episode < EVAL_EPISODES:
        # check if env needs reset
        if env.done:
            print('Episode %d (%.2f)%%' % (episode, (episode / EVAL_EPISODES) * 100.))

            obs = env.reset()
            while obs is None:
                # this can happen if the episode ended with the first
                # action of the other agent
                print('Warning: received obs == None.')
                obs = env.reset()

            episode += 1

        # select an action
        action = agent.act(obs, reward, agent_done, is_training=True)
        # take a step
        obs, reward, agent_done = env.do(action)

        if metrics_acc is not None:
            metrics_acc.append(reward)

# -------------------------------------------------------------------------------
# samples/atari/gym_atari_dqn.py
# -------------------------------------------------------------------------------
# Copyright (c) 2017 Microsoft Corporation.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ===================================================================================================================

from argparse import ArgumentParser
from datetime import datetime
from subprocess import Popen

from malmopy.agent import QLearnerAgent, TemporalMemory
from malmopy.environment.gym import GymEnvironment

try:
    from malmopy.visualization.tensorboard import TensorboardVisualizer
    from malmopy.visualization.tensorboard.cntk import CntkConverter

    TENSORBOARD_AVAILABLE = True
except ImportError:
    print('Cannot import tensorboard, using ConsoleVisualizer.')
    from malmopy.visualization import ConsoleVisualizer

    TENSORBOARD_AVAILABLE = False


ROOT_FOLDER = 'results/baselines/%s/dqn/%s-%s'
EPOCH_SIZE = 250000


def visualize_training(visualizer, step, rewards, tag='Training'):
    """Push per-episode reward statistics to the visualizer.

    :param visualizer: object exposing add_entry(step, key, value)
    :param step: global training step used as the x-axis
    :param rewards: list of rewards collected during the finished episode
    :param tag: namespace prefix for the emitted entries
    """
    # Guard against an empty episode: max()/min() on an empty list raise
    # ValueError (the original crashed here if no reward was recorded).
    if not rewards:
        return
    visualizer.add_entry(step, '%s/reward per episode' % tag, sum(rewards))
    visualizer.add_entry(step, '%s/max.reward' % tag, max(rewards))
    visualizer.add_entry(step, '%s/min.reward' % tag, min(rewards))
    visualizer.add_entry(step, '%s/actions per episode' % tag, len(rewards) - 1)


def run_experiment(environment, backend, device_id, max_epoch, record, logdir,
                   visualizer):
    """Train a DQN agent on a Gym Atari environment.

    :param environment: Gym environment id (e.g. 'Breakout-v3')
    :param backend: 'cntk' or 'chainer'
    :param device_id: GPU id, or -1 for CPU
    :param max_epoch: number of epochs (EPOCH_SIZE steps each) to train
    :param record: if True, record runs under `logdir`
    :param logdir: monitoring/recording directory
    :param visualizer: training metrics sink
    """
    env = GymEnvironment(environment,
                         monitoring_path=logdir if record else None)

    if backend == 'cntk':
        from malmopy.model.cntk import QNeuralNetwork as CntkDQN
        model = CntkDQN((4, 84, 84), env.available_actions, momentum=0.95,
                        device_id=device_id, visualizer=visualizer)
    else:
        from malmopy.model.chainer import DQNChain, QNeuralNetwork as ChainerDQN
        chain = DQNChain((4, 84, 84), env.available_actions)
        target_chain = DQNChain((4, 84, 84), env.available_actions)
        model = ChainerDQN(chain, target_chain,
                           momentum=0.95, device_id=device_id)

    memory = TemporalMemory(1000000, model.input_shape[1:])
    agent = QLearnerAgent("DQN Agent", env.available_actions, model, memory,
                          0.99, 32, train_after=10000, reward_clipping=(-1, 1),
                          visualizer=visualizer)

    state = env.reset()
    reward = 0
    agent_done = False
    viz_rewards = []

    max_training_steps = max_epoch * EPOCH_SIZE
    for step in range(1, max_training_steps + 1):

        # check if env needs reset
        if env.done:
            visualize_training(visualizer, step, viz_rewards)
            agent.inject_summaries(step)
            viz_rewards = []
            state = env.reset()

        # select an action
        action = agent.act(state, reward, agent_done, is_training=True)

        # take a step
        state, reward, agent_done = env.do(action)
        viz_rewards.append(reward)

        if (step % EPOCH_SIZE) == 0:
            # integer division: step / EPOCH_SIZE is a float under Python 3
            model.save('%s-%s-dqn_%d.model' %
                       (backend, environment, step // EPOCH_SIZE))


if __name__ == '__main__':
    arg_parser = ArgumentParser(description='OpenAI Gym DQN example')
    arg_parser.add_argument('-b', '--backend', type=str, default='cntk',
                            choices=['cntk', 'chainer'],
                            help='Neural network backend to use.')
    arg_parser.add_argument('-d', '--device', type=int, default=-1,
                            help='GPU device on which to run the experiment.')
    arg_parser.add_argument('-r', '--record', action='store_true',
                            help='Setting this will record runs')
    arg_parser.add_argument('-e', '--epochs', type=int, default=50,
                            help='Number of epochs. One epoch is 250k actions.')
    arg_parser.add_argument('-p', '--port', type=int, default=6006,
                            help='Port for running tensorboard.')
    arg_parser.add_argument('env', type=str, metavar='environment',
                            nargs='?', default='Breakout-v3',
                            help='Gym environment to run')

    args = arg_parser.parse_args()

    logdir = ROOT_FOLDER % (args.env, args.backend, datetime.utcnow().isoformat())
    if TENSORBOARD_AVAILABLE:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)
        print('Starting tensorboard ...')
        # left running deliberately; killed when this process exits
        p = Popen(['tensorboard', '--logdir=results', '--port=%d' % args.port])

    else:
        visualizer = ConsoleVisualizer()

    print('Starting experiment')
    run_experiment(args.env, args.backend, int(args.device), args.epochs,
                   args.record, logdir, visualizer)

# -------------------------------------------------------------------------------
# ai_challenge/pig_chase/pig_chase_baseline.py
# -------------------------------------------------------------------------------
# Copyright (c) 2017 Microsoft Corporation.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ===================================================================================================================

import numpy as np
import os
import sys

from argparse import ArgumentParser
from datetime import datetime

import six
from os import path
from threading import Thread, active_count
from time import sleep

from malmopy.agent import RandomAgent
try:
    from malmopy.visualization.tensorboard import TensorboardVisualizer
    from malmopy.visualization.tensorboard.cntk import CntkConverter
except ImportError:
    print('Cannot import tensorboard, using ConsoleVisualizer.')
    from malmopy.visualization import ConsoleVisualizer

from common import parse_clients_args, visualize_training, ENV_AGENT_NAMES, ENV_TARGET_NAMES
from agent import PigChaseChallengeAgent, FocusedAgent
from environment import PigChaseEnvironment, PigChaseSymbolicStateBuilder

# Enforce path
sys.path.insert(0, os.getcwd())
sys.path.insert(1, os.path.join(os.path.pardir, os.getcwd()))

BASELINES_FOLDER = 'results/baselines/pig_chase/%s/%s'
EPOCH_SIZE = 100


def _challenge_agent_type(agent):
    """Environment agent type for the policy the challenge agent currently
    delegates to (random policy -> TYPE_1, otherwise TYPE_2)."""
    if isinstance(agent.current_agent, RandomAgent):
        return PigChaseEnvironment.AGENT_TYPE_1
    return PigChaseEnvironment.AGENT_TYPE_2


def agent_factory(name, role, baseline_agent, clients, max_epochs,
                  logdir, visualizer):
    """Run one participant of the baseline experiment in its own thread.

    :param name: agent name as used by the environment
    :param role: 0 for the challenge agent, 1 for the baseline under test
    :param baseline_agent: 'astar' or 'random' (role 1 only)
    :param clients: list of Malmo client endpoints ('ip:port' strings)
    :param max_epochs: number of epochs (EPOCH_SIZE steps each) for role 1
    :param logdir: results directory; currently unused here
    :param visualizer: training metrics sink
    """
    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)

    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients, builder, role=role,
                              randomize_positions=True)

    if role == 0:
        agent = PigChaseChallengeAgent(name)
        obs = env.reset(_challenge_agent_type(agent))

        reward = 0
        agent_done = False

        # the challenge agent runs until the process is stopped
        while True:

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)

            # reset if needed
            if env.done:
                obs = env.reset(_challenge_agent_type(agent))

            # take a step
            obs, reward, agent_done = env.do(action)

    else:

        if baseline_agent == 'astar':
            agent = FocusedAgent(name, ENV_TARGET_NAMES[0])
        else:
            agent = RandomAgent(name, env.available_actions)

        obs = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        max_training_steps = EPOCH_SIZE * max_epochs
        for step in six.moves.range(1, max_training_steps + 1):

            # check if env needs reset
            if env.done:
                visualize_training(visualizer, step, viz_rewards)
                viz_rewards = []
                obs = env.reset()

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)
            viz_rewards.append(reward)

            agent.inject_summaries(step)


def run_experiment(agents_def):
    """Start both agents as daemon threads; return when only the (endless)
    challenge-agent thread remains, or on Ctrl-C."""
    assert len(agents_def) == 2, 'Not enough agents (required: 2, got: %d)'\
                                 % len(agents_def)

    processes = []
    for agent in agents_def:
        p = Thread(target=agent_factory, kwargs=agent)
        p.daemon = True
        p.start()

        # Give the server time to start
        if agent['role'] == 0:
            sleep(1)

        processes.append(p)

    try:
        # wait until only the challenge agent is left
        while active_count() > 2:
            sleep(0.1)
    except KeyboardInterrupt:
        print('Caught control-c - shutting down.')


if __name__ == '__main__':
    arg_parser = ArgumentParser('Pig Chase baseline experiment')
    arg_parser.add_argument('-t', '--type', type=str, default='astar',
                            choices=['astar', 'random'],
                            help='The type of baseline to run.')
    arg_parser.add_argument('-e', '--epochs', type=int, default=5,
                            help='Number of epochs to run.')
    arg_parser.add_argument('clients', nargs='*',
                            default=['127.0.0.1:10000', '127.0.0.1:10001'],
                            help='Minecraft clients endpoints (ip(:port)?)+')
    args = arg_parser.parse_args()

    logdir = BASELINES_FOLDER % (args.type, datetime.utcnow().isoformat())
    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)
    else:
        visualizer = ConsoleVisualizer()

    agents = [{'name': agent, 'role': role, 'baseline_agent': args.type,
               'clients': args.clients, 'max_epochs': args.epochs,
               'logdir': logdir, 'visualizer': visualizer}
              for role, agent in enumerate(ENV_AGENT_NAMES)]

    run_experiment(agents)

# -------------------------------------------------------------------------------
# malmopy/visualization/tensorboard/cntk/cntk.py
# -------------------------------------------------------------------------------
# Copyright (c) 2017 Microsoft Corporation.
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
# ===================================================================================================================

from __future__ import absolute_import

import six
import tensorflow as tf
from tensorflow.core.framework import attr_value_pb2, tensor_shape_pb2

from ..tensorboard import TensorflowConverter


class CntkConverter(TensorflowConverter):
    """Mirrors a CNTK function graph as TF placeholder ops so Tensorboard can
    render the network structure. Uses private TF APIs (_add_input)."""

    def convert(self, network, graph):
        """
        Converts a function from CNTK to the Tensorflow graph format

        Args:
            network: CNTK function that defines the network structure
            graph: destination Tensorflow graph
        """
        # Walk every node of the network iteratively
        stack = [network.model]
        visited = set()

        while stack:
            node = stack.pop()

            if node in visited:
                continue
            # BUG FIX: the original never added anything to `visited`, so the
            # guard above was dead code and nodes reachable through several
            # paths were re-processed on every visit.
            visited.add(node)

            try:

                # Function node
                node = node.root_function
                stack.extend(node.inputs)
                try:
                    # TF graph already has the current node
                    graph.get_operation_by_name(node.uid.split('_')[0])
                    continue

                except KeyError:
                    # New network node that has to be converted to TF format
                    # define TF operation attributes based on CNTK network node
                    try:
                        dim_x = tensor_shape_pb2.TensorShapeProto.Dim(size=node.outputs[0].shape[0])
                    except IndexError:
                        # scalar output: treat as size-1 dimension
                        dim_x = tensor_shape_pb2.TensorShapeProto.Dim(size=1)
                    try:
                        dim_y = tensor_shape_pb2.TensorShapeProto.Dim(size=node.outputs[0].shape[1])
                    except IndexError:
                        dim_y = tensor_shape_pb2.TensorShapeProto.Dim(size=1)
                    shape = tensor_shape_pb2.TensorShapeProto(dim=(dim_x, dim_y))
                    shape_attr = attr_value_pb2.AttrValue(shape=shape)
                    attrs = {"shape": shape_attr}

                    # Use name scope based on the node's name (e.g. Plus1) to
                    # group the operation and its inputs
                    with graph.name_scope(node.uid) as _:

                        # Create a TF placeholder operation with type, name and shape of the current node
                        op = graph.create_op("Placeholder", inputs=[],
                                             dtypes=[node.outputs[0].dtype], attrs=attrs,
                                             name=node.uid)

                        # Add inputs to the created TF operation
                        for i in six.moves.range(len(node.inputs)):
                            child = node.inputs[i]
                            name = child.uid
                            try:
                                # The input tensor already exists in the graph
                                tf_input = graph.get_tensor_by_name(name + ":0")
                            except KeyError:
                                # A new tensor that needs to be converted from CNTK to TF
                                shape = self.convert_shape(child.shape)
                                dtype = child.dtype
                                # Create a new placeholder tensor with the corresponding attributes
                                tf_input = tf.placeholder(shape=shape, dtype=dtype, name=name)

                            # Update TF operator's inputs (private TF API)
                            op._add_input(tf_input)

                        # Update TF operation's outputs
                        output = node.outputs[0]
                        for o in graph.get_operations():
                            if output.uid in o.name:
                                o._add_input(op.outputs[0])

            except AttributeError:
                # OutputVariable node (has no .root_function)
                try:
                    if node.is_output:
                        try:
                            # Owner of the node is already added to the TF graph
                            owner_name = node.owner.uid + '/' + node.owner.uid
                            graph.get_operation_by_name(owner_name)
                        except KeyError:
                            # Unknown network node
                            stack.append(node.owner)

                except AttributeError:
                    pass

        # Add missing connections in the graph
        CntkConverter.update_outputs(graph.get_operations())
        graph.finalize()

    @staticmethod
    def convert_shape(shape):
        """Pad a CNTK shape tuple to rank 2 for the TF placeholder."""
        if len(shape) == 0:
            shape = (1, 1)
        else:
            if len(shape) == 1:
                shape += (1,)
        return shape

    @staticmethod
    def update_outputs(ops):
        """Updates the inputs/outputs of the Tensorflow operations
        by adding missing connections

        Args:
            ops: a list of Tensorflow operations
        """
        # O(n^2) pairwise scan; acceptable for visualization-sized graphs
        for i in six.moves.range(len(ops)):
            for j in six.moves.range(i + 1, len(ops)):
                if ops[i].name.split('/')[1] in ops[j].name.split('/')[1]:
                    ops[i]._add_input(ops[j].outputs[0])

# -------------------------------------------------------------------------------
# malmopy/environment/environment.py
# -------------------------------------------------------------------------------
# Copyright (c) 2017 Microsoft Corporation.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | import numpy as np 21 | 22 | from ..util import check_rank, get_rank, resize, rgb2gray 23 | 24 | 25 | class StateBuilder(object): 26 | """ 27 | StateBuilder are object that map environment state into another representation. 28 | 29 | Subclasses should override the build() method which can map specific environment behavior. 30 | For concrete examples, malmo package has some predefined state builder specific to Malmo 31 | """ 32 | 33 | def build(self, environment): 34 | raise NotImplementedError() 35 | 36 | def __call__(self, *args, **kwargs): 37 | return self.build(*args) 38 | 39 | 40 | class ALEStateBuilder(StateBuilder): 41 | """ 42 | Atari Environment state builder interface. 43 | 44 | This class assumes the environment.state() returns a numpy array. 45 | """ 46 | 47 | SCALE_FACTOR = 1. / 255. 48 | 49 | def __init__(self, shape=(84, 84), normalize=True): 50 | self._shape = shape 51 | self._normalize = bool(normalize) 52 | 53 | def build(self, environment): 54 | if not isinstance(environment, np.ndarray): 55 | raise ValueError( 56 | 'environment type is not a numpy.ndarray (got %s)' % str( 57 | type(environment))) 58 | 59 | state = environment 60 | 61 | # Convert to gray 62 | if check_rank(environment.shape, 3): 63 | state = rgb2gray(environment) 64 | elif get_rank(state) > 3: 65 | raise ValueError('Cannot handle data with more than 3 dimensions') 66 | 67 | # Resize 68 | if state.shape != self._shape: 69 | state = resize(state, self._shape) 70 | 71 | return (state * ALEStateBuilder.SCALE_FACTOR).astype(np.float32) 72 | 73 | 74 | class BaseEnvironment(object): 75 | """ 76 | Abstract representation of an interactive environment 77 | """ 78 | 79 | def __init__(self): 80 | self._score = 0. 81 | self._reward = 0. 
82 | self._done = False 83 | self._state = None 84 | 85 | def do(self, action): 86 | """ 87 | Do the specified action in the environment 88 | :param action: The action to be executed 89 | :return Tuple holding the new state, the reward and a flag indicating if the environment is done 90 | """ 91 | raise NotImplementedError() 92 | 93 | def reset(self): 94 | """ 95 | Reset the current environment's internal state. 96 | :return: 97 | """ 98 | self._score = 0. 99 | self._reward = 0. 100 | self._done = False 101 | self._state = None 102 | 103 | @property 104 | def available_actions(self): 105 | """ 106 | Returns the number of actions available in this environment 107 | :return: Integer > 0 108 | """ 109 | raise NotImplementedError() 110 | 111 | @property 112 | def done(self): 113 | """ 114 | Indicate if the current environment is in a terminal state 115 | :return: Boolean True if environment is in a terminal state, False otherwise 116 | """ 117 | return self._done 118 | 119 | @property 120 | def state(self): 121 | """ 122 | Return the current environment state 123 | :return: 124 | """ 125 | return self._state 126 | 127 | @property 128 | def reward(self): 129 | """ 130 | Return accumulated rewards 131 | :return: Float as the current accumulated rewards since last state 132 | """ 133 | return self._reward 134 | 135 | @property 136 | def score(self): 137 | """ 138 | Return the environment's current score. 139 | It is common that the score will the sum of observed rewards, but subclasses can change this behavior. 140 | :return: Number 141 | """ 142 | return self._score 143 | 144 | @property 145 | def is_turn_based(self): 146 | """ 147 | Indicate if this environment is running on a turn-based scenario (i.e., 148 | agents take turns and wait for other agents' turns to complete before taking the next action). 149 | All subclasses should override this accordingly to the running scenario. 
150 | As currently turn based is not the default behavior, the value returned is False 151 | :return: False 152 | """ 153 | return False 154 | 155 | 156 | class VideoCapableEnvironment(BaseEnvironment): 157 | """ 158 | Represent the capacity of an environment to stream it's current state. 159 | Streaming relies on 2 properties : 160 | - fps : Number of frame this environment is able to generate each second 161 | - frame : The latest frame generated by this environment 162 | The display adapter should ask for a new frame with a 1/fps millisecond delay. 163 | If there is no updated frame, the frame property can return None. 164 | """ 165 | 166 | def __init__(self): 167 | super(VideoCapableEnvironment, self).__init__() 168 | self._recording = False 169 | 170 | @property 171 | def recording(self): 172 | """ 173 | Indicate if the current environment is dispatching the video stream 174 | :return: True if streaming, False otherwise 175 | """ 176 | return self._recording 177 | 178 | @recording.setter 179 | def recording(self, val): 180 | """ 181 | Change the internal recording state. 182 | :param val: True to activate video streaming, False otherwise 183 | :return: 184 | """ 185 | self._recording = bool(val) 186 | 187 | @property 188 | def frame(self): 189 | """ 190 | Return the most recent frame from the environment 191 | :return: PIL Image representing the current environment 192 | """ 193 | raise NotImplementedError() 194 | -------------------------------------------------------------------------------- /malmopy/model/cntk/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
# ===================================================================================================================

import numpy as np
from cntk.device import cpu, gpu, try_set_default_device
from cntk.train.distributed import Communicator
from cntk.learners import set_default_unit_gain_value
from cntk.ops import abs, element_select, less, square, sqrt, reduce_sum, reduce_mean

from ...visualization import Visualizable


def rmse(y, y_hat, axis=0):
    """
    Build a Root Mean Squared error node in the model graph.

    :param y: CNTK Variable holding the true value of Y
    :param y_hat: CNTK variable holding the estimated value of Y
    :param axis: Axis over which to compute the mean (default 0)
    :return: Root Mean Squared error node
    """
    return sqrt(reduce_mean(square(y_hat - y), axis=axis))


def huber_loss(y_hat, y, delta):
    """
    Build a Huber loss node (summed over samples) in the model graph.

    Huber loss is more robust to outliers: quadratic near zero, linear
    beyond the threshold:
        0.5 * (y - y_hat)**2                  if |y - y_hat| < delta
        delta * |y - y_hat| - 0.5 * delta**2  otherwise

    :param y_hat: Estimated value
    :param y: Target value
    :param delta: Outliers threshold
    :return: loss node
    """
    half_delta_squared = 0.5 * delta * delta
    residual = y - y_hat
    abs_residual = abs(residual)

    quadratic = 0.5 * square(residual)
    linear = (delta * abs_residual) - half_delta_squared

    loss_per_sample = element_select(less(abs_residual, delta), quadratic, linear)

    return reduce_sum(loss_per_sample, name='loss')


def as_learning_rate_by_sample(learning_rate_per_minibatch, minibatch_size, momentum=0, momentum_as_unit_gain=False):
    """
    Scale a per-minibatch learning rate to CNTK's per-sample convention.

    With unit-gain momentum CNTK computes:
        g(t + 1) = momentum * v(t) + (1 - momentum) * gradient(t)
    whereas other frameworks omit the (1 - momentum) factor, so when
    momentum_as_unit_gain is True the rate is additionally scaled by
    1 / (1 - momentum).

    :param learning_rate_per_minibatch: The current learning rate
    :param minibatch_size: Size of the minibatch
    :param momentum: The current momentum (used only when momentum_as_unit_gain is True)
    :param momentum_as_unit_gain: Whether the momentum is a unit gain factor (CNTK) or not (TensorFlow, etc.)
    :return: Scaled learning rate according to momentum and minibatch size
    """
    assert learning_rate_per_minibatch > 0, "learning_rate_per_minibatch cannot be < 0"
    assert minibatch_size > 0, "minibatch_size cannot be < 1"

    per_sample = learning_rate_per_minibatch / minibatch_size

    if momentum_as_unit_gain:
        per_sample /= (1. - momentum)

    return per_sample


def as_momentum_as_time_constant(momentum, minibatch_size):
    """
    Convert a whole-minibatch momentum to the time-constant form:

        momentum_as_time_constant = ceil(-minibatch_size / log(momentum))
    """
    return np.ceil(-minibatch_size / (np.log(momentum)))


def prepend_batch_seq_axis(tensor):
    """
    Reshape a tensor to (1, 1, *shape).

    CNTK uses 2 dynamic axes (batch, sequence, input_shape...); a single
    sample of length 1 therefore needs shape (1, 1, input_shape...).
    :param tensor: The tensor to be reshaped
    :return: Reshaped tensor with batch and sequence axis = 1
    """
    return tensor.reshape((1, 1,) + tensor.shape)


def prepend_batch_axis(tensor):
    """
    Reshape a tensor to (1, *shape).

    When variables are defined with dynamic_axes=[Axis.default_batch_axis()]
    there is no sequence axis, so a single sample only needs shape
    (1, input_shape...).
    :param tensor: The tensor to be reshaped
    :return: Reshaped tensor with batch axis = 1
    """
    return tensor.reshape((1,) + tensor.shape)


class CntkModel(Visualizable):
    """
    Base class for CNTK based neural networks.

    Handles CPU/GPU device selection and provides load/save/finalize helpers.
    """

    def __init__(self, device_id=None, unit_gain=False, n_workers=1, visualizer=None):
        """
        Wraps CNTK initialization and tuning.

        :param device_id: None lets CNTK pick the best available device,
            -1 forces CPU, >= 0 selects that GPU
        :param unit_gain: default unit-gain value passed to CNTK learners
        :param n_workers: Number of concurrent workers for distributed
            training (keep at 1 for non-distributed mode)
        :param visualizer: Optional visualizer allowing the model to save summary data
        """
        assert n_workers >= 1, 'n_workers should be at least 1 (not distributed) or > 1 if distributed'

        Visualizable.__init__(self, visualizer)

        self._model = None
        self._learner = None
        self._loss = None
        self._distributed = n_workers > 1

        if isinstance(device_id, int):
            try_set_default_device(cpu() if device_id == -1 else gpu(device_id))

        set_default_unit_gain_value(unit_gain)

    def _build_model(self):
        raise NotImplementedError()

    @property
    def loss_val(self):
        raise NotImplementedError()

    @property
    def model(self):
        return self._model

    @property
    def distributed_training(self):
        return self._distributed

    @property
    def distributed_rank(self):
        # NOTE(review): in distributed mode, when the learner is missing or
        # exposes no communicator, this falls through and implicitly returns
        # None — confirm callers handle that.
        if self._distributed:
            if self._learner and hasattr(self._learner, 'communicator'):
                return self._learner.communicator().rank()
        else:
            return 0

    def load(self, input_file):
        if self._model is None:
            raise ValueError("cannot load to a model that equals None")

        self._model.restore(input_file)

    def save(self, output_file):
        if self._model is None:
            raise ValueError("cannot save a model that equals None")

        self._model.save(output_file)

    def finalize(self):
        # Required to cleanly shut down MPI-based distributed training.
        if self._distributed:
            Communicator.finalize()
# Copyright (c) 2017 Microsoft Corporation.
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
16 | # =================================================================================================================== 17 | 18 | import os 19 | import sys 20 | from argparse import ArgumentParser 21 | from datetime import datetime 22 | 23 | import six 24 | from os import path 25 | from threading import Thread, active_count 26 | from time import sleep 27 | 28 | from malmopy.agent import LinearEpsilonGreedyExplorer 29 | 30 | from common import parse_clients_args, visualize_training, ENV_AGENT_NAMES 31 | from agent import PigChaseChallengeAgent, PigChaseQLearnerAgent 32 | from environment import PigChaseEnvironment, PigChaseSymbolicStateBuilder 33 | 34 | from malmopy.environment.malmo import MalmoALEStateBuilder 35 | from malmopy.agent import TemporalMemory, RandomAgent 36 | 37 | try: 38 | from malmopy.visualization.tensorboard import TensorboardVisualizer 39 | from malmopy.visualization.tensorboard.cntk import CntkConverter 40 | except ImportError: 41 | print('Cannot import tensorboard, using ConsoleVisualizer.') 42 | from malmopy.visualization import ConsoleVisualizer 43 | 44 | # Enforce path 45 | sys.path.insert(0, os.getcwd()) 46 | sys.path.insert(1, os.path.join(os.path.pardir, os.getcwd())) 47 | 48 | DQN_FOLDER = 'results/baselines/%s/dqn/%s-%s' 49 | EPOCH_SIZE = 100000 50 | 51 | 52 | def agent_factory(name, role, clients, backend, 53 | device, max_epochs, logdir, visualizer): 54 | 55 | assert len(clients) >= 2, 'Not enough clients (need at least 2)' 56 | clients = parse_clients_args(clients) 57 | 58 | if role == 0: 59 | 60 | builder = PigChaseSymbolicStateBuilder() 61 | env = PigChaseEnvironment(clients, builder, role=role, 62 | randomize_positions=True) 63 | agent = PigChaseChallengeAgent(name) 64 | if type(agent.current_agent) == RandomAgent: 65 | agent_type = PigChaseEnvironment.AGENT_TYPE_1 66 | else: 67 | agent_type = PigChaseEnvironment.AGENT_TYPE_2 68 | 69 | obs = env.reset(agent_type) 70 | reward = 0 71 | agent_done = False 72 | 73 | while True: 74 
| if env.done: 75 | if type(agent.current_agent) == RandomAgent: 76 | agent_type = PigChaseEnvironment.AGENT_TYPE_1 77 | else: 78 | agent_type = PigChaseEnvironment.AGENT_TYPE_2 79 | 80 | obs = env.reset(agent_type) 81 | while obs is None: 82 | # this can happen if the episode ended with the first 83 | # action of the other agent 84 | print('Warning: received obs == None.') 85 | obs = env.reset(agent_type) 86 | 87 | # select an action 88 | action = agent.act(obs, reward, agent_done, is_training=True) 89 | # take a step 90 | obs, reward, agent_done = env.do(action) 91 | 92 | else: 93 | env = PigChaseEnvironment(clients, MalmoALEStateBuilder(), 94 | role=role, randomize_positions=True) 95 | memory = TemporalMemory(100000, (84, 84)) 96 | 97 | if backend == 'cntk': 98 | from malmopy.model.cntk import QNeuralNetwork 99 | model = QNeuralNetwork((memory.history_length, 84, 84), env.available_actions, device) 100 | else: 101 | from malmopy.model.chainer import QNeuralNetwork, DQNChain 102 | chain = DQNChain((memory.history_length, 84, 84), env.available_actions) 103 | target_chain = DQNChain((memory.history_length, 84, 84), env.available_actions) 104 | model = QNeuralNetwork(chain, target_chain, device) 105 | 106 | explorer = LinearEpsilonGreedyExplorer(1, 0.1, 1000000) 107 | agent = PigChaseQLearnerAgent(name, env.available_actions, 108 | model, memory, 0.99, 32, 50000, 109 | explorer=explorer, visualizer=visualizer) 110 | 111 | obs = env.reset() 112 | reward = 0 113 | agent_done = False 114 | viz_rewards = [] 115 | 116 | max_training_steps = EPOCH_SIZE * max_epochs 117 | for step in six.moves.range(1, max_training_steps+1): 118 | 119 | # check if env needs reset 120 | if env.done: 121 | 122 | visualize_training(visualizer, step, viz_rewards) 123 | agent.inject_summaries(step) 124 | viz_rewards = [] 125 | 126 | obs = env.reset() 127 | while obs is None: 128 | # this can happen if the episode ended with the first 129 | # action of the other agent 130 | print('Warning: 
received obs == None.') 131 | obs = env.reset() 132 | 133 | # select an action 134 | action = agent.act(obs, reward, agent_done, is_training=True) 135 | # take a step 136 | obs, reward, agent_done = env.do(action) 137 | viz_rewards.append(reward) 138 | 139 | if (step % EPOCH_SIZE) == 0: 140 | if 'model' in locals(): 141 | model.save('pig_chase-dqn_%d.model' % (step / EPOCH_SIZE)) 142 | 143 | 144 | def run_experiment(agents_def): 145 | assert len(agents_def) == 2, 'Not enough agents (required: 2, got: %d)' \ 146 | % len(agents_def) 147 | 148 | processes = [] 149 | for agent in agents_def: 150 | p = Thread(target=agent_factory, kwargs=agent) 151 | p.daemon = True 152 | p.start() 153 | 154 | # Give the server time to start 155 | if agent['role'] == 0: 156 | sleep(1) 157 | 158 | processes.append(p) 159 | 160 | try: 161 | # wait until only the challenge agent is left 162 | while active_count() > 2: 163 | sleep(0.1) 164 | except KeyboardInterrupt: 165 | print('Caught control-c - shutting down.') 166 | 167 | 168 | if __name__ == '__main__': 169 | arg_parser = ArgumentParser('Pig Chase DQN experiment') 170 | arg_parser.add_argument('-b', '--backend', type=str, choices=['cntk', 'chainer'], 171 | default='cntk', help='Neural network backend') 172 | arg_parser.add_argument('-e', '--epochs', type=int, default=5, 173 | help='Number of epochs to run.') 174 | arg_parser.add_argument('clients', nargs='*', 175 | default=['127.0.0.1:10000', '127.0.0.1:10001'], 176 | help='Minecraft clients endpoints (ip(:port)?)+') 177 | arg_parser.add_argument('-d', '--device', type=int, default=-1, 178 | help='GPU device on which to run the experiment.') 179 | args = arg_parser.parse_args() 180 | 181 | logdir = path.join('results/pig_chase/dqn', datetime.utcnow().isoformat()) 182 | if 'malmopy.visualization.tensorboard' in sys.modules: 183 | visualizer = TensorboardVisualizer() 184 | visualizer.initialize(logdir, None) 185 | 186 | else: 187 | visualizer = ConsoleVisualizer() 188 | 189 | agents = 
[{'name': agent, 'role': role, 'clients': args.clients, 190 | 'backend': args.backend, 'device': args.device, 191 | 'max_epochs': args.epochs, 'logdir': logdir, 'visualizer': visualizer} 192 | for role, agent in enumerate(ENV_AGENT_NAMES)] 193 | 194 | run_experiment(agents) 195 | -------------------------------------------------------------------------------- /ai_challenge/pig_chase/pig_chase_dqn_top_down.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 
# ===================================================================================================================

import os
import sys
from argparse import ArgumentParser
from datetime import datetime

import six
from os import path
from threading import Thread, active_count
from time import sleep

from malmopy.agent import LinearEpsilonGreedyExplorer, RandomAgent

from common import parse_clients_args, visualize_training, ENV_AGENT_NAMES
from agent import PigChaseChallengeAgent, PigChaseQLearnerAgent
from environment import PigChaseEnvironment, PigChaseSymbolicStateBuilder, \
    PigChaseTopDownStateBuilder

from malmopy.agent import TemporalMemory

try:
    from malmopy.visualization.tensorboard import TensorboardVisualizer
    from malmopy.visualization.tensorboard.cntk import CntkConverter
except ImportError:
    print('Cannot import tensorboard, using ConsoleVisualizer.')
    from malmopy.visualization import ConsoleVisualizer

# Enforce path
sys.path.insert(0, os.getcwd())
sys.path.insert(1, os.path.join(os.path.pardir, os.getcwd()))

DQN_FOLDER = 'results/baselines/%s/dqn/%s-%s'
EPOCH_SIZE = 100000


def _challenge_agent_type(agent):
    """Map the challenge agent's current sub-agent to the env agent type."""
    if type(agent.current_agent) == RandomAgent:
        return PigChaseEnvironment.AGENT_TYPE_1
    return PigChaseEnvironment.AGENT_TYPE_2


def agent_factory(name, role, clients, backend, device, max_epochs, logdir, visualizer):
    """Build and run one of the two experiment agents (top-down variant).

    role == 0 runs the (non-learning) challenge agent forever; any other
    role runs the DQN learner on 18x18 top-down states for max_epochs
    epochs of EPOCH_SIZE steps, checkpointing each epoch.

    :param name: agent name used by the environment
    :param role: 0 for the challenge agent, otherwise the DQN learner
    :param clients: Minecraft client endpoints (before parsing)
    :param backend: 'cntk' or 'chainer'
    :param device: device id for the neural network backend
    :param max_epochs: number of training epochs for the learner
    :param logdir: directory for experiment logs (unused here, kept for API)
    :param visualizer: visualizer used for training summaries
    """
    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)

    if role == 0:

        builder = PigChaseSymbolicStateBuilder()
        env = PigChaseEnvironment(clients, builder, role=role,
                                  randomize_positions=True)

        agent = PigChaseChallengeAgent(name)
        agent_type = _challenge_agent_type(agent)

        obs = env.reset(agent_type)
        reward = 0
        agent_done = False

        while True:
            if env.done:
                # The challenge agent may switch its sub-agent between
                # episodes, so recompute the agent type before resetting.
                agent_type = _challenge_agent_type(agent)

                obs = env.reset(agent_type)
                while obs is None:
                    # this can happen if the episode ended with the first
                    # action of the other agent
                    print('Warning: received obs == None.')
                    obs = env.reset(agent_type)

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)

    else:
        env = PigChaseEnvironment(clients, PigChaseTopDownStateBuilder(True),
                                  role=role, randomize_positions=True)
        memory = TemporalMemory(100000, (18, 18))

        if backend == 'cntk':
            from malmopy.model.cntk import QNeuralNetwork
            model = QNeuralNetwork((memory.history_length, 18, 18), env.available_actions, device)
        else:
            from malmopy.model.chainer import QNeuralNetwork, ReducedDQNChain
            chain = ReducedDQNChain((memory.history_length, 18, 18), env.available_actions)
            target_chain = ReducedDQNChain((memory.history_length, 18, 18), env.available_actions)
            model = QNeuralNetwork(chain, target_chain, device)

        explorer = LinearEpsilonGreedyExplorer(1, 0.1, 1000000)
        agent = PigChaseQLearnerAgent(name, env.available_actions,
                                      model, memory, 0.99, 32, 50000,
                                      explorer=explorer, visualizer=visualizer)

        obs = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        max_training_steps = EPOCH_SIZE * max_epochs
        for step in six.moves.range(1, max_training_steps + 1):

            # check if env needs reset
            if env.done:

                visualize_training(visualizer, step, viz_rewards)
                agent.inject_summaries(step)
                viz_rewards = []

                obs = env.reset()
                while obs is None:
                    # this can happen if the episode ended with the first
                    # action of the other agent
                    print('Warning: received obs == None.')
                    obs = env.reset()

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)
            viz_rewards.append(reward)

            if (step % EPOCH_SIZE) == 0:
                # BUG FIX: use floor division so the epoch index is an int;
                # '/' yields a float under Python 3 true division and only
                # worked through %d's implicit truncation.
                model.save('pig_chase-dqn_%d.model' % (step // EPOCH_SIZE))


def run_experiment(agents_def):
    """Start both agents on daemon threads and wait until only the
    (endlessly running) challenge agent is left, then exit."""
    assert len(agents_def) == 2, 'Not enough agents (required: 2, got: %d)' \
                                 % len(agents_def)

    processes = []
    for agent in agents_def:
        p = Thread(target=agent_factory, kwargs=agent)
        p.daemon = True
        p.start()

        # Give the server time to start
        if agent['role'] == 0:
            sleep(1)

        processes.append(p)

    try:
        # wait until only the challenge agent is left
        while active_count() > 2:
            sleep(0.1)
    except KeyboardInterrupt:
        print('Caught control-c - shutting down.')


if __name__ == '__main__':
    arg_parser = ArgumentParser('Pig Chase DQN experiment')
    arg_parser.add_argument('-b', '--backend', type=str, choices=['cntk', 'chainer'],
                            default='cntk', help='Neural network backend')
    arg_parser.add_argument('-e', '--epochs', type=int, default=5,
                            help='Number of epochs to run.')
    arg_parser.add_argument('clients', nargs='*',
                            default=['127.0.0.1:10000', '127.0.0.1:10001'],
                            help='Minecraft clients endpoints (ip(:port)?)+')
    arg_parser.add_argument('-d', '--device', type=int, default=-1,
                            help='GPU device on which to run the experiment.')
    args = arg_parser.parse_args()

    logdir = path.join('results/pig_chase/dqn', datetime.utcnow().isoformat())
    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)

    else:
        visualizer = ConsoleVisualizer()
189 | agents = [{'name': agent, 'role': role, 'clients': args.clients, 190 | 'backend':args.backend, 'device': args.device, 191 | 'max_epochs': args.epochs, 'logdir': logdir, 'visualizer': visualizer} 192 | for role, agent in enumerate(ENV_AGENT_NAMES)] 193 | 194 | run_experiment(agents) 195 | -------------------------------------------------------------------------------- /ai_challenge/pig_chase/pig_chase.xml: -------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 15 | 16 | 17 | Catch the pig! 18 | 19 | 20 | 21 | 4 22 | 23 | 24 | 25 | 26 | 30 | clear 31 | false 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | Agent_1 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | attack 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 640 122 | 480 123 | 124 | 125 | 126 | 127 | 128 | Agent_2 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | attack 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 640 170 | 480 171 | 172 | 173 | 174 | 175 | -------------------------------------------------------------------------------- /malmopy/model/chainer/qlearning.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 | # SOFTWARE. 16 | # =================================================================================================================== 17 | 18 | from __future__ import absolute_import 19 | 20 | import chainer.cuda as cuda 21 | import chainer.functions as F 22 | import chainer.links as L 23 | import numpy as np 24 | from chainer import ChainList 25 | from chainer.initializers import HeUniform 26 | from chainer.optimizers import Adam 27 | from chainer.serializers import save_npz, load_npz 28 | 29 | from ..model import QModel 30 | from ...util import check_rank, get_rank 31 | 32 | 33 | class ChainerModel(ChainList): 34 | """ 35 | Wraps a Chainer Chain and enforces the model to be callable. 36 | Every model should override the __call__ method as a forward call. 
37 | """ 38 | 39 | def __init__(self, input_shape, output_shape): 40 | self.input_shape = input_shape 41 | self.output_shape = output_shape 42 | 43 | super(ChainerModel, self).__init__(*self._build_model()) 44 | 45 | def __call__(self, *args, **kwargs): 46 | raise NotImplementedError() 47 | 48 | def _build_model(self): 49 | raise NotImplementedError() 50 | 51 | 52 | class MLPChain(ChainerModel): 53 | """ 54 | Create a Multi Layer Perceptron neural network. 55 | The number of layers and units for each layer can be specified using hidden_layer_sizes. 56 | For example for a 128 units on the first hidden layer, then 256 on the second and 512 on the third: 57 | 58 | >>> MLPChain(input_shape=(28, 28), output_shape=10, hidden_layer_sizes=(128, 256, 512)) 59 | 60 | Note : The network will contain len(hidden_layer_sizes) + 2 layers because 61 | of the input layer and the output layer. 62 | """ 63 | 64 | def __init__(self, in_shape, output_shape, 65 | hidden_layer_sizes=(512, 512, 512), activation=F.relu): 66 | self._activation = activation 67 | self._hidden_layer_sizes = hidden_layer_sizes 68 | 69 | super(MLPChain, self).__init__(in_shape, output_shape) 70 | 71 | @property 72 | def hidden_layer_sizes(self): 73 | return self._hidden_layer_sizes 74 | 75 | def __call__(self, x): 76 | f = self._activation 77 | 78 | for layer in self[:-1]: 79 | x = f(layer(x)) 80 | return self[-1](x) 81 | 82 | def _build_model(self): 83 | hidden_layers = [L.Linear(None, units) for units in 84 | self._hidden_layer_sizes] 85 | hidden_layers += [L.Linear(None, self.output_shape)] 86 | 87 | return hidden_layers 88 | 89 | 90 | class ReducedDQNChain(ChainerModel): 91 | """ 92 | Simplified DQN topology: 93 | 94 | Convolution(64, kernel=(4, 4), strides=(2, 2) 95 | Convolution(64, kernel=(3, 3), strides=(1, 1) 96 | Dense(512) 97 | Dense(output_shape) 98 | """ 99 | def __init__(self, in_shape, output_shape): 100 | super(ReducedDQNChain, self).__init__(in_shape, output_shape) 101 | 102 | def __call__(self, 
x): 103 | for layer in self[:-1]: 104 | x = F.relu(layer(x)) 105 | return self[-1](x) 106 | 107 | def _build_model(self): 108 | initializer = HeUniform() 109 | in_shape = self.input_shape[0] 110 | 111 | return [L.Convolution2D(in_shape, 64, ksize=4, stride=2, 112 | initialW=initializer), 113 | L.Convolution2D(64, 64, ksize=3, stride=1, 114 | initialW=initializer), 115 | L.Linear(None, 512, initialW=HeUniform(0.1)), 116 | L.Linear(512, self.output_shape, initialW=HeUniform(0.1))] 117 | 118 | 119 | class DQNChain(ChainerModel): 120 | """ 121 | DQN topology as in 122 | (Mnih & al. 2015): Human-level control through deep reinforcement learning" 123 | Nature 518.7540 (2015): 529-533. 124 | 125 | Convolution(32, kernel=(8, 8), strides=(4, 4) 126 | Convolution(64, kernel=(4, 4), strides=(2, 2) 127 | Convolution(64, kernel=(3, 3), strides=(1, 1) 128 | Dense(512) 129 | Dense(output_shape) 130 | """ 131 | 132 | def __init__(self, in_shape, output_shape): 133 | super(DQNChain, self).__init__(in_shape, output_shape) 134 | 135 | def __call__(self, x): 136 | for layer in self[:-1]: 137 | x = F.relu(layer(x)) 138 | return self[-1](x) 139 | 140 | def _build_model(self): 141 | initializer = HeUniform() 142 | in_shape = self.input_shape[0] 143 | 144 | return [L.Convolution2D(in_shape, 32, ksize=8, stride=4, 145 | initialW=initializer), 146 | L.Convolution2D(32, 64, ksize=4, stride=2, 147 | initialW=initializer), 148 | L.Convolution2D(64, 64, ksize=3, stride=1, 149 | initialW=initializer), 150 | L.Linear(7 * 7 * 64, 512, initialW=HeUniform(0.01)), 151 | L.Linear(512, self.output_shape, initialW=HeUniform(0.01))] 152 | 153 | 154 | class QNeuralNetwork(QModel): 155 | def __init__(self, model, target, device_id=-1, 156 | learning_rate=0.00025, momentum=.9, 157 | minibatch_size=32, update_interval=10000): 158 | 159 | assert isinstance(model, ChainerModel), \ 160 | 'model should inherit from ChainerModel' 161 | 162 | super(QNeuralNetwork, self).__init__(model.input_shape, 163 | 
model.output_shape) 164 | 165 | self._gpu_device = None 166 | self._loss_val = 0 167 | 168 | # Target model update method 169 | self._steps = 0 170 | self._target_update_interval = update_interval 171 | 172 | # Setup model and target network 173 | self._minibatch_size = minibatch_size 174 | self._model = model 175 | self._target = target 176 | self._target.copyparams(self._model) 177 | 178 | # If GPU move to GPU memory 179 | if device_id >= 0: 180 | with cuda.get_device(device_id) as device: 181 | self._gpu_device = device 182 | self._model.to_gpu(device) 183 | self._target.to_gpu(device) 184 | 185 | # Setup optimizer 186 | self._optimizer = Adam(learning_rate, momentum, 0.999) 187 | self._optimizer.setup(self._model) 188 | 189 | def evaluate(self, environment, model=QModel.ACTION_VALUE_NETWORK): 190 | if check_rank(environment.shape, get_rank(self._input_shape)): 191 | environment = environment.reshape((1,) + environment.shape) 192 | 193 | # Move data if necessary 194 | if self._gpu_device is not None: 195 | environment = cuda.to_gpu(environment, self._gpu_device) 196 | 197 | if model == QModel.ACTION_VALUE_NETWORK: 198 | output = self._model(environment) 199 | else: 200 | output = self._target(environment) 201 | 202 | return cuda.to_cpu(output.data) 203 | 204 | def train(self, x, y, actions=None): 205 | actions = actions.astype(np.int32) 206 | batch_size = len(actions) 207 | 208 | if self._gpu_device: 209 | x = cuda.to_gpu(x, self._gpu_device) 210 | y = cuda.to_gpu(y, self._gpu_device) 211 | actions = cuda.to_gpu(actions, self._gpu_device) 212 | 213 | q = self._model(x) 214 | q_subset = F.reshape(F.select_item(q, actions), (batch_size, 1)) 215 | y = y.reshape(batch_size, 1) 216 | 217 | loss = F.sum(F.huber_loss(q_subset, y, 1.0)) 218 | 219 | self._model.cleargrads() 220 | loss.backward() 221 | self._optimizer.update() 222 | 223 | self._loss_val = np.asscalar(cuda.to_cpu(loss.data)) 224 | 225 | # Keeps track of the number of train() calls 226 | self._steps += 1 
227 | if self._steps % self._target_update_interval == 0: 228 | # copy weights 229 | self._target.copyparams(self._model) 230 | 231 | @property 232 | def loss_val(self): 233 | return self._loss_val # / self._minibatch_size 234 | 235 | def save(self, output_file): 236 | save_npz(output_file, self._model) 237 | 238 | def load(self, input_file): 239 | load_npz(input_file, self._model) 240 | 241 | # Copy parameter from model to target 242 | self._target.copyparams(self._model) 243 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Malmo Collaborative AI Challenge - Team Pig Catcher 2 | 3 | ## Approach 4 | 5 | The challenge involves 2 agents who can either *cooperate* or *defect*. The optimal policy, based on stag hunt [[1]](#references), depends on the policy of the other agent. Not knowing the other agent's policy, the optimal solution is then based on *modelling* the other agent's policy. Similarly, the challenge can be considered a *sequential social dilemma* [[2]](#references), as goals could change over time. 6 | 7 | By treating the other agent as part of the environment, we can use model-free RL, and simply aim to maximise the reward of our agent. As a baseline we take a DRL algorithm - ACER [[3]](#references) - and train it against the evaluation agent (which randomly uses a focused or random strategy every episode). 8 | 9 | We chose to approach this challenge using *hierarchical RL*. We assume there are 2 subpolicies, one for each type of partner agent. To do so, we use option heads [[4]](#references), whereby the agent has shared features, but separate heads for different subpolicies. In this case, ACER with 2 subpolicies has 2 Q-value heads and 2 policy heads. 
To choose which subpolicy to use at any given time, the agent also has an additional classifier head that is trained (using an oracle) to distinguish which option to use. Therefore, we ask the following questions: 10 | 11 | - Can the agent distinguish between the two possible behaviours of the evaluation agent? 12 | - Does the agent learn qualitatively different subpolicies? 13 | 14 | Unfortunately, due to technical difficulties and time restrictions, we were unable to successfully train an agent. Full results and more details can be found in our video. 15 | 16 | ## Design Decisions 17 | 18 | For our baseline, we implemented ACER [[3]](#references) in PyTorch based on reference code [[5, 6]](#references). In addition, we augmented the state that the agent receives with the previous action, reward and a step counter [[7]](#references). Our challenge entry augments the agent with option heads [[4]](#references), and we aim to distinguish the different policies of the evaluation agent. 19 | 20 | We also introduce a novel contribution - a batch version of ACER - which increases stability. We sample a batch of off-policy trajectories, and then truncate them to match the smallest. 21 | 22 | ## Instructions 23 | 24 | Dependencies: 25 | 26 | - [Python 2](https://www.python.org/) 27 | - [PyTorch](http://pytorch.org/) 28 | - [Plotly](https://plot.ly/python/) 29 | - [Docker](https://www.docker.com/) + [docker-py](https://docker-py.readthedocs.io/en/stable/) 30 | 31 | Firstly, [build the Malmo Docker image](https://github.com/Kaixhin/malmo-challenge/tree/master/docker). Secondly, [enable running Docker as a non-root user](https://docs.docker.com/engine/installation/linux/linux-postinstall/). 32 | 33 | Run ACER with `OMP_NUM_THREADS=1 python pc_main.py`. The code automatically opens up Minecraft (Docker) instances. 
34 | 35 | ## Discussion 36 | 37 | [![Team Pig Catcher Discussion Video](https://img.youtube.com/vi/e3_vsTKsMCY/0.jpg)](https://www.youtube.com/watch?v=e3_vsTKsMCY) 38 | 39 | ## References 40 | 41 | [1] [Game Theory of Mind](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1000254) 42 | [2] [Multi-agent Reinforcement Learning in Sequential Social Dilemmas](https://arxiv.org/abs/1702.03037) 43 | [3] [Sample Efficient Actor-Critic with Experience Replay](https://arxiv.org/abs/1611.01224) 44 | [4] [Classifying Options for Deep Reinforcement Learning](https://arxiv.org/abs/1604.08153) 45 | [5] [ikostrikov/pytorch-a3c](https://github.com/ikostrikov/pytorch-a3c) 46 | [6] [pfnet/ChainerRL](https://github.com/pfnet/chainerrl) 47 | [7] [Learning to Navigate in Complex Environments](https://arxiv.org/abs/1611.03673) 48 | 49 | --- 50 | 51 | --- 52 | 53 | This repository contains the task definition and example code for the [Malmo Collaborative AI Challenge](https://www.microsoft.com/en-us/research/academic-program/collaborative-ai-challenge/). 54 | This challenge is organized to encourage research in collaborative AI - to work towards AI agents 55 | that learn to collaborate to solve problems and achieve goals. 56 | You can find additional details, including terms and conditions, prizes and information on how to participate at the [Challenge Homepage](https://www.microsoft.com/en-us/research/academic-program/collaborative-ai-challenge/). 
57 | 58 | [![Join the chat at https://gitter.im/malmo-challenge/Lobby](https://badges.gitter.im/malmo-challenge/Lobby.svg)](https://gitter.im/malmo-challenge/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 59 | [![license](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/Microsoft/malmo-challenge/blob/master/LICENSE) 60 | 61 | ---- 62 | 63 | **Notes for challenge participants:** Once you and your team decide to participate in the challenge, please make sure to register your team at our [Registration Page](https://www.surveygizmo.com/s3/3299773/The-Collaborative-AI-Challenge). On the registration form, you need to provide a link to the GitHub repository that will 64 | contain your solution. We recommend that you fork this repository ([learn how](https://help.github.com/articles/fork-a-repo/)), 65 | and provide the address of the forked repo. You can then update your submission as you make progress on the challenge task. 66 | We will consider the version of the code on the master branch at the time of the submission deadline as your challenge submission. Your submission needs to contain code in working order, a 1-page description of your approach, and a 1-minute video that shows off your agent. Please see the [challenge terms and conditions](https://www.microsoft.com/en-us/research/academic-program/collaborative-ai-challenge/) for further details. 
67 | 68 | ---- 69 | 70 | **Jump to:** 71 | 72 | - [Installation](#installation) 73 | - [Prerequisites](#prerequisites) 74 | - [Minimal installation](#minimal-installation) 75 | - [Optional extensions](#optional-extensions) 76 | 77 | - [Getting started](#getting-started) 78 | - [Play the challenge task](#play-the-challenge-task) 79 | - [Run your first experiment](#run-your-first-experiment) 80 | 81 | - [Next steps](#next-steps) 82 | - [Run an experiment in Docker on Azure](#run-an-experiment-in-docker-on-azure) 83 | - [Resources](#resources) 84 | 85 | # Installation 86 | 87 | ## Prerequisites 88 | 89 | - [Python](https://www.python.org/) 2.7+ (recommended) or 3.5+ 90 | - [Project Malmo](https://github.com/Microsoft/malmo) - we recommend downloading the [Malmo-0.21.0 release](https://github.com/Microsoft/malmo/releases) and installing dependencies for [Windows](https://github.com/Microsoft/malmo/blob/master/doc/install_windows.md), [Linux](https://github.com/Microsoft/malmo/blob/master/doc/install_linux.md) or [MacOS](https://github.com/Microsoft/malmo/blob/master/doc/install_macosx.md). Test your Malmo installation by [launching Minecraft with Malmo](https://github.com/Microsoft/malmo#launching-minecraft-with-our-mod) and [launching an agent](https://github.com/Microsoft/malmo#launch-an-agent). 91 | 92 | ## Minimal installation 93 | 94 | ``` 95 | pip install -e git+https://github.com/Microsoft/malmo-challenge#egg=malmopy 96 | ``` 97 | 98 | or 99 | 100 | ``` 101 | git clone https://github.com/Microsoft/malmo-challenge 102 | cd malmo-challenge 103 | pip install -e . 104 | ``` 105 | 106 | ## Optional extensions 107 | 108 | Some of the example code uses additional dependencies to provide 'extra' functionality. 
These can be installed using: 109 | 110 | ``` 111 | pip install -e '.[extra1, extra2]' 112 | ``` 113 | For example, to install the `gym` extra: 114 | 115 | ``` 116 | pip install -e '.[gym]' 117 | ``` 118 | 119 | Or to install all extras: 120 | 121 | ``` 122 | pip install -e '.[all]' 123 | ``` 124 | 125 | The following extras are available: 126 | - `gym`: [OpenAI Gym](https://gym.openai.com/) is an interface to a wide range of reinforcement learning environments. Installing this extra enables the Atari example agents in [samples/atari](samples/atari) to train on the gym environments. *Note that OpenAI gym atari environments are currently not available on Windows.* 127 | - `tensorflow`: [TensorFlow](https://www.tensorflow.org/) is a popular deep learning framework developed by Google. In our examples it enables visualizations through [TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard). 128 | 129 | 130 | # Getting started 131 | 132 | ## Play the challenge task 133 | 134 | The challenge task takes the form of a mini game, called Pig Chase. Learn about the game, and try playing it yourself on our [Pig Chase Challenge page](ai_challenge/pig_chase/README.md). 135 | 136 | ## Run your first experiment 137 | 138 | See how to [run your first baseline experiment](ai_challenge/pig_chase/README.md#run-your-first-experiment) on the [Pig Chase Challenge page](ai_challenge/pig_chase/README.md). 139 | 140 | # Next steps 141 | 142 | ## Run an experiment in Docker on Azure 143 | 144 | Docker is a virtualization platform that makes it easy to deploy software with all its dependencies. 145 | We use docker to run experiments locally or in the cloud. Details on how to run an example experiment using docker are in the [docker README](docker/README.md). 
146 | 147 | 148 | ## Resources 149 | 150 | - [Malmo Platform Tutorial](https://github.com/Microsoft/malmo/blob/master/Malmo/samples/Python_examples/Tutorial.pdf) 151 | - [Azure Portal](portal.azure.com/) 152 | - [Docker Documentation](https://docs.docker.com/) 153 | - [Docker Machine on Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/virtual-machines-linux-docker-machine) 154 | - [CNTK Tutorials](https://www.microsoft.com/en-us/research/product/cognitive-toolkit/tutorials/) 155 | - [CNTK Documentation](https://github.com/Microsoft/CNTK/wiki) 156 | - [Chainer Documentation](http://docs.chainer.org/en/stable/) 157 | - [TensorBoard Documentation](https://www.tensorflow.org/get_started/summaries_and_tensorboard) 158 | -------------------------------------------------------------------------------- /malmopy/model/cntk/qlearning.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Microsoft Corporation. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | # 8 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | # the Software. 10 | # 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ===================================================================================================================

from __future__ import absolute_import

from cntk import Value
from cntk.axis import Axis
from cntk.initializer import he_uniform, he_normal
from cntk.layers import Convolution, Dense, default_options
from cntk.layers.higher_order_layers import Sequential
from cntk.learners import adam, momentum_schedule, learning_rate_schedule, \
    UnitType
from cntk.ops import input, relu, reduce_sum
from cntk.ops.functions import CloneMethod
from cntk.train.trainer import Trainer

from . import CntkModel, prepend_batch_axis, huber_loss
from ..model import QModel
from ...util import check_rank


class QNeuralNetwork(CntkModel, QModel):
    """
    Represents a learning capable entity using CNTK
    """

    def __init__(self, in_shape, output_shape, device_id=None,
                 learning_rate=0.00025, momentum=0.9,
                 minibatch_size=32, update_interval=10000,
                 n_workers=1, visualizer=None):

        """
        Q Neural Network following Mnih and al. implementation and default options.

        The network has the following topology:
        Convolution(32, (8, 8))
        Convolution(64, (4, 4))
        Convolution(64, (3, 3))
        Dense(512)
        Dense(output_shape)

        :param in_shape: Shape of the observations perceived by the learner (the neural net input)
        :param output_shape: Size of the action space (mapped to the number of output neurons)

        :param device_id: Use None to let CNTK select the best available device,
            -1 for CPU, >= 0 for GPU
            (default: None)

        :param learning_rate: Learning rate
            (default: 0.00025, as per Mnih et al.)

        :param momentum: Momentum, provided as momentum value for
            averaging gradients without unit gain filter
            Note that CNTK does not currently provide an implementation
            of Graves' RmsProp with momentum.
            It uses AdamSGD optimizer instead.
            (default: 0.9)

        :param minibatch_size: Minibatch size
            (default: 32, as per Mnih et al.)

        :param update_interval: Number of train() calls between refreshes of
            the frozen target network
            (default: 10000, as per Mnih et al.)

        :param n_workers: Number of concurrent worker for distributed training.
            (default: 1, not distributed)

        :param visualizer: Optional visualizer allowing the model to save summary data
            (default: None, no visualization)

        Ref: Mnih et al.: "Human-level control through deep reinforcement learning."
        Nature 518.7540 (2015): 529-533.
        """

        assert learning_rate > 0, 'learning_rate should be > 0'
        assert 0. <= momentum < 1, 'momentum should be 0 <= momentum < 1'

        QModel.__init__(self, in_shape, output_shape)
        CntkModel.__init__(self, device_id, False, n_workers, visualizer)

        self._nb_actions = output_shape
        self._steps = 0
        self._target_update_interval = update_interval
        self._target = None

        # Input vars
        self._environment = input(in_shape, name='env',
                                  dynamic_axes=(Axis.default_batch_axis()))
        self._q_targets = input(1, name='q_targets',
                                dynamic_axes=(Axis.default_batch_axis()))
        self._actions = input(output_shape, name='actions',
                              dynamic_axes=(Axis.default_batch_axis()))

        # Define the neural network graph
        self._model = self._build_model()(self._environment)
        # Frozen clone used as the target network
        self._target = self._model.clone(
            CloneMethod.freeze, {self._environment: self._environment}
        )

        # Define the learning rate
        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)

        # AdamSGD optimizer
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)
        l_sgd = adam(self._model.parameters, lr_schedule,
                     momentum=m_schedule,
                     unit_gain=True,
                     variance_momentum=vm_schedule)

        if self.distributed_training:
            raise NotImplementedError('ASGD not implemented yet.')

        # _actions is a sparse 1-hot encoding of the actions done by the agent
        q_acted = reduce_sum(self._model * self._actions, axis=0)

        # Define the trainer with Huber Loss function
        criterion = huber_loss(q_acted, self._q_targets, 1.0)

        self._learner = l_sgd
        self._trainer = Trainer(self._model, (criterion, None), l_sgd)

    @property
    def loss_val(self):
        # Average loss of the last trained minibatch
        return self._trainer.previous_minibatch_loss_average

    def _build_model(self):
        """Build the Mnih et al. DQN topology as a CNTK Sequential model."""
        with default_options(init=he_uniform(), activation=relu, bias=True):
            model = Sequential([
                Convolution((8, 8), 32, strides=(4, 4)),
                Convolution((4, 4), 64, strides=(2, 2)),
                Convolution((3, 3), 64, strides=(1, 1)),
                Dense(512, init=he_normal(0.01)),
                Dense(self._nb_actions, activation=None, init=he_normal(0.01))
            ])
        return model

    def train(self, x, q_value_targets, actions=None):
        """One minibatch update; refreshes the target net every update_interval calls."""
        assert actions is not None, 'actions cannot be None'

        # Add the trailing unit dimension: shape [N] => [N, 1]
        if check_rank(q_value_targets.shape, 1):
            q_value_targets = q_value_targets.reshape((-1, 1))

        # Add extra dimensions to match shape [N, 1] required by one_hot
        if check_rank(actions.shape, 1):
            actions = actions.reshape((-1, 1))

        # We need batch axis
        if check_rank(x.shape, len(self._environment.shape)):
            x = prepend_batch_axis(x)

        self._trainer.train_minibatch({
            self._environment: x,
            self._actions: Value.one_hot(actions, self._nb_actions),
            self._q_targets: q_value_targets
        })

        # Counter number of train calls
        self._steps += 1

        # Refresh the frozen target network from the trained model
        if (self._steps % self._target_update_interval) == 0:
            self._target = self._model.clone(
                CloneMethod.freeze, {self._environment: self._environment}
            )

    def evaluate(self, data, model=QModel.ACTION_VALUE_NETWORK):
        """Return Q-value predictions from the online or target network."""
        # If evaluating a single sample, expand the minibatch axis
        # (minibatch = 1, input_shape...)
        if len(data.shape) == len(self.input_shape):
            data = prepend_batch_axis(data)  # Prepend minibatch dim

        if model == QModel.TARGET_NETWORK:
            predictions = self._target.eval({self._environment: data})
        else:
            predictions = self._model.eval({self._environment: data})
        return predictions.squeeze()


class ReducedQNeuralNetwork(QNeuralNetwork):
    """
    Represents a learning capable entity using CNTK, reduced model
    """

    def __init__(self, in_shape, output_shape, device_id=None,
                 learning_rate=0.00025, momentum=0.9,
                 minibatch_size=32, update_interval=10000,
                 n_workers=1, visualizer=None):

        QNeuralNetwork.__init__(self, in_shape, output_shape, device_id,
                                learning_rate, momentum, minibatch_size,
                                update_interval, n_workers, visualizer)

    def _build_model(self):
        # Shallower topology than QNeuralNetwork: drops the first 8x8 conv
        with default_options(init=he_uniform(), activation=relu, bias=True):
            model = Sequential([
                Convolution((4, 4), 64, strides=(2, 2), name='conv1'),
                Convolution((3, 3), 64, strides=(1, 1), name='conv2'),
                Dense(512, name='dense1', init=he_normal(0.01)),
                Dense(self._nb_actions, activation=None, init=he_normal(0.01),
                      name='qvalues')
            ])
        return model