├── .gitignore ├── LICENSE.txt ├── README.md ├── params ├── acrobot │ └── sarsa.json ├── cartpole │ ├── sarsa.json │ ├── sarsa_alphabound.json │ └── sarsa_twopoles.json ├── chain │ └── delayed_qlearning.json ├── mountaincar │ ├── example_randtrial.json │ ├── ilstd.json │ ├── lspi.json │ ├── lstd.json │ ├── lstdq.json │ ├── mdba.json │ ├── mdq.json │ ├── mdsarsa.json │ ├── modelbased.json │ ├── nac_lstdq.json │ ├── nacs.json │ ├── olstd.json │ ├── qlearning.json │ ├── rlstd.json │ ├── sarsa.json │ ├── sarsa_ann.json │ ├── sarsa_lecun.json │ ├── ttac1.json │ └── ttnac3.json └── puddleworld │ └── sarsa.json ├── pyrl ├── .gitignore ├── Makefile ├── README.md ├── TODO ├── __init__.py ├── agents │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── delayed_qlearning.py │ ├── lstd.py │ ├── mirror_descent.py │ ├── modelbased.py │ ├── models │ │ ├── .gitignore │ │ ├── README.md │ │ ├── __init__.py │ │ ├── batch_model.py │ │ └── model.py │ ├── planners │ │ ├── .gitignore │ │ ├── README.md │ │ ├── __init__.py │ │ ├── fitted_qiteration.py │ │ └── planner.py │ ├── policy_gradient.py │ ├── qlearning.py │ ├── sarsa_lambda.py │ ├── sarsa_lambda_ann.py │ ├── skeleton_agent.py │ └── stepsizes.py ├── basis │ ├── .gitignore │ ├── CTiles │ │ ├── .gitignore │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── __init__.py │ │ ├── doc │ │ │ └── tiles.html │ │ ├── fancytiles.py │ │ ├── src │ │ │ ├── tiles.cpp │ │ │ ├── tiles.h │ │ │ └── tilesInt.C │ │ └── tiletimes.py │ ├── README.md │ ├── Tiles │ │ ├── .gitignore │ │ ├── README.md │ │ ├── __init__.py │ │ ├── fancytiles.py │ │ ├── tiles.py │ │ └── tiletimes.py │ ├── __init__.py │ ├── fourier.py │ ├── rbf.py │ ├── tilecode.py │ └── trivial.py ├── environments │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── acrobot.py │ ├── batch_replenish.py │ ├── bicycle.py │ ├── cartpole.py │ ├── chain.py │ ├── configs │ │ ├── neurostim │ │ │ ├── params.dat │ │ │ ├── test_features.dat │ │ │ ├── test_labels.dat │ │ │ └── test_stimulation.dat │ │ 
├── pinball │ │ │ ├── pinball_hard_single.cfg │ │ │ └── pinball_simple_single.cfg │ │ ├── pomdps │ │ │ └── tiger.POMDP │ │ └── tetris │ │ │ ├── 3brick.dat │ │ │ ├── melax.dat │ │ │ ├── standard.dat │ │ │ └── sztetris.dat │ ├── fuelworld.py │ ├── gridworld.py │ ├── libPOMDP │ │ ├── .gitignore │ │ ├── CMakeLists.txt │ │ ├── COPYING │ │ ├── README.md │ │ ├── __init__.py │ │ └── src │ │ │ ├── imm-reward.c │ │ │ ├── imm-reward.h │ │ │ ├── libpomdp.c │ │ │ ├── libpomdp.h │ │ │ ├── mdp-common.h │ │ │ ├── mdp.c │ │ │ ├── mdp.h │ │ │ ├── parse_constant.h │ │ │ ├── parse_err.c │ │ │ ├── parse_err.h │ │ │ ├── parse_hash.c │ │ │ ├── parse_hash.h │ │ │ ├── parser.y │ │ │ ├── scanner.l │ │ │ ├── sparse-matrix.c │ │ │ └── sparse-matrix.h │ ├── marble_maze.py │ ├── mdptetris │ │ ├── CMakeLists.txt │ │ ├── COPYING │ │ ├── README.md │ │ ├── __init__.py │ │ ├── data │ │ │ ├── features │ │ │ │ ├── bertsekas_initial.dat │ │ │ │ ├── ce_bdu.dat │ │ │ │ ├── ce_bertsekas.dat │ │ │ │ ├── ce_bertsekas_dellacherie.dat │ │ │ │ ├── ce_dellacherie.dat │ │ │ │ ├── ce_du.dat │ │ │ │ ├── dellacherie_initial.dat │ │ │ │ ├── dellacherie_ourwellsums.dat │ │ │ │ ├── record_bdu.dat │ │ │ │ ├── record_du.dat │ │ │ │ └── value_estimator_bertsekas.dat │ │ │ ├── pieces3.dat │ │ │ ├── pieces4.dat │ │ │ └── pieces_melax.dat │ │ └── src │ │ │ ├── board.c │ │ │ ├── board.h │ │ │ ├── brick_masks.c │ │ │ ├── brick_masks.h │ │ │ ├── common_parameters.c │ │ │ ├── common_parameters.h │ │ │ ├── config.h │ │ │ ├── feature_functions.c │ │ │ ├── feature_functions.h │ │ │ ├── feature_policy.c │ │ │ ├── feature_policy.h │ │ │ ├── file_tools.c │ │ │ ├── file_tools.h │ │ │ ├── game.c │ │ │ ├── game.h │ │ │ ├── games_statistics.c │ │ │ ├── games_statistics.h │ │ │ ├── hashtable.c │ │ │ ├── hashtable.h │ │ │ ├── interruptions.c │ │ │ ├── interruptions.h │ │ │ ├── last_move_info.c │ │ │ ├── last_move_info.h │ │ │ ├── macros.h │ │ │ ├── mdptetris.c │ │ │ ├── piece.c │ │ │ ├── piece.h │ │ │ ├── random.c │ │ │ ├── random.h │ │ │ 
├── rewards.c │ │ │ ├── rewards.h │ │ │ ├── simple_tetris.c │ │ │ ├── simple_tetris.h │ │ │ ├── tetris.c │ │ │ └── types.h │ ├── mountaincar.py │ ├── multiroom.py │ ├── neurostim.py │ ├── pinball.py │ ├── pomdp.py │ ├── puddleworld.py │ ├── skeleton_environment.py │ ├── taxi.py │ ├── tetris.py │ ├── twip.py │ └── windyworld.py ├── experiments │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── episodic.py │ └── randomized.py ├── misc │ ├── .gitignore │ ├── __init__.py │ ├── json.py │ ├── matrix.py │ ├── parameter.py │ └── timer.py ├── rlglue │ ├── .gitignore │ ├── RLGlueLocal.py │ ├── TaskSpecRLGlue.py │ ├── __init__.py │ ├── registry.py │ └── run.py └── visualizers │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── compareParameters.py │ ├── plotExperiment.py │ └── plotParameters.py └── scripts ├── generate_spearmint.sh ├── spearmint_config.py └── spearmint_template.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | *~ 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | var 12 | sdist 13 | develop-eggs 14 | .installed.cfg 15 | 16 | # Installer logs 17 | pip-log.txt 18 | 19 | # Unit test / coverage reports 20 | .coverage 21 | .tox 22 | 23 | #Translations 24 | *.mo 25 | 26 | #Mr Developer 27 | .mr.developer.cfg 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | python-rl 2 | ========= 3 | 4 | Some Reinforcement Learning in Python 5 | 6 | 7 | Run with: 8 | 9 | python -m pyrl.rlglue.run 10 | 11 | Many other run options exist. A good starting point is with the command line help: 12 | 13 | python -m pyrl.rlglue.run --help 14 | 15 | The params/ directory contains examples of experiments that demonstrate many of the different agent algorithms. 
16 | As an example, a randomized trial experiment using mountain car, and a randomly generated 'fixed policy' can be 17 | run with: 18 | 19 | python -m pyrl.rlglue.run --load params/mountaincar/example_randtrial.json 20 | 21 | The out put of this particular experiment is of the form: 22 | #evaluation points, list of evaluation index and evaluation value pairs, list of parameter values 23 | 24 | For example: 25 | 1,0,-4999.0,0.0,0.219169344211,0.1,1.0,0.7,1,13709650200845 26 | 27 | For this, there is oly 1 evaluation point (which is because this experiment only runs one episode). 28 | Then the evaluation index is zero, for the zero-th episode, followed by the return for that episode. 29 | Then we see a learning rate of 0.0 (because this is a fixed policy), followed by other parameters of 30 | Sarsa which in this case are not important. The final value of the line is the random seed used to 31 | generate the fixed policy. 32 | 33 | 34 | Contributors 35 | ============ 36 | Will Dabney 37 | 38 | Pierre-Luc Bacon 39 | -------------------------------------------------------------------------------- /params/acrobot/sarsa.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {}, "name": "Acrobot"}, 3 | "experiment": {"params": {"num_episodes": 50, "num_runs": 30}, "name": "Episodic"}, 4 | "agent": {"params": { 5 | "alpha": 0.003, 6 | "epsilon": 0.01, 7 | "gamma": 0.99, 8 | "lmbda": 0.7, 9 | "basis": "fourier", 10 | "fourier_order": 3 11 | }, "name": "Sarsa"} 12 | } 13 | -------------------------------------------------------------------------------- /params/cartpole/sarsa.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {}, "name": "Cart Pole"}, 3 | "experiment": {"params": {"num_episodes": 50, "num_runs": 30}, "name": "Episodic"}, 4 | "agent": { 5 | "params": { 6 | "basis": "fourier", 7 | "fourier_order": 3, 8 | "alpha": 0.005, 9 | 
"gamma": 0.99, 10 | "lmbda": 0.7, 11 | "epsilon": 0.01 12 | }, 13 | "name": "Sarsa" 14 | } 15 | } -------------------------------------------------------------------------------- /params/cartpole/sarsa_alphabound.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {}, "name": "Cart Pole"}, 3 | "experiment": {"params": {"num_episodes": 30}, "name": "Episodic"}, 4 | "agent": {"params": { 5 | "alpha": 1.0, 6 | "epsilon": 0.01, 7 | "gamma": 0.99, 8 | "lmbda": 0.7, 9 | "basis": "fourier", 10 | "fourier_order": 3 11 | }, "name": "Adaptive (AlphaBound) Sarsa"} 12 | } 13 | -------------------------------------------------------------------------------- /params/cartpole/sarsa_twopoles.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {"pole_scales": [1.0, 0.1]}, "name": "Cart Pole"}, 3 | "experiment": {"params": {"num_episodes": 10000, "num_runs": 10}, "name": "Episodic"}, 4 | "agent": { 5 | "params": { 6 | "basis": "fourier", 7 | "fourier_order": 5, 8 | "alpha": 0.00001, 9 | "gamma": 0.99, 10 | "lmbda": 0.7, 11 | "epsilon": 0.01 12 | }, 13 | "name": "Sarsa" 14 | } 15 | } -------------------------------------------------------------------------------- /params/chain/delayed_qlearning.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {}, "name": "Chain"}, 3 | "experiment": {"params": {"num_episodes": 10, "maxsteps":1000, "num_runs": 10}, "name": "Episodic"}, 4 | "agent": {"params": { 5 | "m": 10, 6 | "gamma": 0.99, 7 | "epsilon": 0.1 8 | }, "name": "Delayed Q-Learning"} 9 | } 10 | -------------------------------------------------------------------------------- /params/mountaincar/example_randtrial.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {}, "name": "Mountain Car"}, 3 | 
"experiment": {"params": {"num_trials": 30, "num_runs": 10, "num_episodes": 1}, "name": "Randomized Trial"}, 4 | "agent": { 5 | "params": 6 | { 7 | "alpha": 0.1, 8 | "lmbda": 0.7, 9 | "gamma": 1.0, 10 | "softmax": true 11 | }, "name": "Fixed Policy"} 12 | } -------------------------------------------------------------------------------- /params/mountaincar/ilstd.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "alpha": 0.000001, 12 | "epsilon": 0.01, 13 | "ilstd_sweeps": 1, 14 | "lmbda": 0.0, 15 | "gamma": 0.99, 16 | "basis": "fourier", 17 | "fourier_order": 3 18 | }, "name": "Incremental Least Squares TD"} 19 | } -------------------------------------------------------------------------------- /params/mountaincar/lspi.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {}, "name": "Mountain Car"}, 3 | "experiment": {"params": {"num_episodes": 50}, "name": "Episodic"}, 4 | "agent": {"params": {"gamma": 1.0, "lspi_threshold": 0.001, "lstd_num_samples": 700, "lstd_precond": 0.001}, "name": "LSPI"} 5 | } 6 | -------------------------------------------------------------------------------- /params/mountaincar/lstd.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "lstd_update_freq": 100, 12 | "epsilon": 0.01, 13 | "basis": "fourier", 14 | "fourier_order": 3 15 | }, "name": "Least Squares Temporal Difference Learning"} 16 | } -------------------------------------------------------------------------------- /params/mountaincar/lstdq.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {}, "name": "Mountain Car"}, 3 | "experiment": {"params": {"num_episodes": 50}, "name": "Episodic"}, 4 | "agent": {"params": {"lstd_update_freq": 500, "lstd_num_samples": 500, "lstd_precond": 0.1, "basis": "fourier"}, "name": "LSTD-Q"} 5 | } 6 | -------------------------------------------------------------------------------- /params/mountaincar/mdba.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "nonlinear_lr": 0.000001, 12 | "sparsity": 0.0001, 13 | "epsilon": 0.01, 14 | "gamma": 0.99, 15 | "lmbda": 0.7, 16 | "alpha": 0.007, 17 | "basis": "fourier", 18 | "fourier_order": 3 19 | }, "name": "Sparse Mirror Descent Q-Learning with Non-Linear Basis Adaptation"} 20 | } -------------------------------------------------------------------------------- /params/mountaincar/mdq.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "sparsity": 0.0001, 12 | "epsilon": 0.01, 13 | "gamma": 0.99, 14 | "lmbda": 0.7, 15 | "alpha": 0.007, 16 | "basis": "fourier", 17 | "fourier_order": 3 18 | }, "name": "Sparse Mirror Descent Q-Learning"} 19 | } -------------------------------------------------------------------------------- /params/mountaincar/mdsarsa.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | 
"agent": { 10 | "params": { 11 | "sparsity": 0.0001, 12 | "epsilon": 0.01, 13 | "gamma": 0.99, 14 | "lmbda": 0.7, 15 | "alpha": 0.007, 16 | "basis": "fourier", 17 | "fourier_order": 3 18 | }, "name": "Sparse Mirror Descent Sarsa"} 19 | } -------------------------------------------------------------------------------- /params/mountaincar/modelbased.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "planner_params": {"basis": "fourier", "regressor": "ridge", "iterations": 1000, "support_size": 50, "resample": 15}, 12 | "model_params": {"update_freq": 20, "known_threshold": 0.95, "max_experiences": 700}, 13 | "gamma": 0.99 14 | }, "name": "Model Based Agent"} 15 | } -------------------------------------------------------------------------------- /params/mountaincar/nac_lstdq.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "epsilon": 0.01, 12 | "nac_freq": 100, 13 | "gamma": 0.99, 14 | "lmbda": 0.7, 15 | "alpha": 0.004, 16 | "basis": "fourier", 17 | "fourier_order": 3 18 | }, "name": "Natural Actor-Critic with LSTD-Q"} 19 | } -------------------------------------------------------------------------------- /params/mountaincar/nacs.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "epsilon": 0.01, 12 | "beta": 0.004, 13 | "nac_freq": 200, 14 | "gamma": 0.99, 15 | "lmbda": 
0.7, 16 | "alpha": 0.004, 17 | "basis": "fourier", 18 | "fourier_order": 3 19 | }, "name": "Natural Actor-Critic with Sarsa"} 20 | } -------------------------------------------------------------------------------- /params/mountaincar/olstd.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "epsilon": 0.01, 12 | "basis": "fourier", 13 | "fourier_order": 3 14 | }, "name": "Online Least Squares TD"} 15 | } -------------------------------------------------------------------------------- /params/mountaincar/qlearning.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "epsilon": 0.01, 12 | "gamma": 0.99, 13 | "lmbda": 0.7, 14 | "alpha": 0.004, 15 | "basis": "fourier", 16 | "fourier_order": 3 17 | }, "name": "Q-Learning"} 18 | } -------------------------------------------------------------------------------- /params/mountaincar/rlstd.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "rlstd_delta": 1.0, 12 | "epsilon": 0.01, 13 | "basis": "fourier", 14 | "fourier_order": 3 15 | }, "name": "Recursive Least Squares TD"} 16 | } -------------------------------------------------------------------------------- /params/mountaincar/sarsa.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {}, "name": "Mountain Car"}, 3 
| "experiment": {"params": {"num_episodes": 50, "num_runs": 5}, "name": "Episodic"}, 4 | "agent": { 5 | "params": { 6 | "basis": "fourier", 7 | "fourier_order": 3, 8 | "alpha": 0.004, 9 | "gamma": 1.0, 10 | "lmbda": 0.7, 11 | "epsilon": 0.01 12 | }, 13 | "name": "Sarsa" 14 | } 15 | } -------------------------------------------------------------------------------- /params/mountaincar/sarsa_ann.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {}, "name": "Mountain Car"}, 3 | "experiment": {"params": {"num_episodes": 100}, "name": "Episodic"}, 4 | "agent": {"params": { 5 | "num_hidden": 5, 6 | "epsilon": 0.01, 7 | "gamma": 1.0, 8 | "lmbda": 0.9, 9 | "alpha": 0.00001 10 | }, "name": "Sarsa ANN"} 11 | } -------------------------------------------------------------------------------- /params/mountaincar/sarsa_lecun.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {}, "name": "Mountain Car"}, 3 | "experiment": {"params": {"num_episodes": 30}, "name": "Episodic"}, 4 | "agent": {"params": {"basis": "fourier"}, "name": "Adaptive (InvMaxEigen) Sarsa"} 5 | } 6 | -------------------------------------------------------------------------------- /params/mountaincar/ttac1.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "beta": 0.001, 12 | "epsilon": 0.01, 13 | "gamma": 0.99, 14 | "lmbda": 0.7, 15 | "alpha": 0.0001, 16 | "basis": "fourier", 17 | "fourier_order": 3 18 | }, "name": "Two-Timescale Actor-Critic"} 19 | } -------------------------------------------------------------------------------- /params/mountaincar/ttnac3.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "environment": { 3 | "params": { 4 | }, "name": "Mountain Car"}, 5 | "experiment": { 6 | "params": { 7 | "num_episodes": 50 8 | }, "name": "Episodic"}, 9 | "agent": { 10 | "params": { 11 | "beta": 0.001, 12 | "epsilon": 0.01, 13 | "gamma": 0.99, 14 | "lmbda": 0.7, 15 | "alpha": 0.0001, 16 | "basis": "fourier", 17 | "fourier_order": 3 18 | }, "name": "Two-Timescale Natural Actor-Critic"} 19 | } -------------------------------------------------------------------------------- /params/puddleworld/sarsa.json: -------------------------------------------------------------------------------- 1 | { 2 | "environment": {"params": {}, "name": "Puddle World"}, 3 | "experiment": {"params": {"num_episodes": 50, "num_runs": 30}, "name": "Episodic"}, 4 | "agent": { 5 | "params": { 6 | "basis": "fourier", 7 | "fourier_order": 3, 8 | "alpha": 0.1, 9 | "gamma": 1.0, 10 | "lmbda": 0.7, 11 | "epsilon": 0.01 12 | }, 13 | "name": "Sarsa" 14 | } 15 | } -------------------------------------------------------------------------------- /pyrl/.gitignore: -------------------------------------------------------------------------------- 1 | */*.py[co] 2 | */*.o 3 | */*.so 4 | 5 | # Packages 6 | *.egg 7 | *.egg-info 8 | dist 9 | build 10 | eggs 11 | parts 12 | bin 13 | var 14 | sdist 15 | develop-eggs 16 | .installed.cfg 17 | */*~ 18 | # Installer logs 19 | pip-log.txt 20 | 21 | # Unit test / coverage reports 22 | .coverage 23 | .tox 24 | 25 | #Translations 26 | *.mo 27 | 28 | #Mr Developer 29 | .mr.developer.cfg 30 | -------------------------------------------------------------------------------- /pyrl/Makefile: -------------------------------------------------------------------------------- 1 | 2 | DIRS = basis/CTiles/build environments/libPOMDP/build environments/mdptetris/build 3 | 4 | all: 5 | -for d in $(DIRS); do (mkdir $$d; cd $$d; cmake ..; $(MAKE) ); done 6 | 7 | clean: 8 | -for d in $(DIRS); do 
(mkdir $$d; cd $$d; cmake ..; $(MAKE) clean; cd ..; rm -rf build); done 9 | -find . -type f -name "*.pyc" -exec rm -f {} \; -------------------------------------------------------------------------------- /pyrl/README.md: -------------------------------------------------------------------------------- 1 | pyRL 2 | ========= 3 | 4 | I could rant all day long about the fact that most of the time Reinforcement Learning code 5 | available online tends to be completely broken, out of date, or very minimally useful. By 6 | far the biggest exception to this, in my opinion, has been the RL-Glue project. However, 7 | the project has either matured or been left on the shelf with few real updates in the last 8 | couple of years. 9 | 10 | pyRL is a project meant to provide an up to date collection of Reinforcement Learning 11 | agents, environments, and supporting methods written in Python, built on and extending the 12 | RL-Glue framework. Whenever possible it will make use of optimized python libraries such as 13 | numpy, scipy, scikits-learn, and neurolab. Some modules requiring additional speed will be 14 | written in C and will be compilable to Python modules. 15 | 16 | All agents and environments will be able to act as standalone RL-Glue network interfaces run 17 | from the commandline. However, pyRL also includes a module allowing agent, environment and experiment 18 | to be run together without the use of sockets. RL-Glue version 3.0 does not currently support that 19 | functionality for Python. 20 | 21 | This project is very much under development, but in the near-term I hope to have the most 22 | common RL environments, model-free and model-based agents implemented and working. From there 23 | I hope to add interesting new algorithms that I come across in the field (whenever I'm able to 24 | implement them successfully). 
25 | 26 | --Will Dabney 27 | -------------------------------------------------------------------------------- /pyrl/TODO: -------------------------------------------------------------------------------- 1 | 2 | TODO for pyRL project 3 | ==================== 4 | 5 | Implement Environments (in Python) or Convert C/C++ into Module: 6 | ------------------------------ 7 | Dart Throwing (from Bruno Castro da Silva's paper on parameterized skills) 8 | Partially Observable Taxi 9 | Simulated-Simplified Red Room (GDK/S.K.'s work, similar to a more functional continuous playroom) 10 | N-DOF Reaching and Reaching through viapoint 11 | Ball in Cup simulation 12 | 13 | 14 | Implement Agents (in Python) or Convert C/C++ into Module: 15 | ------------------------------ 16 | 17 | Implement "save_trajectory filename numsteps" message support into base class of existing agents 18 | 19 | TD-delta Pi 20 | TDC / GTD 21 | Skill Chaining 22 | PoWER 23 | PI^2 24 | 25 | 26 | Extensions to Existing Implementations: 27 | ------------------------------ 28 | 29 | Add other exploration bonus methods to the modelbased agents 30 | Add pygame based viewer for Tetris environment 31 | Provide reasonable 'working' parameters for every agent algorithm. Currently missing: 32 | Sarsa ANN 33 | REINFORCE 34 | Composite Mirror Descent 35 | By having working parameters for every algorithm on at least one domain, and 36 | every domain for at least one algorithm, I plan to build a script which uses 37 | them as tests. This should ensure that I don't break things accidentally. 38 | Actual unit tests on RL agents and environments would be great, but until 39 | inspiration hits, this is probably the best approach. 
-------------------------------------------------------------------------------- /pyrl/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /pyrl/agents/.gitignore: -------------------------------------------------------------------------------- 1 | */*.py[co] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | var 12 | sdist 13 | develop-eggs 14 | .installed.cfg 15 | */*~ 16 | # Installer logs 17 | pip-log.txt 18 | 19 | # Unit test / coverage reports 20 | .coverage 21 | .tox 22 | 23 | #Translations 24 | *.mo 25 | 26 | #Mr Developer 27 | .mr.developer.cfg 28 | -------------------------------------------------------------------------------- /pyrl/agents/README.md: -------------------------------------------------------------------------------- 1 | pyrl.agents 2 | ========= 3 | 4 | Reinforcement Learning agents that have been implemented in python using the RLGlue framework. 
5 | -------------------------------------------------------------------------------- /pyrl/agents/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | # Author: Pierre-Luc Bacon 3 | 4 | __all__ = ["skeleton_agent", "stepsizes"] 5 | 6 | try: 7 | import sklearn 8 | __all__.append("modelbased") 9 | except: 10 | pass 11 | 12 | try: 13 | import pyrl.basis.tilecode 14 | __all__.append("qlearning") 15 | __all__.append("delayed_qlearning") 16 | __all__.append("sarsa_lambda") 17 | __all__.append("lstd") 18 | __all__.append("policy_gradient") 19 | __all__.append("mirror_descent") 20 | except: 21 | pass 22 | 23 | try: 24 | import neurolab 25 | __all__.append("sarsa_lambda_ann") 26 | except: 27 | pass 28 | 29 | -------------------------------------------------------------------------------- /pyrl/agents/delayed_qlearning.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy 4 | import qlearning 5 | from pyrl.rlglue.registry import register_agent 6 | 7 | @register_agent 8 | class delayed_qlearning(qlearning.qlearning_agent): 9 | """Delayed Q-Learning algorithm. This algorithm is only directly applicable 10 | to discrete state, discrete action domains. Thus, it should throw an assertion 11 | failure if you attempt to use it not in such a domain. 12 | 13 | Unfortunately, I have no yet been able to get this to work consistently on 14 | the marble maze domain. It seems likely that it would work on something simpler 15 | like chain domain. Maybe there's a bug? 16 | 17 | From the paper: 18 | PAC Model-Free Reinforcement Learning. 2006. 19 | Alexander Strehl, Lihong Li, Eric Wiewiora, John Langford, and Michael Littman. 
20 | """ 21 | 22 | name = "Delayed Q-Learning" 23 | 24 | def init_parameters(self): 25 | self.gamma = self.params.setdefault('gamma', 0.99) 26 | self.epsilon = self.params.setdefault('epsilon', 0.1) 27 | super(delayed_qlearning, self).init_parameters() 28 | self.m = self.params.setdefault('m', 100) 29 | 30 | @classmethod 31 | def agent_parameters(cls): 32 | param_set = parameter_set(cls.name, description="Parameters required for running an RL agent algorithm.") 33 | add_parameter(param_set, "epsilon", default=0.1) 34 | add_parameter(param_set, "gamma", default=0.99) 35 | add_parameter(param_set, "m", default=100, type=int, min=1, max=1000) 36 | return param_set 37 | 38 | def agent_supported(self, parsedSpec): 39 | if parsedSpec.valid: 40 | # Check observation form, and then set up number of features/states 41 | assert len(parsedSpec.getIntObservations()) > 0, "Expecting at least one discrete observation" 42 | assert len(parsedSpec.getDoubleObservations()) == 0, "Expecting no continuous observations." 43 | 44 | # Check action form, and then set number of actions 45 | assert len(parsedSpec.getIntActions())==1, "Expecting 1-dimensional discrete actions" 46 | assert len(parsedSpec.getDoubleActions())==0, "Expecting no continuous actions" 47 | assert not parsedSpec.isSpecial(parsedSpec.getIntActions()[0][0]), "Expecting min action to be a number not a special value" 48 | assert not parsedSpec.isSpecial(parsedSpec.getIntActions()[0][1]), "Expecting max action to be a number not a special value" 49 | self.reward_range = numpy.array(parsedSpec.getRewardRange()[0]) 50 | return True 51 | else: 52 | return False 53 | 54 | def agent_init(self,taskSpec): 55 | super(delayed_qlearning, self).agent_init(taskSpec) 56 | self.weights.fill(1./(1. 
- self.gamma)) 57 | self.updates = numpy.zeros(self.weights.shape) 58 | self.visit_count = numpy.zeros(self.weights.shape) 59 | self.update_time = numpy.zeros(self.weights.shape) 60 | self.LEARN = numpy.ones(self.weights.shape, dtype=bool) 61 | self.last_update = 0 62 | self.step_count = 0 63 | # Compute the 'correct' m to use (from the paper) 64 | # But tends to be so large as to be impractical 65 | #k = 1./((1. - self.gamma)*self.epsilon) 66 | #delta = 0.1 67 | #self.m = numpy.log(3. * self.numDiscStates * self.numActions * (1. + self.numDiscStates * self.numActions * k) / delta) 68 | #self.m /= 2. * self.epsilon**2 * (1. - self.gamma)**2 69 | #self.m = int(self.m) 70 | #print self.m 71 | 72 | def getAction(self, state, discState): 73 | """Get the action under the current policy for the given state. 74 | 75 | Args: 76 | state: The array of continuous state features 77 | discState: The integer representing the current discrete state value 78 | 79 | Returns: 80 | The current policy action, or a random action with some probability. 81 | """ 82 | return numpy.dot(self.weights[discState,:,:].T, self.basis.computeFeatures(state)).argmax() 83 | 84 | def update(self, phi_t, state, discState, reward): 85 | reward = (reward - self.reward_range[0]) / (self.reward_range[1] - self.reward_range[0]) 86 | self.step_count += 1 87 | state_action = numpy.where(phi_t != 0) 88 | if self.LEARN[state_action]: # If Learn[s,a] 89 | qvalues = self.getActionValues(state, discState) 90 | self.updates[state_action] += reward + self.gamma * qvalues.max() 91 | self.visit_count[state_action] += 1 92 | if self.visit_count[state_action] == self.m: 93 | if self.weights[state_action] - self.updates[state_action]/self.m >= 2. 
* self.epsilon: 94 | self.weights[state_action] = self.updates[state_action]/self.m + self.epsilon 95 | self.last_update = self.step_count 96 | #print (self.weights.ravel() < self.weights.max()).sum(), self.weights.size 97 | elif self.update_time[state_action] >= self.last_update: 98 | self.LEARN[state_action] = False 99 | self.update_time[state_action] = self.step_count 100 | self.updates[state_action] = 0 101 | self.visit_count[state_action] = 0 102 | elif self.update_time[state_action] < self.last_update: 103 | self.LEARN[state_action] = True 104 | 105 | if __name__=="__main__": 106 | from pyrl.agents.skeleton_agent import runAgent 107 | runAgent(delayed_qlearning) 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /pyrl/agents/models/.gitignore: -------------------------------------------------------------------------------- 1 | */*.py[co] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | var 12 | sdist 13 | develop-eggs 14 | .installed.cfg 15 | */*~ 16 | # Installer logs 17 | pip-log.txt 18 | 19 | # Unit test / coverage reports 20 | .coverage 21 | .tox 22 | 23 | #Translations 24 | *.mo 25 | 26 | #Mr Developer 27 | .mr.developer.cfg 28 | -------------------------------------------------------------------------------- /pyrl/agents/models/README.md: -------------------------------------------------------------------------------- 1 | pyrl.agents.models 2 | ========= 3 | 4 | Model learners and models for use with the RL agents. 
class ModelLearner(object):
    """Base class for model learners used by model-based RL agents.

    Holds common bookkeeping for the joint (discrete, continuous) state
    space and for randomized parameter generation; subclasses override the
    experience/prediction hooks.
    """

    def __init__(self, **kwargs):
        # Arbitrary named parameters, kept for later inspection/reporting.
        self.params = kwargs

    def model_init(self, numDiscStates, contFeatureRanges, numActions, rewardRange):
        """Record problem dimensions and derive per-feature ranges and spans.

        Args:
            numDiscStates: Number of discrete state values.
            contFeatureRanges: Sequence of (min, max) pairs, one per continuous feature.
            numActions: Number of discrete actions.
            rewardRange: (min, max) of possible rewards.
        """
        self.numDiscStates = numDiscStates
        self.numContStates = len(contFeatureRanges)
        self.numActions = numActions
        self.reward_range = rewardRange
        # Row 0 covers the discrete state index; remaining rows are the
        # continuous feature ranges.
        self.feature_ranges = numpy.array([[0, numDiscStates - 1]] + list(contFeatureRanges))
        lows = self.feature_ranges[:, 0]
        highs = self.feature_ranges[:, 1]
        spans = numpy.ones(len(self.feature_ranges))
        varying = lows != highs
        # Constant dimensions keep span 1 so later divisions by the span are safe.
        spans[varying] = highs[varying] - lows[varying]
        self.feature_span = spans

    def randParameter(self, param_key, args, sample=None):
        """Utility for randomize_parameters: fix or sample one parameter.

        If param_key is absent from args it is set to sample (drawn uniformly
        from [0, 1) when sample is None); whichever value args ends up holding
        is mirrored into self.params.
        """
        if sample is None:
            sample = numpy.random.random()
        self.params[param_key] = args.setdefault(param_key, sample)

    def randomize_parameters(self, **args):
        """Generate parameters randomly, constrained by given named parameters.

        If used, this must be called before agent_init in order to have the
        desired effect. Parameters that fundamentally change the algorithm
        (e.g. basis or softmax choice) are not randomized over; basis
        parameters, which have many possible values, are.

        Args:
            **args: Named parameters to fix (never randomly generated).

        Returns:
            The resulting parameter mapping. Empty if parameter free.
        """
        return args

    def updateExperience(self, lastState, action, newState, reward):
        """Incorporate one transition; True means the model changed enough to replan."""
        return False

    def getStateSpace(self):
        """Return (feature_ranges, numActions) describing the state-action space."""
        return self.feature_ranges, self.numActions

    def sampleStateActions(self, num_requested):
        """Sample state-action pairs; does not guarantee num_requested are
        returned, but never provides more than num_requested."""
        pass

    def predict(self, state, action):
        """Predict the outcome of taking action in state."""
        pass

    def predictSet(self, states):
        """Predict outcomes for a batch of state-action pairs."""
        pass

    def isKnown(self, state, action):
        """True when the model considers (state, action) sufficiently explored."""
        return False
5 | -------------------------------------------------------------------------------- /pyrl/agents/planners/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | __all__ = ["planner", "fitted_qiteration"] 3 | -------------------------------------------------------------------------------- /pyrl/agents/planners/planner.py: -------------------------------------------------------------------------------- 1 | 2 | from random import Random 3 | import numpy 4 | 5 | class Planner(object): 6 | 7 | def __init__(self, model, **kwargs): 8 | self.model = model 9 | self.gamma = kwargs.setdefault('gamma', 1.0) 10 | self.params = kwargs 11 | self.randGenerator = Random() 12 | 13 | 14 | def planner_init(self, numDiscStates, contFeatureRanges, numActions, rewardRange): 15 | pass 16 | 17 | def randParameter(self, param_key, args, sample=None): 18 | """A utility function for use inside randomize_parameters. Takes a parameter 19 | key (name), the named arguments passed to randomize_parameters, and optionally 20 | the sampled random value to set in case the key does not exist in the arguments. 21 | 22 | This will then set it (if not already present) in args and assign which ever value 23 | args ends up with into params. 24 | """ 25 | if sample is None: 26 | sample = numpy.random.random() 27 | self.params[param_key] = args.setdefault(param_key, sample) 28 | 29 | def randomize_parameters(self, **args): 30 | """Generate parameters randomly, constrained by given named parameters. 31 | 32 | Parameters that fundamentally change the algorithm are not randomized over. For 33 | example, basis and softmax fundamentally change the domain and have very few values 34 | to be considered. They are not randomized over. 35 | 36 | Basis parameters, on the other hand, have many possible values and ARE randomized. 
@register_agent
class qlearning_agent(sarsa_lambda.sarsa_lambda):
    """Q-Learning with linear function approximation and eligibility traces.

    Reuses the sarsa_lambda machinery but is off-policy: the TD target is the
    greedy successor action's value rather than the action actually taken.
    """

    name = "Q-Learning"

    def agent_step(self, reward, observation):
        """Take one step in an episode, as the result of the last action.

        Args:
            reward: Reward received for taking the last action from the previous state.
            observation: The next observation, a consequence of the previous action.

        Returns:
            The next action, as an RLGlue Action object.
        """
        next_state = numpy.array(list(observation.doubleArray))
        prev_state = numpy.array(list(self.lastObservation.doubleArray))
        prev_action = self.lastAction.intArray[0]

        next_disc = self.getDiscState(observation.intArray)
        prev_disc = self.getDiscState(self.lastObservation.intArray)

        # Feature vector for the (previous state, previous action) pair.
        phi_t = numpy.zeros(self.traces.shape)
        phi_t[prev_disc, :, prev_action] = self.basis.computeFeatures(prev_state)

        self.update_traces(phi_t, None)
        self.update(phi_t, next_state, next_disc, reward)

        # Off-policy: Q-learning can choose the next action after the update.
        chosen = self.getAction(next_state, next_disc)
        act = Action()
        act.intArray = [chosen]

        self.lastAction = copy.deepcopy(act)
        self.lastObservation = copy.deepcopy(observation)
        return act

    def getActionValues(self, state, discState):
        """Q-values for every action in the given state; zeros for a terminal
        (None) state."""
        if state is None:
            return numpy.zeros((self.numActions,))
        return numpy.dot(self.weights[discState, :, :].T, self.basis.computeFeatures(state))

    def update(self, phi_t, state, discState, reward):
        """One Q-learning TD update toward the greedy successor action."""
        qvalues = self.getActionValues(state, discState)
        greedy = qvalues.argmax()
        phi_tp = numpy.zeros(self.traces.shape)
        if state is not None:
            phi_tp[discState, :, greedy] = self.basis.computeFeatures(state)

        # TD error against the greedy successor value.
        delta = self.gamma * qvalues[greedy] + reward - numpy.dot(self.weights.flatten(), phi_t.flatten())

        # rescale_update reconciles scalar and per-weight step-sizes
        # (maybe they should work together more naturally).
        self.weights += self.rescale_update(phi_t, phi_tp, delta, reward, delta * self.traces)

    def agent_end(self, reward):
        """Receive the final reward of an episode, signaling its end.

        Args:
            reward: Reward received for taking the last action from the previous state.
        """
        prev_state = numpy.array(list(self.lastObservation.doubleArray))
        prev_action = self.lastAction.intArray[0]
        prev_disc = self.getDiscState(self.lastObservation.intArray)

        phi_t = numpy.zeros(self.traces.shape)
        phi_t[prev_disc, :, prev_action] = self.basis.computeFeatures(prev_state)

        self.update_traces(phi_t, None)
        # Terminal update: no successor state.
        self.update(phi_t, None, 0, reward)
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | PROJECT(CTiles) 4 | 5 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) 6 | 7 | file(GLOB SRC 8 | "src/*.h" 9 | "src/*.cpp" 10 | "src/*.C" 11 | ) 12 | 13 | find_package(PythonInterp REQUIRED) 14 | find_package(PythonLibs REQUIRED) 15 | 16 | include_directories(${PYTHON_INCLUDE_DIRS}) 17 | 18 | 19 | add_library(tiles MODULE ${SRC}) 20 | set_target_properties(tiles PROPERTIES PREFIX "") 21 | target_link_libraries(tiles ${PYTHON_LIBRARIES}) 22 | -------------------------------------------------------------------------------- /pyrl/basis/CTiles/README.md: -------------------------------------------------------------------------------- 1 | pyrl.basis.CTiles 2 | ==================== 3 | 4 | This folder contains the C version of tile coding, as well as the python routines which call the c version of tiles. 5 | This was written by Rich Sutton and only the makefile has changed from the original. 6 | 7 | The following files are here: 8 | 9 | Makefile - compiles both the C version and the Python->C version (for Mac or Linux) 10 | tiles.h - header for C version of tiles 11 | tiles.cpp - c++ version of tiles 12 | tiletimes.cpp - timing code for c calling c version of tiles 13 | tilesInt.C - interface so that Python can call the c version 14 | tiletimes.py - timing code for the python calling c version of tiles 15 | fancytiles.py - code to get different shapes and sizes of tiles 16 | 17 | To use these: 18 | In a terminal window: 19 | cmake . 20 | make 21 | ... this creates the tiles.so and tiles.o files 22 | 23 | 24 | Note About CMake and Python on Mac: 25 | For some reason things can sometimes get messed up with this combination. Some people claim 26 | this is a bug in cmake or a bug from mac. It comes up when you have multiple python distributions 27 | installed. 
So, most people should be fine, but if you get a fatal error when trying to use this 28 | module in python you should look into uninstalling the unused distributions or pass to cmake 29 | the following arguments with the correct values filled in: 30 | 31 | -DPYTHON_LIBRARY=... -DPYTHON_INCLUDE=... 32 | 33 | -------------------------------------------------------------------------------- /pyrl/basis/CTiles/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /pyrl/basis/CTiles/src/tiles.h: -------------------------------------------------------------------------------- 1 | #ifndef _TILES_H_ #define _TILES_H_ #include #include #include #include #define MAX_NUM_VARS 20 // Maximum number of variables in a grid-tiling #define MAX_NUM_COORDS 100 // Maximum number of hashing coordinates #define MaxLONGINT 2147483647 void tiles( int the_tiles[], // provided array contains returned tiles (tile indices) int num_tilings, // number of tile indices to be returned in tiles int memory_size, // total number of possible tiles float floats[], // array of floating point variables int num_floats, // number of floating point variables int ints[], // array of integer variables int num_ints); // number of integer variables class collision_table { public: collision_table(int,int); ~collision_table(); long m; long *data; int safe; long calls; long clearhits; long collisions; void reset(); int usage(); void print(); void save(int); void restore(int); }; void tiles( int the_tiles[], // provided array contains returned tiles (tile indices) int num_tilings, // number of tile indices to be returned in tiles collision_table *ctable, // total number of possible tiles float floats[], // array of floating point variables int num_floats, // number of floating point variables int ints[], // array of integer variables int num_ints); // number of integer variables int 
hash_UNH(int *ints, int num_ints, long m, int increment); int hash(int *ints, int num_ints, collision_table *ctable); // no ints void tiles(int the_tiles[],int nt,int memory,float floats[],int nf); void tiles(int the_tiles[],int nt,collision_table *ct,float floats[],int nf); // one int void tiles(int the_tiles[],int nt,int memory,float floats[],int nf,int h1); void tiles(int the_tiles[],int nt,collision_table *ct,float floats[],int nf,int h1); // two ints void tiles(int the_tiles[],int nt,int memory,float floats[],int nf,int h1,int h2); void tiles(int the_tiles[],int nt,collision_table *ct,float floats[],int nf,int h1,int h2); // three ints void tiles(int the_tiles[],int nt,int memory,float floats[],int nf,int h1,int h2,int h3); void tiles(int the_tiles[],int nt,collision_table *ct,float floats[],int nf,int h1,int h2,int h3); // one float, no ints void tiles1(int the_tiles[],int nt,int memory,float f1); void tiles1(int the_tiles[],int nt,collision_table *ct,float f1); // one float, one int void tiles1(int the_tiles[],int nt,int memory,float f1,int h1); void tiles1(int the_tiles[],int nt,collision_table *ct,float f1,int h1); // one float, two ints void tiles1(int the_tiles[],int nt,int memory,float f1,int h1,int h2); void tiles1(int the_tiles[],int nt,collision_table *ct,float f1,int h1,int h2); // one float, three ints void tiles1(int the_tiles[],int nt,int memory,float f1,int h1,int h2,int h3); void tiles1(int the_tiles[],int nt,collision_table *ct,float f1,int h1,int h2,int h3); // two floats, no ints void tiles2(int the_tiles[],int nt,int memory,float f1,float f2); void tiles2(int the_tiles[],int nt,collision_table *ct,float f1,float f2); // two floats, one int void tiles2(int the_tiles[],int nt,int memory,float f1,float f2,int h1); void tiles2(int the_tiles[],int nt,collision_table *ct,float f1,float f2,int h1); // two floats, two ints void tiles2(int the_tiles[],int nt,int memory,float f1,float f2,int h1,int h2); void tiles2(int the_tiles[],int 
nt,collision_table *ct,float f1,float f2,int h1,int h2); // two floats, three ints void tiles2(int the_tiles[],int nt,int memory,float f1,float f2,int h1,int h2,int h3); void tiles2(int the_tiles[],int nt,collision_table *ct,float f1,float f2,int h1,int h2,int h3); void tileswrap( int the_tiles[], // provided array contains returned tiles (tile indices) int num_tilings, // number of tile indices to be returned in tiles int memory_size, // total number of possible tiles float floats[], // array of floating point variables int num_floats, // number of floating point variables int wrap_widths[], // array of widths (length and units as in floats) int ints[], // array of integer variables int num_ints); // number of integer variables void tileswrap( int the_tiles[], // provided array contains returned tiles (tile indices) int num_tilings, // number of tile indices to be returned in tiles collision_table *ctable, // total number of possible tiles float floats[], // array of floating point variables int num_floats, // number of floating point variables int wrap_widths[], // array of widths (length and units as in floats) int ints[], // array of integer variables int num_ints); // number of integer variables #endif -------------------------------------------------------------------------------- /pyrl/basis/CTiles/tiletimes.py: -------------------------------------------------------------------------------- 1 | # tile timing tests 2 | import random 3 | random.seed(65597) 4 | 5 | import tiles 6 | import timeit 7 | 8 | def runit (num=10, ct=2048, numt=1): 9 | for i in xrange(num): 10 | for j in xrange(num): 11 | t = tiles.tiles(numt, ct, [i*0.5, j*0.5]) 12 | def runit2 (num=10, ct=2048, numt=1): 13 | for i in xrange(num): 14 | for j in xrange(num): 15 | t = tiles.tiles(numt, ct, [i*0.5, j*0.5, float(i+j)/2, float(i-j)/2], [i, j]) 16 | def runitw (num=10, ct=2048, numt=1): 17 | for i in xrange(num): 18 | for j in xrange(num): 19 | t = tiles.tileswrap(numt, ct, [i*0.5, j*0.5], 
[10, 1]) 20 | def runitl (num=10, ct=2048, numt=1): 21 | tlist = [None for i in range(num*num*numt)] 22 | for i in xrange(num): 23 | for j in xrange(num): 24 | t = tiles.loadtiles(tlist, i*num*numt+j, numt, ct, [i*0.5, j*0.5]) 25 | def runitlw (num=10, ct=2048, numt=1): 26 | tlist = [None for i in range(num*num*numt)] 27 | for i in xrange(num): 28 | for j in xrange(num): 29 | tiles.loadtileswrap(tlist, i*num*numt+j, numt, ct, [i*0.5, j*0.5], [10, 1]) 30 | return tlist 31 | 32 | def initct(mem=16384): 33 | global ctu, cts, ctss 34 | ctu=tiles.CollisionTable(mem, safetyval='unsafe') 35 | cts=tiles.CollisionTable(mem, safetyval='safe') 36 | ctss=tiles.CollisionTable(mem, safetyval='super safe') 37 | 38 | def timetest(command, info, info2='2 floats', num=100, numt=1, mem=16384): 39 | initct(mem) 40 | print " " 41 | print info 42 | print "Timing over", num*num, "calls to tiles,", numt, "tiling each for", info2 43 | t= timeit.Timer(command + '('+str(num)+','+str(mem)+','+str(numt)+')', 'from __main__ import ' + command) 44 | print "With no collision table", t.timeit(1), "seconds" 45 | t= timeit.Timer(command + '('+str(num)+', ctu'+','+str(numt)+')', 'from __main__ import ctu, ' + command) 46 | print "With unsafe collision table", t.timeit(1), "seconds" 47 | print ctu 48 | t= timeit.Timer(command + '('+str(num)+', cts'+','+str(numt)+')', 'from __main__ import cts, ' + command) 49 | print "With safe collision table", t.timeit(1), "seconds" 50 | print cts 51 | t= timeit.Timer(command + '('+str(num)+', ctss'+','+str(numt)+')', 'from __main__ import ctss, ' + command) 52 | print "With super safe collision table", t.timeit(1), "seconds" 53 | print ctss 54 | print " " 55 | #print "Timing over", num*num, "calls to tiles, 16 tilings each for", info2 56 | #t= timeit.Timer(command + '('+str(num)+', 16384, 16)', 'from __main__ import ' + command) 57 | #print "With no collision table", t.timeit(1), "seconds" 58 | 59 | timetest('runit', "Standard test", numt=4) 60 | 
#timetest('runit2', 'Testing with more input variables','4 floats, 2 ints', 100, 3, 32768) 61 | timetest('runitw', 'WRAP version', numt=4) 62 | timetest('runitl', 'Load version', '2 floats', 100, 4) # only do 10 x 10 calls, but with 4 tilings each 63 | timetest('runitlw', 'Load WRAP version', '2 floats', 100, 4) 64 | 65 | -------------------------------------------------------------------------------- /pyrl/basis/README.md: -------------------------------------------------------------------------------- 1 | python-rl.basis 2 | ========= 3 | 4 | Basis functions used in function (usually linear) approximation by the agents. 5 | -------------------------------------------------------------------------------- /pyrl/basis/Tiles/.gitignore: -------------------------------------------------------------------------------- 1 | */*.py[co] 2 | */*.o 3 | */*.so 4 | 5 | # Packages 6 | *.egg 7 | *.egg-info 8 | dist 9 | build 10 | eggs 11 | parts 12 | bin 13 | var 14 | sdist 15 | develop-eggs 16 | .installed.cfg 17 | */*~ 18 | # Installer logs 19 | pip-log.txt 20 | 21 | # Unit test / coverage reports 22 | .coverage 23 | .tox 24 | 25 | #Translations 26 | *.mo 27 | 28 | #Mr Developer 29 | .mr.developer.cfg 30 | -------------------------------------------------------------------------------- /pyrl/basis/Tiles/README.md: -------------------------------------------------------------------------------- 1 | pyrl.basis.Tiles 2 | ================= 3 | 4 | This is the tile coding python implementation provided by Rich Sutton. The only reason to prefer this 5 | python only implementation over the CTiles package is that CTiles is very slow when doing loadtiles calls. 
6 | 7 | Contents: 8 | tiles.py - python tiles code 9 | fancytiles.py - code for making different shapes and sizes of tiles 10 | tiletimes.py - timing code for tiles 11 | -------------------------------------------------------------------------------- /pyrl/basis/Tiles/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pyrl/basis/Tiles/tiletimes.py: -------------------------------------------------------------------------------- 1 | # tile timing tests 2 | import random 3 | random.seed(65597) 4 | 5 | import tiles 6 | import tilesn 7 | import timeit 8 | 9 | def runit (num=10, ct=2048, numt=1): 10 | for i in xrange(num): 11 | for j in xrange(num): 12 | t = tiles.tiles(numt, ct, [i*0.5, j*0.5]) 13 | def runitn (num=10, ct=2048, numt=4): 14 | for i in xrange(num): 15 | for j in xrange(num): 16 | t = tilesn.tiles(numt, ct, [i*0.5, j*0.5]) 17 | def runit2 (num=10, ct=2048, numt=1): 18 | for i in xrange(num): 19 | for j in xrange(num): 20 | t = tiles.tiles(numt, ct, [i*0.5, j*0.5, float(i+j)/2, float(i-j)/2], [i, j]) 21 | def runitw (num=10, ct=2048, numt=1): 22 | for i in xrange(num): 23 | for j in xrange(num): 24 | t = tiles.tileswrap(numt, ct, [i*0.5, j*0.5], [10, 1]) 25 | def runitl (num=10, ct=2048, numt=1): 26 | tlist = [None for i in range(num*num*numt)] 27 | for i in xrange(num): 28 | for j in xrange(num): 29 | t = tiles.loadtiles(tlist, i*num*numt+j, numt, ct, [i*0.5, j*0.5]) 30 | def runitlw (num=10, ct=2048, numt=1): 31 | tlist = [None for i in range(num*num*numt)] 32 | for i in xrange(num): 33 | for j in xrange(num): 34 | tiles.loadtileswrap(tlist, i*num*numt+j, numt, ct, [i*0.5, j*0.5], [10, 1]) 35 | return tlist 36 | 37 | def initct(mem=16384): 38 | global ctu, cts, ctss 39 | ctu=tiles.CollisionTable(mem, safetyval='unsafe') 40 | cts=tiles.CollisionTable(mem, safetyval='safe') 41 | ctss=tiles.CollisionTable(mem, safetyval='super 
safe') 42 | 43 | def timetest(command, info, info2='2 floats', num=100, numt=1, mem=16384): 44 | initct(mem) 45 | print " " 46 | print info 47 | print "Timing over", num*num, "calls to tiles,", numt, "tiling each for", info2 48 | t= timeit.Timer(command + '('+str(num)+','+str(mem)+','+str(numt)+')', 'from __main__ import ' + command) 49 | print "With no collision table", t.timeit(1), "seconds" 50 | t= timeit.Timer(command + '('+str(num)+', ctu'+','+str(numt)+')', 'from __main__ import ctu, ' + command) 51 | print "With unsafe collision table", t.timeit(1), "seconds" 52 | print ctu 53 | t= timeit.Timer(command + '('+str(num)+', cts'+','+str(numt)+')', 'from __main__ import cts, ' + command) 54 | print "With safe collision table", t.timeit(1), "seconds" 55 | print cts 56 | t= timeit.Timer(command + '('+str(num)+', ctss'+','+str(numt)+')', 'from __main__ import ctss, ' + command) 57 | print "With super safe collision table", t.timeit(1), "seconds" 58 | print ctss 59 | print " " 60 | print "Timing over", num*num, "calls to tiles, 16 tilings each for", info2 61 | t= timeit.Timer(command + '('+str(num)+', 16384, 16)', 'from __main__ import ' + command) 62 | print "With no collision table", t.timeit(1), "seconds" 63 | 64 | timetest('runit', "Standard test", numt=4) 65 | timetest('runit2', 'Testing with more input variables','4 floats, 2 ints', 100, 3, 32768) 66 | timetest('runitw', 'WRAP version', numt=4) 67 | timetest('runitl', 'Load version', '2 floats', 100, 4) # only do 10 x 10 calls, but with 4 tilings each 68 | timetest('runitlw', 'Load WRAP version', '2 floats', 100, 4) 69 | 70 | """ 71 | print " " 72 | print "Tiles with num array" 73 | ctu=tilesn.CollisionTable(16384, safetyval='unsafe') 74 | cts=tilesn.CollisionTable(16384, safetyval='safe') 75 | ctss=tilesn.CollisionTable(16384, safetyval='super safe') 76 | print "Timing over 10000 calls to tiles with numarray, 4 tiling each for 2 floats" 77 | t= timeit.Timer('runitn(100, 16384)', 'from __main__ import runitn') 
class FourierBasis(trivial.TrivialBasis):
    """Fourier Basis linear function approximation.

    Requires the ranges for each dimension, and is thus able to use only sine
    or cosine (and uses cosine), so it has half the coefficients a full
    Fourier approximation would use.

    From the paper:
    G.D. Konidaris, S. Osentoski and P.S. Thomas.
    Value Function Approximation in Reinforcement Learning using the Fourier Basis.
    In Proceedings of the Twenty-Fifth Conference on Artificial Intelligence,
    pages 380-385, August 2011.
    """

    def __init__(self, nvars, ranges, order=3):
        """Build the (order+1)^nvars coefficient vectors.

        Args:
            nvars: Number of state variables.
            ranges: Sequence of (min, max) pairs, one per variable.
            order: Fourier basis order (highest coefficient per dimension).
        """
        # Integer arithmetic: pow(order + 1.0, nvars) made numTerms a float,
        # which is invalid wherever it is used as a count or array size.
        self.numTerms = (order + 1) ** nvars
        self.order = order
        self.ranges = numpy.array(ranges)
        # Every coefficient vector c in {0, ..., order}^nvars.
        # (Renamed from `iter`, which shadowed the builtin.)
        coefficients = itertools.product(range(order + 1), repeat=nvars)
        self.multipliers = numpy.array([list(map(int, c)) for c in coefficients])

    def computeFeatures(self, features):
        """Return phi_i(x) = cos(pi * c_i . scale(x)), x scaled to [0,1] per dim."""
        if len(features) == 0:
            return numpy.ones((1,))
        scaled = numpy.array([self.scale(features[i], i) for i in range(len(features))])
        return numpy.cos(numpy.pi * numpy.dot(self.multipliers, scaled))
class TrivialBasis(object):
    """Identity basis: uses the state features themselves as the basis.

    Does a little basic manipulation to keep values reasonable: each feature
    is linearly rescaled from its known range into [-1, +1]; constant
    (zero-width range) dimensions map to a fixed value.
    """

    def __init__(self, nvars, ranges):
        self.numTerms = nvars
        self.ranges = numpy.array(ranges)

    def scale(self, value, pos):
        """Rescale value of dimension pos into [0, 1] (0.0 for constant dims)."""
        low, high = self.ranges[pos, 0], self.ranges[pos, 1]
        if low == high:
            # Degenerate (constant) dimension: avoid division by zero.
            return 0.0
        return (value - low) / (high - low)

    def getNumBasisFunctions(self):
        return self.numTerms

    def computeFeatures(self, features):
        """Map raw features into [-1, 1]; returns [1.0] for empty input."""
        if len(features) == 0:
            return numpy.ones((1,))
        scaled = numpy.array([self.scale(f, i) for i, f in enumerate(features)])
        return (scaled - 0.5) * 2.
# Author: Pierre-Luc Bacon

# Environments that are always importable.
__all__ = ["fuelworld", "gridworld", "mountaincar", "acrobot", "cartpole",
           "multiroom", "skeleton_environment", "taxi", "windyworld",
           "batch_replenish", "puddleworld", "neurostim", "marble_maze",
           "bicycle", "chain", "twip"]

# Optional environments: exported only when their compiled extensions or
# third-party dependencies import successfully. The previous bare
# ``except:`` clauses swallowed *all* exceptions (including SystemExit and
# KeyboardInterrupt); ``except Exception`` keeps the best-effort behaviour
# (a broken extension still just disables the environment) without
# masking interpreter-level signals.
try:
    from libPOMDP import libpomdp
    __all__.append("pomdp")
except Exception:
    pass

try:
    from mdptetris import mdptetris
    __all__.append("tetris")
except Exception:
    pass


try:
    import pygame
    __all__.append("pinball")
except Exception:
    pass
15 | # 16 | 17 | import numpy 18 | from rlglue.environment.Environment import Environment 19 | from rlglue.environment import EnvironmentLoader as EnvironmentLoader 20 | from rlglue.types import Observation 21 | from rlglue.types import Action 22 | from rlglue.types import Reward_observation_terminal 23 | from pyrl.rlglue import TaskSpecRLGlue 24 | from pyrl.rlglue.registry import register_environment 25 | 26 | @register_environment 27 | class BatchReplenishment(Environment): 28 | """Batch replenishment inventory control task. 29 | 30 | The domain was given by George and Powell 2006. It is an example of a simple 31 | domain in which no fixed step-size performs well, but adaptive step-sizes 32 | do well. 33 | """ 34 | 35 | name = "Batch Replenishment" 36 | 37 | def __init__(self, demand_mean = 10.0, demand_std = 1.0, payoff = 5., 38 | cost = 2., gamma = 0.99, time_period = 20, noise=0.0): 39 | 40 | self.T = time_period 41 | self.noise = noise 42 | self.demand = numpy.array([demand_mean, demand_std]) 43 | self.payoff = payoff 44 | self.cost = cost 45 | self.discount = gamma 46 | self.max_quantity = 200. 
47 | self.domain_name = "Noisy Batch Replenishment Problem" 48 | 49 | def makeTaskSpec(self): 50 | ts = TaskSpecRLGlue.TaskSpec(discount_factor=self.discount, 51 | reward_range=(-self.max_quantity * self.cost, 52 | self.max_quantity * self.payoff)) 53 | ts.addDiscreteAction((0, 3)) # Representing purchase of 0, 1, 10, and 100 units 54 | ts.addContinuousObservation((0.0, self.max_quantity)) 55 | ts.addContinuousObservation((0.0, self.max_quantity)) 56 | ts.setEpisodic() 57 | ts.setExtra(self.domain_name) 58 | return ts.toTaskSpec() 59 | 60 | def reset(self): 61 | # Start with no resources in stock, and no unsatisfied demand 62 | self.state = numpy.zeros((2,)) 63 | self.counter = 0 64 | 65 | def env_init(self): 66 | return self.makeTaskSpec() 67 | 68 | def env_start(self): 69 | self.reset() 70 | returnObs = Observation() 71 | returnObs.doubleArray = self.state.tolist() 72 | return returnObs 73 | 74 | def takeAction(self, intAction): 75 | x = 0. if intAction == 0 else 10.**(intAction-1) 76 | self.counter += 1 77 | # If noisy, create noise on cost/payoff 78 | paynoise = numpy.random.normal(scale=self.noise) if self.noise > 0 else 0.0 79 | costnoise = numpy.random.normal(scale=self.noise) if self.noise > 0 else 0.0 80 | 81 | # Update random demand 82 | self.state[1] = min(self.max_quantity, 83 | max(0., numpy.random.normal(self.demand[0], scale=self.demand[1]))) 84 | reward = (self.payoff + paynoise) * self.state.min() - (self.cost + costnoise) * x 85 | self.state[0] = min(self.max_quantity, max(0., self.state[0] - self.state[1]) + x) 86 | 87 | 88 | return reward/600. 
    def env_step(self,thisAction):
        """RL-Glue step: apply the agent's action, return reward/observation.

        thisAction -- rlglue Action; intArray[0] selects the purchase action.
        Returns a Reward_observation_terminal. The episode is fixed-horizon:
        it terminates once ``counter`` reaches the time period ``T``.
        """
        intAction = thisAction.intArray[0]
        theReward = self.takeAction(intAction)

        # The observation is the current (stock, demand) state pair.
        theObs = Observation()
        theObs.doubleArray = self.state.tolist()

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        # Terminal after exactly T steps (counter is incremented in takeAction).
        returnRO.terminal = int(self.counter >= self.T)

        return returnRO

    def env_cleanup(self):
        """RL-Glue cleanup hook; nothing to release for this environment."""
        pass

    def env_message(self,inMessage):
        """RL-Glue message hook; no custom messages are supported."""
        return "I don't know how to respond to your message";
(Number of steps to run)") 121 | parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate") 122 | args = parser.parse_args() 123 | EnvironmentLoader.loadEnvironment(BatchReplenishment(demand_mean=args.demand_mean, 124 | demand_std=args.demand_std, 125 | payoff=args.payoff, 126 | cost=args.cost, 127 | gamma=args.discount_factor, 128 | time_period = args.time_period, 129 | noise=args.noise)) 130 | -------------------------------------------------------------------------------- /pyrl/environments/chain.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013, Will Dabney 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | import numpy 18 | from rlglue.environment.Environment import Environment 19 | from rlglue.environment import EnvironmentLoader as EnvironmentLoader 20 | from rlglue.types import Observation 21 | from rlglue.types import Action 22 | from rlglue.types import Reward_observation_terminal 23 | from pyrl.rlglue import TaskSpecRLGlue 24 | from pyrl.rlglue.registry import register_environment 25 | 26 | @register_environment 27 | class Chain(Environment): 28 | """The simple 5-state chain domain often used in the literature for more 29 | theoretical methods that don't scale as well to large problems. Its also 30 | a good demonstration of the need for sufficient exploration. 
31 | 32 | From paper: 33 | Bayesian Q-learning. 1998. 34 | Richard Dearden, Nir Friedman, and Stuart Russell. 35 | """ 36 | name = "Chain" 37 | 38 | def __init__(self, **kwargs): 39 | self.state = 0 40 | self.chain_size = kwargs.setdefault("chain_size", 5) 41 | self.slip_prob = kwargs.setdefault("slip_prob", 0.2) 42 | self.goal_reward = 10.0 43 | self.left_reward = 2.0 44 | self.right_reward = 0.0 45 | 46 | def makeTaskSpec(self): 47 | ts = TaskSpecRLGlue.TaskSpec(discount_factor=0.99, reward_range=(0.0, 10.0)) 48 | ts.addDiscreteAction((0, 1)) 49 | ts.addDiscreteObservation((0, self.chain_size-1)) 50 | ts.setContinuing() 51 | ts.setExtra(self.name) 52 | return ts.toTaskSpec() 53 | 54 | def getState(self): 55 | return [self.state] 56 | 57 | def reset(self): 58 | self.state = 0 59 | 60 | def env_init(self): 61 | return self.makeTaskSpec() 62 | 63 | def env_start(self): 64 | self.reset() 65 | returnObs = Observation() 66 | returnObs.intArray = self.getState() 67 | return returnObs 68 | 69 | def isAtGoal(self): 70 | return self.state == self.chain_size-1 71 | 72 | def takeAction(self, intAction): 73 | if numpy.random.random() < self.slip_prob: 74 | intAction = 0 if intAction == 1 else 1 75 | 76 | if intAction == 0: 77 | self.state = 0 78 | return self.left_reward 79 | else: 80 | self.state = min(self.chain_size-1, self.state+1) 81 | if self.isAtGoal(): 82 | return self.goal_reward 83 | else: 84 | return self.right_reward 85 | 86 | def env_step(self,thisAction): 87 | intAction = int(thisAction.intArray[0]) 88 | theReward = self.takeAction(intAction) 89 | theObs = Observation() 90 | theObs.intArray = self.getState() 91 | 92 | returnRO = Reward_observation_terminal() 93 | returnRO.r = theReward 94 | returnRO.o = theObs 95 | returnRO.terminal = 0 96 | 97 | return returnRO 98 | 99 | def env_cleanup(self): 100 | pass 101 | 102 | def env_message(self,inMessage): 103 | return "I don't know how to respond to your message"; 104 | 105 | 106 | 107 | 108 | 
-------------------------------------------------------------------------------- /pyrl/environments/configs/neurostim/params.dat: -------------------------------------------------------------------------------- 1 | test_features.dat 2 | test_labels.dat 3 | test_stimulation.dat 4 | 23198 5 | 5 6 | 3 7 | 3 8 | 0.02 9 | 5 -------------------------------------------------------------------------------- /pyrl/environments/configs/neurostim/test_stimulation.dat: -------------------------------------------------------------------------------- 1 | -0.3708554 -0.4063264 0.5785546 0.4593724 0.3791109 2 | -------------------------------------------------------------------------------- /pyrl/environments/configs/pinball/pinball_hard_single.cfg: -------------------------------------------------------------------------------- 1 | ball 0.015 2 | target 0.5 0.06 0.04 3 | start 0.055 0.95 4 | 5 | polygon 0.0 0.0 0.0 0.01 1.0 0.01 1.0 0.0 6 | polygon 0.0 0.0 0.01 0.0 0.01 1.0 0.0 1.0 7 | polygon 0.0 1.0 0.0 0.99 1.0 0.99 1.0 1.0 8 | polygon 1.0 1.0 0.99 1.0 0.99 0.0 1.0 0.0 9 | polygon 0.034 0.852 0.106 0.708 0.33199999999999996 0.674 0.17599999999999996 0.618 0.028 0.718 10 | polygon 0.15 0.7559999999999999 0.142 0.93 0.232 0.894 0.238 0.99 0.498 0.722 11 | polygon 0.8079999999999999 0.91 0.904 0.784 0.7799999999999999 0.572 0.942 0.562 0.952 0.82 0.874 0.934 12 | polygon 0.768 0.814 0.692 0.548 0.594 0.47 0.606 0.804 0.648 0.626 13 | polygon 0.22799999999999998 0.5760000000000001 0.39 0.322 0.3400000000000001 0.31400000000000006 0.184 0.456 14 | polygon 0.09 0.228 0.242 0.076 0.106 0.03 0.022 0.178 15 | polygon 0.11 0.278 0.24600000000000002 0.262 0.108 0.454 0.16 0.566 0.064 0.626 0.016 0.438 16 | polygon 0.772 0.1 0.71 0.20599999999999996 0.77 0.322 0.894 0.09600000000000002 0.8039999999999999 0.17600000000000002 17 | polygon 0.698 0.476 0.984 0.27199999999999996 0.908 0.512 18 | polygon 0.45 0.39199999999999996 0.614 0.25799999999999995 0.7340000000000001 0.438 19 | polygon 
0.476 0.868 0.552 0.8119999999999999 0.62 0.902 0.626 0.972 0.49 0.958 20 | polygon 0.61 0.014000000000000002 0.58 0.094 0.774 0.05000000000000001 0.63 0.054000000000000006 21 | polygon 0.33399999999999996 0.014 0.27799999999999997 0.03799999999999998 0.368 0.254 0.7 0.20000000000000004 0.764 0.108 0.526 0.158 22 | polygon 0.294 0.584 0.478 0.626 0.482 0.574 0.324 0.434 0.35 0.39 0.572 0.52 0.588 0.722 0.456 0.668 23 | -------------------------------------------------------------------------------- /pyrl/environments/configs/pinball/pinball_simple_single.cfg: -------------------------------------------------------------------------------- 1 | ball 0.02 2 | target 0.9 0.2 0.04 3 | start 0.2 0.9 4 | 5 | polygon 0.0 0.0 0.0 0.01 1.0 0.01 1.0 0.0 6 | polygon 0.0 0.0 0.01 0.0 0.01 1.0 0.0 1.0 7 | polygon 0.0 1.0 0.0 0.99 1.0 0.99 1.0 1.0 8 | polygon 1.0 1.0 0.99 1.0 0.99 0.0 1.0 0.0 9 | 10 | polygon 0.35 0.4 0.45 0.55 0.43 0.65 0.3 0.7 0.45 0.7 0.5 0.6 0.45 0.35 11 | polygon 0.2 0.6 0.25 0.55 0.15 0.5 0.15 0.45 0.2 0.3 0.12 0.27 0.075 0.35 0.09 0.55 12 | polygon 0.3 0.8 0.6 0.75 0.8 0.8 0.8 0.9 0.6 0.85 0.3 0.9 13 | polygon 0.8 0.7 0.975 0.65 0.75 0.5 0.9 0.3 0.7 0.35 0.63 0.65 14 | polygon 0.6 0.25 0.3 0.07 0.15 0.175 0.15 0.2 0.3 0.175 0.6 0.3 15 | polygon 0.75 0.025 0.8 0.24 0.725 0.27 0.7 0.025 16 | -------------------------------------------------------------------------------- /pyrl/environments/configs/pomdps/tiger.POMDP: -------------------------------------------------------------------------------- 1 | # This is the tiger problem of AAAI paper fame in the new pomdp 2 | # format. 
This format is still experimental and subject to change 3 | 4 | discount: 0.75 5 | values: reward 6 | states: tiger-left tiger-right 7 | actions: listen open-left open-right 8 | observations: tiger-left tiger-right 9 | 10 | T:listen 11 | identity 12 | 13 | T:open-left 14 | uniform 15 | 16 | T:open-right 17 | uniform 18 | 19 | O:listen 20 | 0.85 0.15 21 | 0.15 0.85 22 | 23 | O:open-left 24 | uniform 25 | 26 | O:open-right 27 | uniform 28 | 29 | R:listen : * : * : * -1 30 | 31 | R:open-left : tiger-left : * : * -100 32 | 33 | R:open-left : tiger-right : * : * 10 34 | 35 | R:open-right : tiger-left : * : * 10 36 | 37 | R:open-right : tiger-right : * : * -100 38 | 39 | -------------------------------------------------------------------------------- /pyrl/environments/configs/tetris/3brick.dat: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # File format describing the pieces 3 | # number of pieces 4 | # for each piece: 5 | # number of orientations, height, width (for the first orientation) 6 | # piece (with "X") 7 | ########################################################################## 8 | # 9 | # Pieces with 3 bricks 10 | # There are 2 pieces: 11 | 2 12 | # 13 | 4 2 2 14 | XX 15 | X 16 | 2 1 3 17 | XXX 18 | # End of File -------------------------------------------------------------------------------- /pyrl/environments/configs/tetris/melax.dat: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # File format describing the pieces 3 | # number of pieces 4 | # for each piece: 5 | # number of orientations, height, width (for the first orientation) 6 | # piece (with "X") 7 | ########################################################################## 8 | # 9 | # Melax's Reduced set of pieces 10 | # There are 5 pieces: 11 | 5 12 | # 13 | 1 1 1 
14 | X 15 | 2 1 2 16 | XX 17 | 2 2 2 18 | X 19 | X 20 | 4 2 2 21 | X 22 | XX 23 | 1 2 2 24 | XX 25 | XX 26 | # End of File -------------------------------------------------------------------------------- /pyrl/environments/configs/tetris/standard.dat: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # File format describing the pieces 3 | # number of pieces 4 | # for each piece: 5 | # number of orientations, height, width (for the first orientation) 6 | # piece (with "X") 7 | ########################################################################## 8 | # 9 | # Pieces with 4 bricks (standard pieces) 10 | # There are 7 pieces: 11 | 7 12 | # 13 | 2 4 1 14 | X 15 | X 16 | X 17 | X 18 | 1 2 2 19 | XX 20 | XX 21 | 4 3 2 22 | X 23 | XX 24 | X 25 | 2 3 2 26 | X 27 | XX 28 | X 29 | 2 3 2 30 | X 31 | XX 32 | X 33 | 4 2 3 34 | X 35 | XXX 36 | 4 2 3 37 | XXX 38 | X 39 | # End of File -------------------------------------------------------------------------------- /pyrl/environments/configs/tetris/sztetris.dat: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # File format describing the pieces 3 | # number of pieces 4 | # for each piece: 5 | # number of orientations, height, width (for the first orientation) 6 | # piece (with "X") 7 | ########################################################################## 8 | # 9 | # Pieces for SZ Tetris (only the N/Mirrored N (i.e. 
S/Z) pieces 10 | # There are 2 pieces: 11 | 2 12 | # 13 | 2 3 2 14 | X 15 | XX 16 | X 17 | 2 3 2 18 | X 19 | XX 20 | X 21 | # End of File -------------------------------------------------------------------------------- /pyrl/environments/fuelworld.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013, Will Dabney 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | import sys 18 | import numpy 19 | 20 | from rlglue.environment.Environment import Environment 21 | from rlglue.environment import EnvironmentLoader as EnvironmentLoader 22 | from rlglue.types import Observation 23 | from rlglue.types import Action 24 | from rlglue.types import Reward_observation_terminal 25 | 26 | from pyrl.rlglue import TaskSpecRLGlue 27 | from pyrl.rlglue.registry import register_environment 28 | 29 | from . import gridworld 30 | from scipy.stats import norm 31 | 32 | @register_environment 33 | class FuelWorld(gridworld.Gridworld): 34 | name = "Fuel World" 35 | 36 | # This is a continuous version of Todd Hester's Fuel World domain. 37 | # As such, we will make the size, starting locations, and goal fixed to 38 | # match the original's specifications. We will keep the additive gaussian noise, 39 | # and as mentioned this will be continuous instead of discrete state spaces. 
40 | def __init__(self, noise=0.0, fudge=1.4143, variation=(-10.0, -13.0, 5.0), fuel_noise=0.0): 41 | gridworld.Gridworld.__init__(self, size_x=31.0, size_y=21.0, goal_x=24.0, goal_y=11.0, 42 | noise=noise, random_start=True, fudge=fudge) 43 | self.fuel = 0.0 44 | self.fuel_noise = fuel_noise 45 | self.var = variation 46 | self.domain_name = "Continuous Fuel World" 47 | 48 | 49 | def makeTaskSpec(self): 50 | ts = TaskSpecRLGlue.TaskSpec(discount_factor=1.0, reward_range=(-400.0, 0.0)) 51 | ts.addDiscreteAction((0, 7)) 52 | ts.addContinuousObservation((0.0, self.size[0]-1)) 53 | ts.addContinuousObservation((0.0, self.size[1]-1)) 54 | ts.addContinuousObservation((-1.0, 60.0)) # Fuel range as per FuelRooms.cc 55 | ts.setEpisodic() 56 | ts.setExtra(self.domain_name) 57 | return ts.toTaskSpec() 58 | 59 | def env_start(self): 60 | self.reset() 61 | returnObs = Observation() 62 | returnObs.doubleArray = self.pos.tolist() + [self.fuel] 63 | return returnObs 64 | 65 | def reset(self): 66 | # Randomly start in the rectangle around (0,7),(4,12) 67 | self.pos = numpy.random.random((2,)) 68 | self.pos[0] *= 4.0 69 | self.pos[1] *= 5.0 70 | self.pos[1] += 7.0 71 | 72 | self.fuel = numpy.random.random()*4.0 + 14.0 # Between 14 and 18 73 | 74 | def inFuelCell(self, position): 75 | return self.pos[1] <= 1.0 or self.pos[1] >= self.size[1]-1.0 76 | 77 | def isAtGoal(self): 78 | return gridworld.Gridworld.isAtGoal(self) or self.fuel < 0 79 | 80 | def getState(self): 81 | return gridworld.Gridworld.getState(self) + [self.fuel] 82 | 83 | def takeAction(self, intAction): 84 | if intAction == 0: 85 | self.pos[0] += 1.0 86 | elif intAction == 1: 87 | self.pos[0] -= 1.0 88 | elif intAction == 2: 89 | self.pos[1] += 1.0 90 | elif intAction == 3: 91 | self.pos[1] -= 1.0 92 | elif intAction == 4: 93 | self.pos += numpy.array([-1.0, 1.0]) 94 | elif intAction == 5: 95 | self.pos += numpy.array([1.0, 1.0]) 96 | elif intAction == 6: 97 | self.pos += numpy.array([-1.0, -1.0]) 98 | elif intAction == 
7: 99 | self.pos += numpy.array([1.0, -1.0]) 100 | 101 | if self.noise > 0: 102 | self.pos += numpy.random.normal(scale=self.noise, size=(2,)) 103 | 104 | self.pos = self.pos.clip([0, 0], self.size) 105 | 106 | self.fuel -= 1.0 107 | if self.fuel_noise > 0: 108 | self.fuel += numpy.random.normal(scale=self.fuel_noise) 109 | 110 | if self.inFuelCell(self.pos): 111 | self.fuel += 20.0 112 | if self.fuel > 60.0: 113 | self.fuel = 60.0 114 | 115 | if gridworld.Gridworld.isAtGoal(self): 116 | return 0.0 117 | elif self.fuel < 0: 118 | return -400.0 119 | elif self.inFuelCell(self.pos): # Fuel costs 120 | base = self.var[0] if self.pos[1] <= 1.0 else self.var[1] 121 | a = self.var[2] 122 | return base - (int(self.pos[0]) % 5)*a 123 | elif intAction < 4: 124 | return -1.0 125 | elif intAction >= 4: 126 | return -1.4 127 | else: 128 | print "ERROR in FuelWorld.takeAction" 129 | 130 | 131 | if __name__=="__main__": 132 | import argparse 133 | parser = argparse.ArgumentParser(description='Run 2D MultiRoom Noisy Continuous Gridworld environment in network mode.') 134 | gridworld.addGridworldArgs(parser) 135 | parser.add_argument("--fuel_noise", type=float, default=0.0, 136 | help="If non-zero then gives the standard deviation of the additive Gaussian noise to add to the fuel expenditure.") 137 | args = parser.parse_args() 138 | EnvironmentLoader.loadEnvironment(FuelWorld(noise=args.noise, fudge=args.fudge, fuel_noise=args.fuel_noise)) 139 | -------------------------------------------------------------------------------- /pyrl/environments/gridworld.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013, Will Dabney 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | import numpy 18 | 19 | from rlglue.environment.Environment import Environment 20 | from rlglue.environment import EnvironmentLoader as EnvironmentLoader 21 | from rlglue.types import Observation 22 | from rlglue.types import Action 23 | from rlglue.types import Reward_observation_terminal 24 | from pyrl.rlglue import TaskSpecRLGlue 25 | from pyrl.rlglue.registry import register_environment 26 | 27 | @register_environment 28 | class Gridworld(Environment): 29 | name = "Gridworld" 30 | 31 | # All parameters are in units of 1, where 1 is how far on average 32 | # the agent can move with a single action. 
33 | def __init__(self, size_x=10, size_y=10, goal_x=10, goal_y=10, noise=0.0, reward_noise=0.0, random_start=False, fudge=1.4143): 34 | self.size = numpy.array([size_x, size_y]) 35 | self.goal = numpy.array([goal_x, goal_y]) 36 | self.noise = noise 37 | self.reward_noise = reward_noise 38 | self.random_start = random_start 39 | self.pos = numpy.zeros((2,)) 40 | self.fudge = fudge 41 | self.domain_name = "Continuous Gridworld by Will Dabney" 42 | 43 | def makeTaskSpec(self): 44 | ts = TaskSpecRLGlue.TaskSpec(discount_factor=1.0, reward_range=(-1.0, 0.0)) 45 | ts.addDiscreteAction((0, 3)) 46 | ts.addContinuousObservation((0.0, self.size[0])) 47 | ts.addContinuousObservation((0.0, self.size[1])) 48 | ts.setEpisodic() 49 | ts.setExtra(self.domain_name) 50 | return ts.toTaskSpec() 51 | 52 | def getState(self): 53 | return self.pos.tolist() 54 | 55 | def reset(self): 56 | if self.random_start: 57 | self.pos = numpy.random.random((2,)) * self.size 58 | else: 59 | self.pos[:] = 0.0 60 | 61 | def env_init(self): 62 | return self.makeTaskSpec() 63 | 64 | def env_start(self): 65 | self.reset() 66 | returnObs = Observation() 67 | returnObs.doubleArray = self.getState() 68 | return returnObs 69 | 70 | def isAtGoal(self): 71 | return numpy.linalg.norm(self.pos - self.goal) < self.fudge 72 | 73 | def takeAction(self, intAction): 74 | if intAction == 0: 75 | self.pos[0] += 1.0 76 | elif intAction == 1: 77 | self.pos[0] -= 1.0 78 | elif intAction == 2: 79 | self.pos[1] += 1.0 80 | elif intAction == 3: 81 | self.pos[1] -= 1.0 82 | 83 | if self.noise > 0: 84 | self.pos += numpy.random.normal(scale=self.noise, size=(2,)) 85 | self.pos = self.pos.clip([0, 0], self.size) 86 | return 0.0 if self.isAtGoal() else -1.0 87 | 88 | def env_step(self,thisAction): 89 | episodeOver = 0 90 | intAction = thisAction.intArray[0] 91 | 92 | theReward = self.takeAction(intAction) 93 | 94 | if self.isAtGoal(): 95 | episodeOver = 1 96 | 97 | if self.reward_noise > 0: 98 | theReward += 
def _parse_bool(value):
    """Convert a command-line string into a bool.

    ``type=bool`` is broken with argparse: any non-empty string — including
    the literal "False" — is truthy, so ``--random_restarts False`` used to
    enable random restarts. Accepts common spellings; raising ValueError
    makes argparse report an invalid-value error for anything else.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0"):
        return False
    raise ValueError("expected a boolean value, got %r" % (value,))

def addGridworldArgs(parser):
    """Register the standard Gridworld options on an argparse parser.

    Shared by Gridworld's own entry point and by subclasses' command-line
    entry points (e.g. FuelWorld).
    """
    parser.add_argument("--size_x", type=float, default=10, help="Size of the gridworld in the x (horizontal) dimension, where 1.0 is the unit of movement.")
    parser.add_argument("--size_y", type=float, default=10, help="Size of the gridworld in the y (vertical) dimension, where 1.0 is the unit of movement.")
    parser.add_argument("--goal_x", type=float, default=10, help="Goal x coordinate")
    parser.add_argument("--goal_y", type=float, default=10, help="Goal y coordinate")
    parser.add_argument("--noise", type=float, default=0, help="Standard deviation of additive noise to generate")
    parser.add_argument("--fudge", type=float, default=1.4143, help="Distance from goal allowed before episode is counted as finished")
    # Fix: type=bool parsed the string "False" as True; use a real converter.
    parser.add_argument("--random_restarts", type=_parse_bool, default=False, help="Randomly assign x,y initial locations.")
-------------------------------------------------------------------------------- 1 | */*.py[co] 2 | */*.o 3 | */*.so 4 | 5 | # Packages 6 | *.egg 7 | *.egg-info 8 | dist 9 | build 10 | eggs 11 | parts 12 | bin 13 | var 14 | sdist 15 | develop-eggs 16 | .installed.cfg 17 | */*~ 18 | # Installer logs 19 | pip-log.txt 20 | 21 | # Unit test / coverage reports 22 | .coverage 23 | .tox 24 | 25 | #Translations 26 | *.mo 27 | 28 | #Mr Developer 29 | .mr.developer.cfg 30 | -------------------------------------------------------------------------------- /pyrl/environments/libPOMDP/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | PROJECT(libPOMDP) 4 | 5 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) 6 | 7 | file(GLOB SRC 8 | "src/*.h" 9 | "src/*.c" 10 | ) 11 | 12 | find_package(PythonInterp REQUIRED) 13 | find_package(PythonLibs REQUIRED) 14 | find_package(BISON REQUIRED) 15 | find_package(FLEX REQUIRED) 16 | 17 | # Semi-hacky way of getting numpy included 18 | # Needs testing on linux 19 | STRING(REPLACE bin/python lib/python${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}/site-packages/numpy/core/include/numpy/ NUMPY_INC ${PYTHON_EXECUTABLE}) 20 | 21 | include_directories(${PYTHON_INCLUDE_DIRS}) 22 | include_directories(${NUMPY_INC}) 23 | 24 | BISON_TARGET(parser ${CMAKE_CURRENT_SOURCE_DIR}/src/parser.y ${CMAKE_CURRENT_SOURCE_DIR}/src/parser.c) 25 | FLEX_TARGET(scanner ${CMAKE_CURRENT_SOURCE_DIR}/src/scanner.l ${CMAKE_CURRENT_SOURCE_DIR}/src/scanner.c) 26 | ADD_FLEX_BISON_DEPENDENCY(scanner parser) 27 | 28 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 29 | 30 | add_library(libpomdp MODULE ${SRC} ${BISON_parser_OUTPUTS} ${FLEX_scanner_OUTPUTS}) 31 | set_target_properties(libpomdp PROPERTIES PREFIX "") 32 | target_link_libraries(libpomdp ${PYTHON_LIBRARIES} ${FLEX_LIBRARIES} ${BISON_LIBRARIES}) 33 | 
-------------------------------------------------------------------------------- /pyrl/environments/libPOMDP/README.md: -------------------------------------------------------------------------------- 1 | pyrl.environments.libPOMDP 2 | ============================ 3 | 4 | This is primarily code from pomdp-solve written by Anthony R. Cassandra. I've only added a 5 | new makefile, which is not as sophisticated as the original, and some code to allow the whole thing 6 | to be compiled into a Python module. 7 | 8 | At present this only contains the code relevant for reading and writing the MDP/POMDP specification 9 | files, and interacting with the information contained within them. However, pomdp-solve itself 10 | has many useful implementations in pure C that may later be brought into this module for use in python. 11 | 12 | This has been tested on Mac OS X, but 'should' also work in Linux. -------------------------------------------------------------------------------- /pyrl/environments/libPOMDP/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amarack/python-rl/a1c1f5bc42cb20f5d9630818d1908f2100916ef4/pyrl/environments/libPOMDP/__init__.py -------------------------------------------------------------------------------- /pyrl/environments/libPOMDP/src/imm-reward.h: -------------------------------------------------------------------------------- 1 | /* imm-reward.h 2 | 3 | ***** 4 | Copyright 1994-1997, Brown University 5 | Copyright 1998, 1999, Anthony R. Cassandra 6 | 7 | All Rights Reserved 8 | 9 | Permission to use, copy, modify, and distribute this software and its 10 | documentation for any purpose other than its incorporation into a 11 | commercial product is hereby granted without fee, provided that the 12 | above copyright notice appear in all copies and that both that 13 | copyright notice and this permission notice appear in supporting 14 | documentation. 
15 | 16 | ANTHONY CASSANDRA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 | INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY 18 | PARTICULAR PURPOSE. IN NO EVENT SHALL ANTHONY CASSANDRA BE LIABLE FOR 19 | ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 21 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 22 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 23 | ***** 24 | 25 | Header file for imm-reward.c 26 | */ 27 | #ifndef MDP_IMM_REWARD_H 28 | #define MDP_IMM_REWARD_H 1 29 | 30 | #include "sparse-matrix.h" 31 | 32 | /* 33 | We will represent the general immediate reward structure as a 34 | linked list, where each node of the list will correspond to a single 35 | R: * : ... entry. The entry from the file could specify a single 36 | value, a row of values, or an entire matrix. Thus we need three 37 | different representations depending on the situation. Additionally, 38 | all of the components could have a wildcard character indicating 39 | that it is a specification for a family of values. This is indicated 40 | with special characters. 41 | 42 | */ 43 | 44 | /* Each of the action, states and obs could have a state index number, 45 | or one of these two values. Since states cannot be negative we use 46 | negative values for the special characters. The observation cannot 47 | be present when the next_state is present, but this should be 48 | enforced by the parser. When both the next state and obs are not 49 | present, we will use a sparse matrix representation. When only the 50 | obs is not present we will use a single dimensional, non-sparse 51 | matrix. When both are specified we use a single value. Note that 52 | it does not matter if the indivdual elements are specific indices or 53 | a wildcard, either way we will store a single value. 
/* One node of the linked list of immediate-reward entries. Each node
   corresponds to a single "R: ..." entry from the MDP/POMDP file; 'type'
   records which representation the entry uses. */
typedef struct Imm_Reward_Node_Struct *Imm_Reward_List;
struct Imm_Reward_Node_Struct {

  IR_Type type;          /* Selects which member of 'rep' below is valid. */

  int action;            /* Each of these may be a concrete index, */
  int cur_state;         /* WILDCARD_SPEC, or NOT_PRESENT as described */
  int next_state;        /* in the comment preceding those #defines. */
  int obs;

  union rep_tag {
    double value;        /* ir_value: both next_state and obs specified. */
    double *vector;      /* ir_vector: only obs absent (1-D, non-sparse). */
    Matrix matrix;       /* ir_matrix: both next_state and obs absent. */
  } rep;

  Imm_Reward_List next;  /* Next R-entry in the list, or NULL. */
};
4 | #include "arrayobject.h" 5 | //#include "numpy/arrayobject.h" 6 | 7 | #include 8 | #include "mdp.h" 9 | #include "imm-reward.h" 10 | 11 | static PyObject *C_readMDP(PyObject *self, PyObject *args); 12 | /* list by action of sparse matrix of shape: gNumStates x gNumStates */ 13 | static PyObject* getSparseTransitionMatrix(PyObject *self, PyObject *args); 14 | /* list by action of sparse matrix of shape: gNumStates x gNumObservations */ 15 | static PyObject* getSparseObsMatrix(PyObject *self, PyObject *args); 16 | 17 | static PyObject *C_getRewardRange(PyObject *self, PyObject *args); 18 | static PyObject *C_getReward(PyObject *self, PyObject *args); 19 | static PyObject *C_transformBelief(PyObject *self, PyObject *args); 20 | static PyObject *C_getInitialBelief(PyObject *self, PyObject *args); 21 | static PyObject *C_isRewardType(PyObject *self, PyObject *args); 22 | static PyObject *C_getNumObservations(PyObject *self, PyObject *args); 23 | static PyObject *C_getNumActions(PyObject *self, PyObject *args); 24 | static PyObject *C_getNumStates(PyObject *self, PyObject *args); 25 | static PyObject *C_getDiscount(PyObject *self, PyObject *args); 26 | 27 | static PyObject *loadFile(PyObject *self, PyObject *args); 28 | PyObject* fillPyMatrix(Matrix *target); 29 | 30 | #define validState(S) (S >= 0 && S < gNumStates) 31 | #define validAction(A) (A >= 0 && A < gNumActions) 32 | #define validObservation(O) (O >= 0 && O < gNumObservations) 33 | 34 | -------------------------------------------------------------------------------- /pyrl/environments/libPOMDP/src/mdp-common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * 4 | * 5 | * global.h 6 | * 7 | * 8 | * Anthony R. 
Cassandra 9 | * 10 | * 11 | * July, 1998 12 | * 13 | * 14 | * 15 | * $RCSfile: mdp-common.h,v $ 16 | * $Source: /u/cvs/proj/pomdp-solve/src/mdp/mdp-common.h,v $ 17 | * $Revision: 1.1 $ 18 | * $Date: 2004/10/10 03:44:59 $ 19 | * 20 | * 21 | * 22 | * 23 | * 1994-1997, Brown University 24 | * 1998-2003, Anthony R. Cassandra 25 | * 26 | * All Rights Reserved 27 | * 28 | * Permission to use, copy, modify, and distribute this software and its 29 | * documentation for any purpose other than its incorporation into a 30 | * commercial product is hereby granted without fee, provided that the 31 | * above copyright notice appear in all copies and that both that 32 | * copyright notice and this permission notice appear in supporting 33 | * documentation. 34 | * 35 | * ANTHONY CASSANDRA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 36 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY 37 | * PARTICULAR PURPOSE. IN NO EVENT SHALL ANTHONY CASSANDRA BE LIABLE FOR 38 | * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 39 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 40 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 41 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 42 | * 43 | * 44 | * 45 | * 46 | */ 47 | 48 | /* 49 | * Header file for all globally defined items in the mdp library. 
50 | */ 51 | 52 | #ifndef MDP_COMMON_H 53 | #define MDP_COMMON_H 54 | 55 | #ifdef DMALLOC 56 | 57 | #include "dmalloc.h" 58 | 59 | #define XCALLOC(num, size) calloc( (num), (size) ) 60 | #define XMALLOC(size) malloc( size ) 61 | #define XREALLOC(p, size) realloc( (p), (size) ) 62 | #define XFREE(stale) free(stale) 63 | 64 | #else 65 | 66 | #define XCALLOC(num, size) calloc( (num), (size) ) 67 | #define XMALLOC(size) malloc( size ) 68 | #define XREALLOC(p, size) realloc( (p), (size) ) 69 | #define XFREE(stale) free(stale) 70 | 71 | #endif 72 | 73 | #endif 74 | 75 | -------------------------------------------------------------------------------- /pyrl/environments/libPOMDP/src/mdp.h: -------------------------------------------------------------------------------- 1 | /* mdp.h 2 | 3 | ***** 4 | Copyright 1994-1997, Brown University 5 | Copyright 1998, 1999, Anthony R. Cassandra 6 | 7 | All Rights Reserved 8 | 9 | Permission to use, copy, modify, and distribute this software and its 10 | documentation for any purpose other than its incorporation into a 11 | commercial product is hereby granted without fee, provided that the 12 | above copyright notice appear in all copies and that both that 13 | copyright notice and this permission notice appear in supporting 14 | documentation. 15 | 16 | ANTHONY CASSANDRA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 | INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY 18 | PARTICULAR PURPOSE. IN NO EVENT SHALL ANTHONY CASSANDRA BE LIABLE FOR 19 | ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 21 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 22 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
23 | ***** 24 | 25 | header file for mdp.c 26 | */ 27 | #ifndef MDP_MDP_H 28 | #define MDP_MDP_H 29 | 30 | #include "sparse-matrix.h" 31 | 32 | /* Use this type for a variable that indicated whether we have a 33 | POMDP or an MDP. 34 | */ 35 | typedef enum { UNKNOWN_problem_type, 36 | MDP_problem_type, 37 | POMDP_problem_type 38 | } Problem_Type; 39 | 40 | /* Use this to determine if the problems values are rewards or costs. 41 | */ 42 | #define NUM_VALUE_TYPES 2 43 | typedef enum {REWARD_value_type, COST_value_type } Value_Type; 44 | #define VALUE_TYPE_STRINGS { \ 45 | "cost", \ 46 | "reward" \ 47 | } 48 | 49 | #define DEFAULT_DISCOUNT_FACTOR 1.0 50 | 51 | #define DEFAULT_VALUE_TYPE REWARD_value_type 52 | 53 | #define INVALID_STATE -1 54 | #define INVALID_OBS -1 55 | #define INVALID_ACTION -1 56 | 57 | #ifndef MDP_C 58 | 59 | /* Exported variables */ 60 | extern char *value_type_str[]; 61 | extern double gDiscount; 62 | extern Problem_Type gProblemType; 63 | extern Value_Type gValueType; 64 | 65 | /* We will use this flag to indicate whether the problem has negative 66 | rewards or not. It starts off FALSE and becomes TRUE if any 67 | negative reward is found. */ 68 | extern double gMinimumImmediateReward; 69 | 70 | extern int gNumStates; 71 | extern int gNumActions; 72 | extern int gNumObservations; 73 | 74 | /* Intermediate variables */ 75 | 76 | extern I_Matrix *IP; /* Transition Probabilities */ 77 | extern I_Matrix *IR; /* Observation Probabilities */ 78 | extern I_Matrix IQ; /* Immediate values for MDP only */ 79 | 80 | /* Sparse variables */ 81 | 82 | extern Matrix *P; /* Transition Probabilities */ 83 | extern Matrix *R; /* Observation Probabilities */ 84 | extern Matrix *QI; /* The immediate values, for MDPs only */ 85 | extern Matrix Q; /* Immediate values for state action pairs. 
These 86 | are expectations computed from immediate values: 87 | either the QI for MDPs or the special 88 | representation for the POMDPs */ 89 | 90 | extern double *gInitialBelief; /* For POMDPs */ 91 | extern int gInitialState; /* For MDPs */ 92 | 93 | /* Exported functions */ 94 | extern double *newBeliefState(); 95 | extern int transformBeliefState( double *pi, 96 | double *pi_hat, 97 | int a, 98 | int obs ); 99 | extern void copyBeliefState( double *copy, double *pi ); 100 | extern void displayBeliefState( FILE *file, double *pi ); 101 | extern int readMDP( char *filename ); 102 | extern void convertMatrices(); 103 | extern void deallocateMDP(); 104 | extern void convertMatrices(); 105 | extern int verifyIntermediateMDP(); 106 | extern void deallocateIntermediateMDP(); 107 | extern void allocateIntermediateMDP(); 108 | extern int writeMDP( char *filename ); 109 | extern void displayMDPSlice( int state ); 110 | 111 | #endif 112 | #endif 113 | -------------------------------------------------------------------------------- /pyrl/environments/libPOMDP/src/parse_constant.h: -------------------------------------------------------------------------------- 1 | /* parse_constant.h 2 | 3 | ***** 4 | Copyright 1994-1997, Brown University 5 | Copyright 1998, 1999, Anthony R. Cassandra 6 | 7 | All Rights Reserved 8 | 9 | Permission to use, copy, modify, and distribute this software and its 10 | documentation for any purpose other than its incorporation into a 11 | commercial product is hereby granted without fee, provided that the 12 | above copyright notice appear in all copies and that both that 13 | copyright notice and this permission notice appear in supporting 14 | documentation. 15 | 16 | ANTHONY CASSANDRA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 | INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY 18 | PARTICULAR PURPOSE. 
IN NO EVENT SHALL ANTHONY CASSANDRA BE LIABLE FOR 19 | ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 21 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 22 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 23 | ***** 24 | */ 25 | 26 | #ifndef MDP_PARSE_CONSTANT_H 27 | #define MDP_PARSE_CONSTANT_H 1 28 | 29 | typedef enum { CONST_INT, CONST_STRING, CONST_FLOAT } Const_Type; 30 | 31 | typedef struct cNode 32 | { 33 | Const_Type theTag; /* Type of constant it is */ 34 | union { 35 | int theInt; 36 | char *theString; 37 | double theFloat; 38 | } theValue; 39 | } Constant_Block; 40 | 41 | #endif 42 | 43 | -------------------------------------------------------------------------------- /pyrl/environments/libPOMDP/src/parse_hash.h: -------------------------------------------------------------------------------- 1 | /* parse_hash.h 2 | 3 | ***** 4 | Copyright 1994-1997, Brown University 5 | Copyright 1998, 1999, Anthony R. Cassandra 6 | 7 | All Rights Reserved 8 | 9 | Permission to use, copy, modify, and distribute this software and its 10 | documentation for any purpose other than its incorporation into a 11 | commercial product is hereby granted without fee, provided that the 12 | above copyright notice appear in all copies and that both that 13 | copyright notice and this permission notice appear in supporting 14 | documentation. 15 | 16 | ANTHONY CASSANDRA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 | INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY 18 | PARTICULAR PURPOSE. IN NO EVENT SHALL ANTHONY CASSANDRA BE LIABLE FOR 19 | ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 21 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 22 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
23 | ***** 24 | */ 25 | 26 | #ifndef MDP_PARSE_HASH_H 27 | #define MDP_PARSE_HASH_H 1 28 | 29 | #define HASH_TABLE_SIZE 255 30 | 31 | typedef enum { nt_state, nt_action, 32 | nt_observation, nt_unknown } Mnemonic_Type; 33 | 34 | typedef struct Node_Struct *Node; 35 | struct Node_Struct { 36 | Mnemonic_Type type; 37 | int number; 38 | char *str; 39 | Node next; 40 | }; 41 | 42 | extern void H_create(); 43 | extern void H_destroy(); 44 | extern int H_enter( char *str, Mnemonic_Type type ); 45 | extern int H_lookup( char *str, Mnemonic_Type type ); 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /pyrl/environments/libPOMDP/src/sparse-matrix.h: -------------------------------------------------------------------------------- 1 | /* sparse-matrix.h 2 | 3 | Header file for sparse-matrix.c. 4 | 5 | ***** 6 | Copyright 1994-1997, Brown University 7 | Copyright 1998, 1999, Anthony R. Cassandra 8 | 9 | All Rights Reserved 10 | 11 | Permission to use, copy, modify, and distribute this software and its 12 | documentation for any purpose other than its incorporation into a 13 | commercial product is hereby granted without fee, provided that the 14 | above copyright notice appear in all copies and that both that 15 | copyright notice and this permission notice appear in supporting 16 | documentation. 17 | 18 | ANTHONY CASSANDRA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 19 | INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY 20 | PARTICULAR PURPOSE. IN NO EVENT SHALL ANTHONY CASSANDRA BE LIABLE FOR 21 | ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 22 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 23 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 24 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
/* A matrix will be sparsely represented by a bunch of arrays
   (a compressed-row-style layout: values stored row by row with
   per-row offsets and lengths plus a parallel column-index array).
*/
struct Matrix_Struct {
  int num_rows;      /* Number of rows in the matrix. */
  int num_non_zero;  /* Total number of non-zero entries stored. */
  double *mat_val;   /* The actual non-zero entries stored row by row. */
  int *row_start;    /* The position for the start of each row in mat_val. */
  int *row_length;   /* The length of each row in mat_val. */
  int *col;          /* The column number for each entry in mat_val. */
};
typedef struct Matrix_Struct *Matrix;
@register_environment
class MarbleMaze(Environment):
    """A simple gridworld like domain, with many wall segments to create
    a maze. This domain, unlike the other gridworld domains will be made
    entirely discrete.

    From paper:
    A Bayesian Sampling Approach to Exploration in Reinforcement Learning. 2009.
    John Asmuth, Lihong Li, Michael Littman, Ali Nouri, and David Wingate.
    """
    name = "Marble Maze"

    def __init__(self, **kwargs):
        """Build the maze layout, pit/goal locations, and movement noise.

        Keyword Args:
            noise (float): probability in [0, 1] that a move slips
                perpendicular to the chosen direction. Default 0.2.
        """
        # Building walls: each int in the maze matrix is a 4-bit mask of
        # which walls surround that cell.
        # 0000 no walls
        # 0001 wall to the north
        N = 1
        # 0010 wall to the east
        E = 2
        # 0100 wall to the south
        S = 4
        # 1000 wall to the west
        W = 8
        self.directions = numpy.array([N, E, S, W], dtype=int)
        self.maze = numpy.array([[N+W, N, N, N, N, N+E],
                                 [W+S, N+E+S, W, E, W, E],
                                 [N+W, N+S, S, S+E, W, E],
                                 [S+W, N+S, N, N, E, E],
                                 [N+W, N, 0, S, 0, E],
                                 [S+W, S, S+E, S+W+N, S+E, S+E+W]], dtype=int)
        # Entering any of these cells ends the episode with reward -1.
        self.pits = numpy.array([[1, 1], [4, 1], [4, 2], [3, 3]], dtype=int)
        self.noise = kwargs.setdefault('noise', 0.2)
        self.start_loc = numpy.zeros((2,), dtype=int)
        self.pos = numpy.zeros((2,), dtype=int)
        self.step_reward = -0.001
        self.goal_loc = numpy.array([5, 5], dtype=int)
        # NOTE(review): "Marbel" looks like a typo, but the string is kept
        # byte-identical in case anything matches on the exact extra text.
        self.domain_name = "Marbel Maze (Discrete)"

    def makeTaskSpec(self):
        """Return the RL-Glue task-spec string describing this domain."""
        ts = TaskSpecRLGlue.TaskSpec(discount_factor=0.95, reward_range=(-1.0, 1.0))
        ts.addDiscreteAction((0, 3))
        ts.addDiscreteObservation((0, self.maze.shape[0]-1))
        ts.addDiscreteObservation((0, self.maze.shape[1]-1))
        ts.setEpisodic()
        ts.setExtra(self.domain_name)
        return ts.toTaskSpec()

    def getState(self):
        """Return the current position as a two-element [row, col] list."""
        return self.pos.tolist()

    def reset(self):
        """Move the agent back to the start location."""
        self.pos = self.start_loc.copy()

    def env_init(self):
        return self.makeTaskSpec()

    def env_start(self):
        self.reset()
        returnObs = Observation()
        returnObs.intArray = self.getState()
        return returnObs

    def isAtGoal(self):
        """True when the current position equals the goal location."""
        return (self.pos == self.goal_loc).all()

    def takeAction(self, intAction):
        """Apply discrete action 0-3; return (reward, episode_over).

        Actions 0/1 move along axis 0 (+1/-1), actions 2/3 along axis 1.
        Walls of the *current* cell clamp the resulting motion.
        """
        direction = numpy.zeros((2,), dtype=int)
        # Floor division keeps the index an int under Python 3; the old
        # '/' produced a float index there and raised TypeError.
        direction[int(intAction) // 2] = 1 + (intAction % 2)*-2

        # Noisy movement causes agent to move perpendicular to
        # the desired action, with equal likelihood for either option
        if numpy.random.random() < self.noise:
            direction.fill(0)
            direction[int(intAction < 2)] = numpy.random.randint(2)*-2 + 1
        if self.maze[tuple(self.pos)] % 2 != 0:  # North wall
            direction[0] = max(0, direction[0])
        if (self.maze[tuple(self.pos)] % 8 >= 4):  # South wall
            direction[0] = min(0, direction[0])
        if (self.maze[tuple(self.pos)] % 4 >= 2):  # East wall
            direction[1] = min(0, direction[1])
        if self.maze[tuple(self.pos)] >= 8:  # West wall
            direction[1] = max(0, direction[1])

        self.pos += direction
        if self.isAtGoal():
            return 1.0, True
        elif self.pos.tolist() in self.pits.tolist():
            return -1.0, True
        else:
            return self.step_reward, False

    def env_step(self, thisAction):
        intAction = int(thisAction.intArray[0])
        theReward, episodeOver = self.takeAction(intAction)

        theObs = Observation()
        theObs.intArray = self.getState()

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = int(episodeOver)

        return returnRO

    def env_cleanup(self):
        pass

    def env_message(self, inMessage):
        return "I don't know how to respond to your message"
"") 20 | target_link_libraries(mdptetris ${PYTHON_LIBRARIES}) 21 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/README.md: -------------------------------------------------------------------------------- 1 | mdptetris 2 | ========= 3 | 4 | This is a modified version of mdptetris from: 5 | 6 | https://gforge.inria.fr/projects/mdptetris/ 7 | http://mdptetris.gforge.inria.fr/doc/ 8 | 9 | It extends their Tetris implementation to be compiled as a python module, and adds 10 | a few utility functions for use with general reinforcement learning agents. We have 11 | also removed most of the agent logic from the original code, and are continuing to 12 | pair down this fork of their codebase to just the essentials for use as a Tetris 13 | reinforcement learning environment, where agents do not have specific domain knowledge. 14 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amarack/python-rl/a1c1f5bc42cb20f5d9630818d1908f2100916ef4/pyrl/environments/mdptetris/__init__.py -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/features/bertsekas_initial.dat: -------------------------------------------------------------------------------- 1 | 1 2 | 1 3 | 22 4 | 0 0 5 | 8 0 6 | 8 0 7 | 8 0 8 | 8 0 9 | 8 0 10 | 8 0 11 | 8 0 12 | 8 0 13 | 8 0 14 | 8 0 15 | 9 0 16 | 9 0 17 | 9 0 18 | 9 0 19 | 9 0 20 | 9 0 21 | 9 0 22 | 9 0 23 | 9 0 24 | 7 -10 25 | 5 -1 26 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/features/ce_bdu.dat: -------------------------------------------------------------------------------- 1 | 0 2 | -1 3 | 28 4 | 8 0 100 5 | 8 0 100 6 | 8 0 100 7 | 8 0 100 8 | 8 0 100 9 | 8 0 100 10 | 8 0 
100 11 | 8 0 100 12 | 8 0 100 13 | 8 0 100 14 | 9 0 100 15 | 9 0 100 16 | 9 0 100 17 | 9 0 100 18 | 9 0 100 19 | 9 0 100 20 | 9 0 100 21 | 9 0 100 22 | 9 0 100 23 | 7 0 100 24 | 5 0 100 25 | 1 0 100 26 | 2 0 100 27 | 3 0 100 28 | 4 0 100 29 | 6 0 100 30 | -1 0 100 31 | -2 0 100 32 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/features/ce_bertsekas.dat: -------------------------------------------------------------------------------- 1 | 0 2 | -1 3 | 21 4 | 8 0 100 5 | 8 0 100 6 | 8 0 100 7 | 8 0 100 8 | 8 0 100 9 | 8 0 100 10 | 8 0 100 11 | 8 0 100 12 | 8 0 100 13 | 8 0 100 14 | 9 0 100 15 | 9 0 100 16 | 9 0 100 17 | 9 0 100 18 | 9 0 100 19 | 9 0 100 20 | 9 0 100 21 | 9 0 100 22 | 9 0 100 23 | 7 0 100 24 | 5 0 100 25 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/features/ce_bertsekas_dellacherie.dat: -------------------------------------------------------------------------------- 1 | 0 2 | -1 3 | 26 4 | 8 0 100 5 | 8 0 100 6 | 8 0 100 7 | 8 0 100 8 | 8 0 100 9 | 8 0 100 10 | 8 0 100 11 | 8 0 100 12 | 8 0 100 13 | 8 0 100 14 | 9 0 100 15 | 9 0 100 16 | 9 0 100 17 | 9 0 100 18 | 9 0 100 19 | 9 0 100 20 | 9 0 100 21 | 9 0 100 22 | 9 0 100 23 | 7 0 100 24 | 5 0 100 25 | 1 0 100 26 | 2 0 100 27 | 3 0 100 28 | 4 0 100 29 | 6 0 100 30 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/features/ce_dellacherie.dat: -------------------------------------------------------------------------------- 1 | 0 2 | -1 3 | 6 4 | 1 0 100 5 | 2 0 100 6 | 3 0 100 7 | 4 0 100 8 | 5 0 100 9 | 6 0 100 10 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/features/ce_du.dat: -------------------------------------------------------------------------------- 1 | 0 2 | -1 3 | 8 4 | 1 0 100 5 | 2 0 100 6 | 3 0 100 7 | 4 0 100 8 | 
5 0 100 9 | 6 0 100 10 | -1 0 100 11 | -2 0 100 12 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/features/dellacherie_initial.dat: -------------------------------------------------------------------------------- 1 | 0 2 | -1 3 | 6 4 | 1 -1 5 | 2 1 6 | 3 -1 7 | 4 -1 8 | 5 -4 9 | 6 -1 10 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/features/dellacherie_ourwellsums.dat: -------------------------------------------------------------------------------- 1 | 0 2 | -1 3 | 6 4 | 1 -1 5 | 2 1 6 | 3 -1 7 | 4 -1 8 | 5 -4 9 | -6 -1 10 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/features/record_bdu.dat: -------------------------------------------------------------------------------- 1 | 0 2 | -1 3 | 28 4 | 8 -1.318725e+01 5 | 8 -7.652075e+00 6 | 8 7.622007e+00 7 | 8 -9.044294e+00 8 | 8 -1.747279e+00 9 | 8 -7.569865e+00 10 | 8 -5.221819e+00 11 | 8 -6.874555e-01 12 | 8 -6.483100e+00 13 | 8 -1.601383e+01 14 | 9 2.356909e+00 15 | 9 2.143912e+00 16 | 9 -7.351929e+00 17 | 9 1.558139e+00 18 | 9 4.336980e-01 19 | 9 3.655922e+00 20 | 9 -4.673708e+00 21 | 9 -1.997187e+00 22 | 9 9.122675e+00 23 | 7 1.236382e+01 24 | 5 -4.064000e+01 25 | 1 -4.327153e+01 26 | 2 -1.036039e+00 27 | 3 -3.693346e+01 28 | 4 -8.100232e+01 29 | 6 -4.117906e+01 30 | -1 -1.327585e+01 31 | -2 -7.547524e+01 32 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/features/record_du.dat: -------------------------------------------------------------------------------- 1 | 0 2 | -1 3 | 8 4 | 1 -1.262900e+01 5 | 2 6.601974e+00 6 | 3 -9.215815e+00 7 | 4 -1.977356e+01 8 | 5 -1.308335e+01 9 | 6 -1.048747e+01 10 | -1 -1.611863e+00 11 | -2 -2.404087e+01 12 | -------------------------------------------------------------------------------- 
/pyrl/environments/mdptetris/data/features/value_estimator_bertsekas.dat: -------------------------------------------------------------------------------- 1 | 1 2 | 0 3 | 22 4 | 0 1.882163 5 | 8 0.157276 6 | 8 0.027709 7 | 8 0.073735 8 | 8 0.068846 9 | 8 0.076944 10 | 8 0.077298 11 | 8 0.071355 12 | 8 0.074865 13 | 8 0.026533 14 | 8 0.155844 15 | 9 -0.109454 16 | 9 -0.084454 17 | 9 -0.084207 18 | 9 -0.080510 19 | 9 -0.079441 20 | 9 -0.079068 21 | 9 -0.082915 22 | 9 -0.083773 23 | 9 -0.111844 24 | 7 -0.064443 25 | 5 -0.630502 26 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/pieces3.dat: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # File format describing the pieces 3 | # number of pieces 4 | # for each piece: 5 | # number of orientations, height, width (for the first orientation) 6 | # piece (with "X") 7 | ########################################################################## 8 | # 9 | # Pieces with 3 bricks 10 | # There are 2 pieces: 11 | 2 12 | # 13 | 4 2 2 14 | XX 15 | X 16 | 2 1 3 17 | XXX 18 | # End of File -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/pieces4.dat: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # File format describing the pieces 3 | # number of pieces 4 | # for each piece: 5 | # number of orientations, height, width (for the first orientation) 6 | # piece (with "X") 7 | ########################################################################## 8 | # 9 | # Pieces with 4 bricks (standard pieces) 10 | # There are 7 pieces: 11 | 7 12 | # 13 | 2 4 1 14 | X 15 | X 16 | X 17 | X 18 | 1 2 2 19 | XX 20 | XX 21 | 4 3 2 22 | X 23 | XX 24 | X 25 | 2 3 2 26 | X 27 | XX 28 | X 29 | 2 
3 2 30 | X 31 | XX 32 | X 33 | 4 2 3 34 | X 35 | XXX 36 | 4 2 3 37 | XXX 38 | X 39 | # End of File -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/data/pieces_melax.dat: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # File format describing the pieces 3 | # number of pieces 4 | # for each piece: 5 | # number of orientations, height, width (for the first orientation) 6 | # piece (with "X") 7 | ########################################################################## 8 | # 9 | # Melax's Reduced set of pieces 10 | # There are 5 pieces: 11 | 5 12 | # 13 | 1 1 1 14 | X 15 | 2 1 2 16 | XX 17 | 2 2 2 18 | X 19 | X 20 | 4 2 2 21 | X 22 | XX 23 | 1 2 2 24 | XX 25 | XX 26 | # End of File -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/board.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @defgroup board Board 3 | * @ingroup api 4 | * @brief The Tetris board 5 | * 6 | * This module handles the game board. The board is composed by an array of rows. 7 | * Each row is represented by a 16-bit integer. 8 | * 9 | * @{ 10 | */ 11 | #ifndef BOARD_H 12 | #define BOARD_H 13 | 14 | #include 15 | #include "types.h" 16 | #include "piece.h" 17 | #include "last_move_info.h" 18 | 19 | /** 20 | * @brief The game board. 21 | * 22 | * The rows are numeroted from bottom to top, starting with 0. 23 | * The columns are numeroted from left to right, starting with 1 (actually column 0 is the left border). 24 | */ 25 | struct Board { 26 | /** 27 | * @name Board structure and content 28 | */ 29 | int width; /**< Number of columns in the board, not including the side borders (10 in standard Tetris). */ 30 | int height; /**< Number of rows in the board (20 in standard Tetris). 
*/ 31 | int extended_height; /**< Number of rows in the internal representation of board (24 in standard Tetris). */ 32 | int allow_lines_after_overflow; /**< enable the lines completion when the piece overflows? */ 33 | uint16_t *rows; /**< Board state: array of rows where each row is represented with an integer. */ 34 | 35 | /** 36 | * @name Information stored to improve the speed 37 | */ 38 | int wall_height; /**< Current height of the wall (index of the lowest empty row). */ 39 | int max_piece_height; /**< Maximum height of a piece (4 for standard Tetris), used to know how many 40 | lines we have to check when a piece is dropped. */ 41 | int *column_heights; /**< Height of each column. */ 42 | 43 | /** 44 | * @name Bit masks depending on the board size 45 | */ 46 | uint16_t empty_row; /**< 16-bit integer representing an empty row (for the standard board size: 1000000000011111). */ 47 | uint16_t full_row; /**< 16-bit integer representing a full row (for the standard board size: 1111111111111111). */ 48 | 49 | /** 50 | * @name Information needed to cancel the last move. 51 | */ 52 | uint16_t *previous_rows; /**< The board state before the last move. */ 53 | int previous_wall_height; /**< The wall height (index of the first empty row) before the last move. */ 54 | }; 55 | 56 | /** 57 | * @name Board creation and destruction 58 | * 59 | * These functions allow to create or destroy a board. 60 | * 61 | * @{ 62 | */ 63 | Board *new_board(int width, int height, int allow_lines_after_overflow, int nb_pieces, Piece *pieces); 64 | Board *new_board_copy(const Board *board); 65 | void free_board(Board *board); 66 | /** 67 | * @} 68 | */ 69 | 70 | 71 | /** 72 | * @name Actions on the board 73 | * 74 | * These functions change the board state. 75 | * You should not change the content of a board's structure directly. 
76 | * 77 | * @{ 78 | */ 79 | int board_drop_piece(Board *board, PieceOrientation *oriented_piece, int orientation, int column, LastMoveInfo *last_move_info, int cancellable); 80 | int board_drop_piece_fancy(Board *board, PieceOrientation *oriented_piece, int orientation, int column,LastMoveInfo *last_move_info, int cancellable, int **fancy_board); 81 | int board_drop_piece_rlc(Board *board, Piece *pieces, int piece_index, int desired_orientation, int desired_column,LastMoveInfo *last_move_info, int cancellable); 82 | void board_cancel_last_move(Board *board); 83 | void board_reset(Board *board); 84 | /** 85 | * @} 86 | */ 87 | 88 | /** 89 | * @name Additional information about the board 90 | * 91 | * The following functions provide some other information about the board. 92 | * For performance reasons, these information are computed only 93 | * if you ask it explicitly. 94 | * 95 | * @{ 96 | */ 97 | void board_update_column_heights(Board *board); 98 | int board_get_column_height(Board *board, int column); 99 | /** 100 | * @} 101 | */ 102 | 103 | /** 104 | * @name Displaying 105 | * @{ 106 | */ 107 | void board_print(FILE *out, Board *board); 108 | /** 109 | * @} 110 | */ 111 | 112 | #endif 113 | 114 | /** 115 | * @} 116 | */ 117 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/brick_masks.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Definition of the bit masks declared in brick_masks.h 3 | */ 4 | 5 | #include "config.h" 6 | #include "brick_masks.h" 7 | 8 | const uint16_t brick_masks[] = { 9 | 0x8000, /* X............... */ 10 | 0x4000, /* .X.............. */ 11 | 0x2000, /* ..X............. */ 12 | 0x1000, /* etc */ 13 | 0x0800, 14 | 0x0400, 15 | 0x0200, 16 | 0x0100, 17 | 0x0080, 18 | 0x0040, 19 | 0x0020, 20 | 0x0010, 21 | 0x0008, 22 | 0x0004, 23 | 0x0002, /* ..............X. 
*/ 24 | 0x0001 /* ...............X */ 25 | }; 26 | 27 | const uint16_t brick_masks_inv[] = { 28 | 0x7FFF, /* .XXXXXXXXXXXXXXX */ 29 | ~0x4000, /* X.XXXXXXXXXXXXXX */ 30 | ~0x2000, /* XX.XXXXXXXXXXXXX */ 31 | ~0x1000, /* etc */ 32 | ~0x0800, 33 | ~0x0400, 34 | ~0x0200, 35 | ~0x0100, 36 | ~0x0080, 37 | ~0x0040, 38 | ~0x0020, 39 | ~0x0010, 40 | ~0x0008, 41 | ~0x0004, 42 | ~0x0002, /* XXXXXXXXXXXXXX.X */ 43 | ~0x0001 /* XXXXXXXXXXXXXXX. */ 44 | }; 45 | 46 | /** 47 | * Prints the 16 bits of a row into a file. 48 | * @param out the file to write 49 | * @param row the row 50 | */ 51 | void print_row(FILE *out, uint16_t row) { 52 | int i; 53 | for (i = 0; i < 16; i++) { 54 | if (row & brick_masks[i]) { 55 | fprintf(out, "X"); 56 | } 57 | else { 58 | fprintf(out, "."); 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/brick_masks.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Bit masks to store the board state and the shape of the pieces. 3 | */ 4 | 5 | #ifndef BRICK_MASKS_H 6 | #define BRICK_MASKS_H 7 | 8 | #include 9 | #include 10 | 11 | /** 12 | * Bit masks to represent the bricks on a row and the shape of each piece. 13 | * With this representation, a row state is stored on a single 16-bit integer. 14 | * A row size must not exceed 16 cells. 15 | * There is 12 cells in the row of a standard Tetris game (including the 2 side borders). 16 | * These bit fields are also used to represent the shape of the pieces. 
17 | */ 18 | extern const uint16_t brick_masks[]; 19 | extern const uint16_t brick_masks_inv[]; 20 | 21 | void print_row(FILE *out, uint16_t row); 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/common_parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_PARAMETERS_H 2 | #define COMMON_PARAMETERS_H 3 | 4 | #include "types.h" 5 | #include "rewards.h" 6 | #include "macros.h" 7 | #include "file_tools.h" 8 | 9 | #define MAX_LENGTH 256 10 | 11 | /** 12 | * This structure defines the parameters used in the LPI and CE algorithms. 13 | */ 14 | struct CommonParameters { 15 | 16 | /* Parameters common to algorithms and tetris */ 17 | 18 | int board_width; /* board width */ 19 | int board_height; /* board height */ 20 | 21 | int tetris_implementation; /* 0: Simplified Tetris, 1: RLC2008 Tetris, 2(to be done): Original Tetris */ 22 | 23 | int allow_lines_after_overflow; /* enable the lines completion when the piece overflows? 
*/ 24 | char piece_file_name[MAX_FILE_NAME]; /* file defining the pieces */ 25 | unsigned int random_generator_seed; /* seed of the random number generator */ 26 | 27 | /* Parameters which are common to LPI and CE algorithms */ 28 | 29 | RewardDescription reward_description; /* reward function */ 30 | }; 31 | 32 | 33 | void set_default_reward_function(RewardFunctionID reward_function_id); 34 | 35 | void load_default_parameters(CommonParameters *parameters); 36 | 37 | void ask_common_parameters(CommonParameters *parameters); 38 | 39 | int parse_common_parameter(CommonParameters *parameters, int nb_args, char **args, void (*print_usage)(void)); 40 | 41 | void parameters_assert(int assertion, const char *error_message, void (*print_usage)(void)); 42 | 43 | void common_parameters_print_usage(void); 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/config.h: -------------------------------------------------------------------------------- 1 | /* config.h. Generated from config.h.in by configure. */ 2 | /* config.h.in. Generated from configure.ac by autoheader. */ 3 | 4 | /* Define to 1 if you have the header file. */ 5 | #define HAVE_INTTYPES_H 1 6 | 7 | /* Define to 1 if you have the `m' library (-lm). */ 8 | #define HAVE_LIBM 1 9 | 10 | /* Define to 1 if you have the `z' library (-lz). */ 11 | #define HAVE_LIBZ 1 12 | 13 | /* Define to 1 if you have the header file. */ 14 | #define HAVE_MEMORY_H 1 15 | 16 | /* Define to 1 if you have the `pow' function. */ 17 | #define HAVE_POW 1 18 | 19 | /* Define to 1 if you have the header file. */ 20 | #define HAVE_SIGNAL_H 1 21 | 22 | /* Define to 1 if you have the `sqrt' function. */ 23 | #define HAVE_SQRT 1 24 | 25 | /* Define to 1 if you have the header file. */ 26 | #define HAVE_STDINT_H 1 27 | 28 | /* Define to 1 if you have the header file. */ 29 | #define HAVE_STDLIB_H 1 30 | 31 | /* Define to 1 if you have the header file. 
*/ 32 | #define HAVE_STRINGS_H 1 33 | 34 | /* Define to 1 if you have the header file. */ 35 | #define HAVE_STRING_H 1 36 | 37 | /* Define to 1 if you have the header file. */ 38 | #define HAVE_SYS_STAT_H 1 39 | 40 | /* Define to 1 if you have the header file. */ 41 | #define HAVE_SYS_TYPES_H 1 42 | 43 | /* Define to 1 if you have the header file. */ 44 | #define HAVE_UNISTD_H 1 45 | 46 | /* Name of package */ 47 | #define PACKAGE "mdptetris" 48 | 49 | /* Define to the address where bug reports for this package should be sent. */ 50 | #define PACKAGE_BUGREPORT "christophe.thiery@loria.fr" 51 | 52 | /* Define to the full name of this package. */ 53 | #define PACKAGE_NAME "mdptetris" 54 | 55 | /* Define to the full name and version of this package. */ 56 | #define PACKAGE_STRING "mdptetris 1.4" 57 | 58 | /* Define to the one symbol short name of this package. */ 59 | #define PACKAGE_TARNAME "mdptetris" 60 | 61 | /* Define to the home page for this package. */ 62 | #define PACKAGE_URL "" 63 | 64 | /* Define to the version of this package. */ 65 | #define PACKAGE_VERSION "1.4" 66 | 67 | /* Define to 1 if you have the ANSI C header files. */ 68 | #define STDC_HEADERS 1 69 | 70 | /* Version number of package */ 71 | #define VERSION "1.4" 72 | 73 | /* Define for Solaris 2.5.1 so the uint32_t typedef from , 74 | , or is not used. If the typedef were allowed, the 75 | #define below would cause a syntax error. */ 76 | /* #undef _UINT32_T */ 77 | 78 | /* Define to empty if `const' does not conform to ANSI C. */ 79 | /* #undef const */ 80 | 81 | /* Define to `unsigned int' if does not define. */ 82 | /* #undef size_t */ 83 | 84 | /* Define to the type of an unsigned integer type of width exactly 16 bits if 85 | such a type exists and the standard includes do not define it. */ 86 | /* #undef uint16_t */ 87 | 88 | /* Define to the type of an unsigned integer type of width exactly 32 bits if 89 | such a type exists and the standard includes do not define it. 
*/ 90 | /* #undef uint32_t */ 91 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/feature_functions.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @defgroup feature_functions Feature functions 3 | * @ingroup api 4 | * @brief Definition of the features functions. 5 | * 6 | * This module contains the definition of all feature functions. 7 | * 8 | * @see feature_policy 9 | * @{ 10 | */ 11 | #ifndef FEATURE_FUNCTIONS_H 12 | #define FEATURE_FUNCTIONS_H 13 | 14 | #include "types.h" 15 | #include "feature_policy.h" 16 | 17 | /** 18 | * @name General feature function handling 19 | * @{ 20 | */ 21 | FeatureFunction *feature_function(FeatureID feature_id); 22 | void features_initialize(const FeaturePolicy *feature_policy); 23 | void features_exit(void); 24 | /** 25 | * @} 26 | */ 27 | 28 | /** 29 | * @name Special feature functions 30 | * @{ 31 | */ 32 | double get_constant(Game *game); 33 | /** 34 | * @} 35 | */ 36 | 37 | /** 38 | * @name Original feature functions 39 | */ 40 | double get_hole_depths(Game *game); 41 | double get_rows_with_holes(Game *game); 42 | double get_next_wall_height(Game *game); 43 | double get_surrounded_holes(Game *game); 44 | double get_next_local_value_function(Game *game); 45 | double get_well_sums_fast(Game *game); 46 | 47 | double get_wall_distance_to_top(Game *game); 48 | double get_next_column_distance_to_top(Game *game); 49 | double get_next_column_height_difference2(Game *game); 50 | 51 | double get_wall_distance_to_top_square(Game *game); 52 | double get_hole_depths_square(Game *game); 53 | double get_height_square(Game *game); 54 | double get_next_column_height2(Game *game); 55 | 56 | double get_diversity(Game *game); 57 | 58 | /** 59 | * @} 60 | */ 61 | 62 | /** 63 | * @name Feature functions from Dellacherie (2003) 64 | * @{ 65 | */ 66 | double get_landing_height(Game *game); 67 | double get_eroded_piece_cells(Game 
*game); 68 | double get_row_transitions(Game *game); 69 | double get_column_transitions(Game *game); 70 | double get_holes(Game *game); 71 | double get_well_sums_dellacherie(Game *game); 72 | /** 73 | * @} 74 | */ 75 | 76 | /** 77 | * @name Feature functions from Bertsekas and Ioffe (1996) 78 | */ 79 | double get_wall_height(Game *game); 80 | double get_next_column_height(Game *game); 81 | double get_next_column_height_difference(Game *game); 82 | /** 83 | * @} 84 | */ 85 | 86 | /** 87 | * @name Feature functions from Fahey (2006) 88 | */ 89 | double get_occupied_cells(Game *game); 90 | double get_weighted_cells(Game *game); 91 | double get_wells(Game *game); 92 | double get_rows_eliminated(Game *game); 93 | /** 94 | * @} 95 | */ 96 | 97 | /** 98 | * @name Feature functions from Bohm et al. (2005) 99 | */ 100 | double get_max_height_difference(Game *game); 101 | /** 102 | * @} 103 | */ 104 | 105 | #endif 106 | 107 | /** 108 | * @} 109 | */ 110 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/file_tools.c: -------------------------------------------------------------------------------- 1 | #include "config.h" 2 | #include "file_tools.h" 3 | #include "macros.h" 4 | 5 | /** 6 | * @brief Opens a file in the current directory or in the MdpTetris data directory. 7 | * 8 | * The current directory is used for the user-defined data files. The data directory 9 | * contains data files installed with the program. 10 | * 11 | * First, the function tries to open the file in the current directory. If this file 12 | * doesn't exist, then the function tries to open it in the data directory of MdpTetris 13 | * (e.g. \c /usr/local/share/mdptetris). 14 | * 15 | * You should use this function instead of \c fopen to open any data file (provided 16 | * with the application of user-defined). You don't have to care about the directories. 
17 | * 18 | * Note that the data directory contains read-only files installed with MdpTetris, 19 | * so this function will not try to open a file in the data directory if the fopen mode 20 | * is not \c "r". 21 | * 22 | * @param file_name name of the file to open 23 | * @param fopen_mode mode to give to the \c fopen call 24 | * @return the file, or NULL if it couldn't be open. 25 | */ 26 | FILE *open_data_file(const char *file_name, const char *fopen_mode) { 27 | 28 | FILE *f; 29 | char file_name_in_datadir[MAX_FILE_NAME]; 30 | 31 | /* first, open the file in the current directory */ 32 | f = fopen(file_name, fopen_mode); 33 | 34 | if (f == NULL && fopen_mode[0] == 'r') { 35 | /* open the file in the data directory */ 36 | sprintf(file_name_in_datadir, "%s/%s", DATADIR, file_name); 37 | f = fopen(file_name_in_datadir, fopen_mode); 38 | } 39 | 40 | return f; 41 | } 42 | 43 | /** 44 | * @brief Reads the next non-comment line of a file. 45 | * 46 | * A line is considered as a comment if the first character is '#'. 47 | * 48 | * @param f the file to read 49 | * @param line pointer to store the characters that will be read 50 | * @param line_size maximum number of characters to read on the line 51 | * @return 1 if the a line was successfuly read, 0 if the end of the file 52 | * was reached. 53 | */ 54 | int readline_skipcomments(FILE *f, char *line, int line_size) { 55 | 56 | do { 57 | if (fgets(line, line_size, f) == NULL) { 58 | return 0; 59 | } 60 | 61 | } while (line[0] == '#'); /* skip comments */ 62 | 63 | return 1; 64 | } 65 | 66 | /** 67 | * @brief Displays a message explaining an error occured when parsing a file 68 | * and exits the program. 
69 | * @param file_name name of the file 70 | * @param expected a string describing what was expected 71 | * @param readed a string describing what was read instead of what was expected 72 | */ 73 | void problem_reading_file(const char *file_name, const char *expected, const char* readed) { 74 | DIE3("Problem reading file '%s': expected '%s' and readed '%s'\n", file_name, expected, readed); 75 | } 76 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/file_tools.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @defgroup file_tools File tools 3 | * @ingroup api 4 | * @brief Basic file parsing functions 5 | * 6 | * This module provides some utility functions to analyse a file. 7 | * 8 | * @{ 9 | */ 10 | #ifndef FILE_TOOLS_H 11 | #define FILE_TOOLS_H 12 | 13 | #include 14 | 15 | /** 16 | * @brief Maximum number or characters allowed in a file name. 17 | */ 18 | #define MAX_FILE_NAME 256 19 | 20 | /** 21 | * @brief Constant string containing the name of the MdpTetris data directory. 22 | */ 23 | #define DATADIR STRING(DATADIR_) 24 | 25 | 26 | FILE *open_data_file(const char *file_name, const char *fopen_mode); 27 | int readline_skipcomments(FILE *f, char *line, int line_size); 28 | void problem_reading_file(const char *file_name, const char *expected, const char* readed); 29 | 30 | #endif 31 | 32 | /** 33 | * @} 34 | */ 35 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/game.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @defgroup game Game 3 | * @ingroup api 4 | * @brief A tetris game 5 | * 6 | * This module handles a game of Tetris. A game is composed by the board state, 7 | * the current piece and the score. 
8 | * 9 | * @{ 10 | */ 11 | #ifndef GAME_H 12 | #define GAME_H 13 | 14 | #include 15 | #include "types.h" 16 | #include "board.h" 17 | #include "piece.h" 18 | #include "last_move_info.h" 19 | 20 | /** 21 | * @brief Action decided by the player. 22 | * 23 | * An action is a decision made by the player in a given game state. 24 | * It is where the player drops the piece and in which orientation. 25 | */ 26 | struct Action { 27 | int orientation; /**< Rotation of the piece, from \c 0 to \c 3. */ 28 | int column; /**< Column where the piece is dropped, from \c 1 to \c board->width. */ 29 | }; 30 | 31 | /** 32 | * @brief Configuration of the game pieces. 33 | * 34 | * This structure describes the set of pieces to use in the game. 35 | * In standard Tetris, there are 7 pieces. 36 | */ 37 | typedef struct PieceConfiguration { 38 | int nb_pieces; /**< Number of existing pieces (7 in standard Tetris). */ 39 | Piece *pieces; /**< Shapes of the existing pieces. */ 40 | int *piece_sequence; /**< The sequence of pieces falling 41 | * (a NULL-terminated array of piece indexes), 42 | * or NULL to choose the pieces randomly. */ 43 | int nb_games; /**< Number of games currently allocated that use this piece configuration. */ 44 | } PieceConfiguration; 45 | 46 | /** 47 | * @brief A game. 48 | * 49 | * The game structure. The structure contains the game 50 | * configuration, the current state and some information 51 | * about the previous state of the game. 52 | */ 53 | struct Game { 54 | 55 | /** 56 | * @name Configuration of the pieces 57 | */ 58 | PieceConfiguration *piece_configuration; /**< The pieces of Tetris. */ 59 | 60 | int tetris_implementation; /**< 0: Simplified Tetris, 1: RLC2008 Tetris, 2(to be done): Original Tetris */ 61 | /** 62 | * @name Game state 63 | */ 64 | Board *board; /**< The wall state. */ 65 | int game_over; /**< 1 if the game is over, 0 otherwise. */ 66 | int score; /**< Number of lines removed since the beginning of the game. 
*/ 67 | Piece *current_piece; /**< The current piece falling. */ 68 | int current_piece_index; /**< Index of the current piece. */ 69 | int current_piece_sequence_index; /**< Current index in the sequence of pieces. */ 70 | 71 | /** 72 | * @name Information about the previous state 73 | */ 74 | int previous_piece_index; /**< The last piece placed. */ 75 | LastMoveInfo last_move_info; /**< Information about the last move. */ 76 | }; 77 | 78 | /** 79 | * @name Game creation and destruction 80 | * 81 | * These functions allow to create or destroy a game. 82 | */ 83 | Game *new_game(int tetris_implementation, int width, int height, int allow_lines_after_overflow, 84 | const char *pieces_file_name, int *piece_sequence); 85 | Game *new_standard_game(); 86 | Game *new_game_from_parameters(CommonParameters *parameters); 87 | Game *new_game_copy(const Game *other); 88 | void free_game(Game *game); 89 | /** 90 | * @} 91 | */ 92 | 93 | /** 94 | * @name Observation functions 95 | * 96 | * These functions provide some information about the current game state. 97 | * You can read directly the content of a game's structure. 98 | * 99 | * @{ 100 | */ 101 | int game_get_nb_possible_orientations(Game *game); 102 | int game_get_nb_possible_columns(Game *game, int orientation); 103 | int game_get_current_piece(Game *game); 104 | int game_get_nb_pieces(Game *game); 105 | /** 106 | * @} 107 | */ 108 | 109 | /** 110 | * @name Modification functions 111 | * 112 | * These function change the current game state. 113 | * You should not change the content of a game's structure directly. 
114 | * 115 | * @{ 116 | */ 117 | int game_drop_piece(Game *game, const Action *action, int cancellable); 118 | void game_cancel_last_move(Game *game); 119 | void game_set_current_piece_index(Game *game, int piece_index); 120 | void game_reset(Game *game); 121 | void generate_next_piece(Game *game); 122 | /** 123 | * @} 124 | */ 125 | 126 | /** 127 | * @name Displaying 128 | * @{ 129 | */ 130 | void game_print(FILE *out, Game *game); 131 | /** 132 | * @} 133 | */ 134 | 135 | #endif 136 | 137 | /** 138 | * @} 139 | */ 140 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/games_statistics.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @defgroup game_statistics Game statistics 3 | * @ingroup api 4 | * @brief Statistics about some sequences (episodes) of games played 5 | * 6 | * This module handles the statistics about one or several sequences of games played. 7 | * A seqeuence of games is called an episode. Each episode is saved as a line 8 | * in the statistics file. The line contains the episode number, the best score, 9 | * the worst score, the mean score, the standard deviation and the scores of the 10 | * games played during the episode. Optionnaly, it can also contain the feature weights 11 | * if the games were played with a feature-based policy. 12 | * 13 | * As the first element of each line is the episode number and the second element is 14 | * the episode's mean score, the file can be plotted directly in Gnuplot. 15 | * 16 | * @{ 17 | */ 18 | #ifndef GAMES_STATISTICS_H 19 | #define GAMES_STATISTICS_H 20 | 21 | #include 22 | #include "game.h" 23 | #include "macros.h" 24 | 25 | /** 26 | * @brief Statistics about the episodes. 27 | * 28 | * This structure contains all data about the current episode 29 | * and some global information about all episodes. 
30 | */ 31 | struct GamesStatistics { 32 | /** 33 | * @name Statistics about an episode (reinitialized when games_statistics_reset() is called) 34 | */ 35 | int *scores; /**< Score of each game. */ 36 | int nb_games_played; /**< Number of games to play in the current episode. */ 37 | int min_score; /**< Worst score of a game in this episode. */ 38 | int max_score; /**< Best score of a game in this episode. */ 39 | double mean; /**< Mean score of the games in this episode. */ 40 | double standard_deviation; /**< Standard deviation of the games in this episode. */ 41 | 42 | /** 43 | * @name Information about all episodes played 44 | */ 45 | int nb_episodes; /**< Number of episodes done until now. */ 46 | double best_mean; /**< Best mean score of an episode. */ 47 | FILE *stats_file; /**< The file where the statistics are saved. */ 48 | }; 49 | 50 | /** 51 | * @name Statistics creation and destruction 52 | * 53 | * These functions allow to create or destroy a GameStatistics object. 54 | */ 55 | GamesStatistics *games_statistics_new(const char *stats_file_name, int nb_games, const char *comments); 56 | void games_statistics_free(GamesStatistics *games_statistics); 57 | /** 58 | * @} 59 | */ 60 | 61 | /** 62 | * @name Statistics update 63 | * 64 | * These function update the statistics, taking new information into account. 65 | */ 66 | void games_statistics_add_game(GamesStatistics *stats, int score); 67 | void games_statistics_end_episode(GamesStatistics *games_statistics, const FeaturePolicy *feature_policy); 68 | /** 69 | * @} 70 | */ 71 | 72 | #endif 73 | 74 | /** 75 | * @} 76 | */ 77 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/hashtable.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @defgroup hashtable Hashtables 3 | * @ingroup api 4 | * @brief A hashtable implementation 5 | * 6 | * This module a hashtable implementation. 
A hashtable associates 7 | * a void* object to a string. 8 | * 9 | * @{ 10 | */ 11 | #ifndef HASHTABLE_H 12 | #define HASHTABLE_H 13 | 14 | typedef struct Hashtable Hashtable; 15 | 16 | /** 17 | * @name Hashtable creation and destruction 18 | * @{ 19 | */ 20 | Hashtable* hashtable_new(int table_size, void (*free_function)(void *element)); 21 | void hashtable_free(Hashtable *hashtable); 22 | /** 23 | * @} 24 | */ 25 | 26 | /** 27 | * @name Accessing the elements 28 | * @{ 29 | */ 30 | void* hashtable_get(Hashtable *hashtable, const char *key); 31 | void hashtable_add(Hashtable *hashtable, const char *key, void *data); 32 | int hashtable_get_length(Hashtable *hashtable); 33 | int hashtable_contains(Hashtable *hashtable, const char *key); 34 | void hashtable_foreach(Hashtable *hashtable, void (*function)(const char *key, void *data)); 35 | /** 36 | * @} 37 | */ 38 | 39 | /** 40 | * @name Removing elements 41 | * @{ 42 | */ 43 | void hashtable_remove(Hashtable *hashtable, const char *key); 44 | void hashtable_clear(Hashtable *hashtable); 45 | void hashtable_prune(Hashtable *hashtable, int (*should_remove)(const char *key, void *data)); 46 | /** 47 | * @} 48 | */ 49 | 50 | /** 51 | * @name Displaying (for debug) 52 | * @{ 53 | */ 54 | void hashtable_print(Hashtable *hashtable); 55 | /** 56 | * @} 57 | */ 58 | 59 | #endif 60 | /** 61 | * @} 62 | */ 63 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/interruptions.c: -------------------------------------------------------------------------------- 1 | #include "config.h" 2 | #include "interruptions.h" 3 | 4 | #ifdef HAVE_SIGNAL_H 5 | 6 | #include 7 | #include 8 | #define __USE_POSIX 9 | #include 10 | 11 | /** 12 | * Number of times the user has pressed Ctrl-C. 13 | */ 14 | static int nb_interruptions = 0; 15 | 16 | /** 17 | * Default interruption handler, saved here so that 18 | * we can restore it. 
19 | */ 20 | static struct sigaction old_sigaction; 21 | 22 | /** 23 | * Function called when the user pressed Ctrl-C. 24 | */ 25 | static void interrupt_handler(int sig); 26 | 27 | /** 28 | * Initializes the interruption handler. 29 | */ 30 | void initialize_interruptions(void) { 31 | struct sigaction new_sigaction; 32 | sigset_t sigset; 33 | 34 | /* set the interruption handler */ 35 | new_sigaction.sa_handler = interrupt_handler; 36 | sigemptyset(&sigset); 37 | new_sigaction.sa_mask = sigset; 38 | new_sigaction.sa_flags = 0; 39 | sigaction(SIGINT, &new_sigaction, &old_sigaction); 40 | } 41 | 42 | /** 43 | * Restores the default interruption handler. 44 | */ 45 | void exit_interruptions(void) { 46 | sigaction(SIGINT, &old_sigaction, NULL); 47 | } 48 | 49 | /** 50 | * Returns whether the user pressed Ctrl-C a first time. 51 | */ 52 | int is_interrupted(void) { 53 | return nb_interruptions; 54 | } 55 | 56 | /** 57 | * Function called when the user pressed Ctrl-C. 58 | */ 59 | static void interrupt_handler(int sig) { 60 | switch (nb_interruptions) { 61 | 62 | case 0: 63 | printf("\nInterruption detected - Finishing the current iteration\nPress Ctrl-C again to exit now\n"); 64 | break; 65 | 66 | case 1: 67 | printf("\n"); 68 | exit(0); 69 | break; 70 | } 71 | 72 | nb_interruptions++; 73 | } 74 | 75 | #else 76 | 77 | /* signal.h is not present: we disable the Ctrl-C system */ 78 | 79 | /** 80 | * @cond 81 | */ 82 | 83 | void initialize_interruptions(void) { 84 | 85 | } 86 | 87 | void exit_interruptions(void) { 88 | 89 | } 90 | 91 | int is_interrupted(void) { 92 | return 0; 93 | } 94 | 95 | /** 96 | * @endcond 97 | */ 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/interruptions.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @defgroup interruptions Interruptions 3 | * @ingroup api 4 | * @brief Ctrl-C signal management 5 | * 6 | * 
This module handles the Ctrl-C signal. 7 | * When the user presses Ctrl-C, a message is displayed. 8 | * Then your algorithm should finish the current iteration and stop. 9 | * If the user presses Ctrl-C again, the program exits. 10 | * 11 | * Note that this module uses the POSIX signals mechanism, and requires 12 | * the \c signal.h header. If \c signal.h is not detected by the \c configure 13 | * script, then the mechanism is disabled: the functions of this module 14 | * do nothing, and when the user presses Ctrl-C, the program is stopped. 15 | * 16 | * @{ 17 | */ 18 | #ifndef INTERRUPTIONS_H 19 | #define INTERRUPTIONS_H 20 | 21 | /** 22 | * @brief Initializes the interruption handler. 23 | * 24 | * Call this function if you want to use the Ctrl-C system. 25 | * This function changes the handler of the SIGINT signal. 26 | * After this function is called, if SIGINT is received (i.e. 27 | * the user has pressed Ctrl-C), a message "Finishing the 28 | * current iteration" is displayed. Then the function 29 | * is_interrupted() returns \c 1 and your algorithm should 30 | * finish its current iteration and stop. 31 | * If the SIGINT signal is received a second time (i.e. after the message 32 | * was displayed but before the current iteration of your algorithm 33 | * is finished), then the program stops. 34 | * 35 | * @see exit_interruptions() 36 | * 37 | */ 38 | void initialize_interruptions(void); 39 | 40 | /** 41 | * @brief Restores the default interruption handler. 42 | * 43 | * Call this function to cancel the behavior created by 44 | * initialize_interruptions(). 45 | * 46 | * @see initialize_interruptions() 47 | */ 48 | void exit_interruptions(void); 49 | 50 | /** 51 | * @brief Returns whether the user pressed Ctrl-C a first time. 52 | * 53 | * initialize_interruptions() should have been called before. 54 | * Your algorithm has to call this function to know when the user wants to 55 | * stop. 
As soon as this function returns 1, your algorithm should finish 56 | * its current iteration, save some data if necessary and then stop. 57 | * 58 | * @return 1 if the user pressed Ctrl-C once, and 0 otherwise. 59 | */ 60 | int is_interrupted(void); 61 | 62 | #endif 63 | /** 64 | * @} 65 | */ 66 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/last_move_info.c: -------------------------------------------------------------------------------- 1 | #include "config.h" 2 | #include "last_move_info.h" 3 | 4 | /** 5 | * @brief Prints information about the last move. 6 | * @param out file to write 7 | * @param last_move_info some last move information to print 8 | */ 9 | void print_last_move_info(FILE *out, LastMoveInfo *last_move_info) { 10 | printf("Last move:\n"); 11 | printf(" Removed lines: %d\n", last_move_info->removed_lines); 12 | printf(" Landing height: %d\n", last_move_info->landing_height_bottom); 13 | printf(" Eliminated cells from the last piece : %d\n", last_move_info->eliminated_bricks_in_last_piece); 14 | } 15 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/last_move_info.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @defgroup last_move_info Last move info 3 | * @ingroup api 4 | * @brief Information about the last move 5 | * 6 | * This module stores some information about the last move made. 7 | * This information is actually used by some features: indeed, several features 8 | * evaluate the last action instead of the current state itself. 9 | * 10 | * @{ 11 | */ 12 | #ifndef LAST_MOVE_INFO_H 13 | #define LAST_MOVE_INFO_H 14 | 15 | #include 16 | #include "types.h" 17 | 18 | /** 19 | * @brief Information about the last action. 
20 | */ 21 | typedef struct LastMoveInfo { 22 | 23 | /** 24 | * @name Effect of the action on the board 25 | */ 26 | int removed_lines; /**< Number of rows completed during the move 27 | * (also used to cancel a move). */ 28 | int landing_height_bottom; /**< Index of the row where the bottom part 29 | * of the piece is put. */ 30 | int eliminated_bricks_in_last_piece; /**< Number of cells of the last piece put 31 | * that were part of rows completed. */ 32 | 33 | /** 34 | * @name The action made 35 | */ 36 | int column; /**< Column where the last piece was put 37 | * (\c 1 to \c w where \c w is the board size). */ 38 | int orientation; /**< Orientation of the last piece (\c 0 to 39 | * n - 1 where \c n is the number of 40 | * possible orientations of the piece. */ 41 | PieceOrientation *oriented_piece; /* The last piece put. */ 42 | 43 | int nb_steps; /* The number of steps (RLC mode) */ 44 | 45 | } LastMoveInfo; 46 | 47 | /** 48 | * @name Displaying 49 | * @{ 50 | */ 51 | void print_last_move_info(FILE *out, LastMoveInfo *last_move_info); 52 | /** 53 | * @} 54 | */ 55 | 56 | #endif 57 | /** 58 | * @} 59 | */ 60 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/piece.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @defgroup piece Pieces 3 | * @ingroup api 4 | * @brief Tetris pieces 5 | * 6 | * This module handles the game pieces. A piece is composed by an array of orientations. 7 | * Each orientation is an array of rows, where each row is represented by a 16-bit integer. 8 | * 9 | * @{ 10 | */ 11 | #ifndef PIECE_H 12 | #define PIECE_H 13 | 14 | #include 15 | #include 16 | #include "types.h" 17 | 18 | /** 19 | * @brief A Tetris piece oriented in a specific direction. 20 | */ 21 | struct PieceOrientation { 22 | int width; /**< Width of the piece in this orientation. */ 23 | int height; /**< Height of the piece in this orientation. 
*/ 24 | uint16_t *bricks; /**< Shape of the piece in this orientation 25 | * (array of size \c height where each element is a 26 | * 16-bit integer representing a row). */ 27 | int *nb_full_cells_on_rows; /**< Number of full cells on each row (array of 28 | * size \c height where each element is the 29 | * number of full cells on a row. */ 30 | }; 31 | 32 | /** 33 | * @brief A Tetris piece with its possible orientations. 34 | */ 35 | struct Piece { 36 | int nb_orientations; /**< Number of possible orientations of the shape. */ 37 | PieceOrientation *orientations; /**< Array of size \c nb_orientations, containing 38 | * the possible orientations of the piece 39 | * (not an array of pointers) */ 40 | }; 41 | 42 | /** 43 | * @name Piece creation and destruction 44 | * 45 | * These functions allow to create or destroy the pieces. 46 | * 47 | * @{ 48 | */ 49 | void load_pieces(const char *file_name, int *nb_pieces, Piece **pieces); 50 | void free_piece(Piece *piece); 51 | /** 52 | * @} 53 | */ 54 | 55 | 56 | /** 57 | * @name Displaying 58 | * 59 | * These functions display human-readable views of a Tetris piece. 60 | */ 61 | void piece_print(FILE *out, Piece *piece); 62 | void piece_print_orientation(FILE *out, PieceOrientation *orientation); 63 | void piece_print_orientations(FILE *out, Piece *piece); 64 | /** 65 | * @} 66 | */ 67 | 68 | #endif 69 | 70 | /** 71 | * @} 72 | */ 73 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/random.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "config.h" 4 | #include "random.h" 5 | 6 | /** 7 | * Initializes the GSL random number generator with a specified seed. 8 | */ 9 | void initialize_random_generator(unsigned int seed) { 10 | srand(seed); 11 | } 12 | 13 | /** 14 | * Returns an integer number in [a,b[. 
/**
 * Returns an integer number in [a,b[.
 *
 * Bug fix: the original returned (rand() % (b - a)) - a, which for a > 0
 * yields values in [-a, b - 2*a[ rather than [a, b[. The lower bound must
 * be added, not subtracted.
 */
int random_uniform(int a, int b) {
  return (rand() % (b - a)) + a;
}
(useful for the RL competition) 47 | */ 48 | int get_reward_tetris_are_better(Game *game) { 49 | switch (game->last_move_info.removed_lines) { 50 | case 0: 51 | return 0; 52 | case 1: 53 | return 1; 54 | case 2: 55 | return 3; 56 | case 3: 57 | return 7; 58 | case 4: 59 | return 13; 60 | } 61 | return(0); 62 | } 63 | 64 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/rewards.h: -------------------------------------------------------------------------------- 1 | /** 2 | * This module provides some usual reward functions. 3 | */ 4 | 5 | #ifndef REWARD_H 6 | #define REWARD_H 7 | 8 | #include "types.h" 9 | #include "game.h" 10 | 11 | /** 12 | * Function type for a reward function. 13 | */ 14 | typedef int (RewardFunction)(Game *game); 15 | 16 | /** 17 | * Constants to identify the reward functions. 18 | */ 19 | typedef enum { 20 | NO_REWARD, /* always zero */ 21 | REWARD_REMOVED_LINES, /* number of lines removed in the last move */ 22 | REWARD_ONE, /* always 1 (rewards the number of moves before the game is over) */ 23 | REWARD_AT_LEAST_ONE_LINE, /* 1 if there was at least one line removed in the last move */ 24 | REWARD_TETRIS_ARE_BETTER /* 0,1,4,9,15 (for RL competition) */ 25 | } RewardFunctionID; 26 | 27 | /** 28 | * This structure defines a reward function with its ID. 29 | */ 30 | struct RewardDescription { 31 | RewardFunction *reward_function; 32 | RewardFunctionID reward_function_id; 33 | }; 34 | 35 | /** 36 | * Associates a reward function to each index. 37 | */ 38 | extern RewardFunction *all_reward_functions[]; 39 | 40 | 41 | /************************************ 42 | * REWARD FUNCTIONS * 43 | ************************************/ 44 | 45 | /** 46 | * Returns 0. 47 | */ 48 | int get_no_reward(Game *game); 49 | 50 | /** 51 | * Returns the number of lines removed in the last move. 52 | */ 53 | int get_reward_removed_lines(Game *game); 54 | 55 | /** 56 | * Returns 1 if the game is not over. 
/**
 * Returns the state code corresponding to a game
 * without the current piece.
 *
 * The code packs the occupancy of every wall row into one 32-bit integer:
 * iterating from the top of the wall down to row 0, the accumulator is
 * shifted left by WIDTH bits and the payload bits of the next row are
 * ORed in. Row 0 therefore ends up in the lowest WIDTH bits.
 *
 * NOTE(review): brick_masks_inv[0] is presumably the mask selecting the
 * playable cells of a row (defined in brick_masks.c) and WEAK_BITS_SHIFT
 * drops them to the low bits -- confirm against the board layout.
 */
uint32_t get_game_code(Game *game) {
  uint32_t state_code;
  uint16_t *board_rows;
  int i;

  state_code = 0;

  board_rows = game->board->rows;
  /* Top row first, so that lower rows end up in the low-order bits. */
  for (i = game->board->wall_height - 1; i >= 0; i--) {
    state_code = state_code << WIDTH;
    /* Keep only the playable cells of the row, shifted to the low bits. */
    state_code |= (board_rows[i] & brick_masks_inv[0]) >> WEAK_BITS_SHIFT;
  }

  return state_code;
}
/**
 * Sets the game board (not including the current piece)
 * corresponding to a integer code.
 *
 * Inverse of get_game_code(): the low LAST_BITS_MASK bits of the code
 * describe row 0, the next WIDTH bits describe row 1, and so on for
 * HEIGHT rows.
 */
void set_game_state(Game *game, uint32_t state_code) {
  uint16_t *board_rows;
  uint16_t row, empty_row;
  int i;
  int wall_height;

  /* each bit represents the state of a cell */
  board_rows = game->board->rows;
  empty_row = game->board->empty_row;
  for (i = 0; i < HEIGHT; i++) {
    /* NOTE(review): this local is written every iteration but never read;
     * the assignment after the loop uses i, which always equals HEIGHT
     * here. If wall_height was meant to track the highest non-empty row,
     * this looks like a latent bug -- confirm the intended semantics. */
    wall_height = i;
    row = (uint16_t) (state_code & LAST_BITS_MASK);
    if (!row) { /* the row is empty */
      board_rows[i] = empty_row;
    }
    else {
      /* Put the payload bits back into board position, keeping border bits. */
      board_rows[i] = (row << WEAK_BITS_SHIFT) | empty_row;
    }
    state_code = state_code >> WIDTH;
  }
  game->board->wall_height = i;
}
40 | */ 41 | typedef struct ValueIterationParameters { 42 | int nb_pieces; /* number of pieces */ 43 | double gamma; /* discount factor */ 44 | double delta_limit; /* limit to stop the algorithm */ 45 | int iterations; /* current number of iterations */ 46 | int use_buffer; /* 1 to use a buffer */ 47 | 48 | /* files */ 49 | char piece_file_name[MAX_FILE_NAME]; 50 | char delta_file_name[MAX_FILE_NAME]; 51 | } ValueIterationParameters; 52 | 53 | typedef struct OldValueIterationParameters { 54 | int nb_pieces; /* number of pieces */ 55 | double gamma; /* discount factor */ 56 | double delta_limit; /* limit to stop the algorithm */ 57 | int iterations; /* current number of iterations */ 58 | 59 | /* files */ 60 | char piece_file_name[MAX_FILE_NAME]; 61 | char delta_file_name[MAX_FILE_NAME]; 62 | } OldValueIterationParameters; 63 | 64 | uint32_t get_game_code(Game *game); 65 | void set_game_state(Game *game, uint32_t state_code); 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /pyrl/environments/mdptetris/src/types.h: -------------------------------------------------------------------------------- 1 | /** 2 | * This header declares the main types defined in the modules. 3 | * Each header should include this one first to avoid cycling 4 | * dependencies other headers. 5 | */ 6 | 7 | #ifndef TYPES_H 8 | #define TYPES_H 9 | 10 | typedef struct Game Game; 11 | typedef struct Action Action; 12 | typedef struct Board Board; 13 | typedef struct Piece Piece; 14 | typedef struct PieceOrientation PieceOrientation; 15 | typedef struct RewardDescription RewardDescription; 16 | typedef struct Feature Feature; 17 | typedef struct FeaturePolicy FeaturePolicy; 18 | typedef struct GamesStatistics GamesStatistics; 19 | typedef struct Strategy Strategy; 20 | typedef struct CommonParameters CommonParameters; 21 | 22 | /** 23 | * @brief Function type for a feature. 
@register_environment
class MultiRoomGridworld(gridworld.Gridworld):
    """Continuous gridworld divided into three rooms by unit-thickness walls.

    A horizontal wall at 0.5*size_y (door at 0.9*size_x) separates a bottom
    room from the top half; a vertical wall at 0.3*size_x (door at
    0.75*size_y) splits the top half into left and right rooms.
    """
    name = "Multi-Room Gridworld"

    # All parameters are in units of 1, where 1 is how far on average
    # the agent can move with a single action.
    # The walls will always be of unit thickness and be placed
    # at 0.5*size_y with a door at 0.9*size_x, and
    # above that wall a vertical wall will be placed at 0.3*size_x with a door at 0.75*size_y
    # If the goal falls inside a wall it will be pushed to the nearest non-wall location
    def __init__(self, size_x=10, size_y=10, goal_x=10, goal_y=10, noise=0.0, random_start=False, fudge=1.4143):
        gridworld.Gridworld.__init__(self, size_x=size_x, size_y=size_y, goal_x=goal_x, goal_y=goal_y,
                                     noise=noise, random_start=random_start, fudge=fudge)
        # Build walls and doors (actually might only need to specify the doors)
        #self.wall1 = numpy.array([[0.0, size_y*0.5], [size_x, size_y*0.5]])
        self.door1 = numpy.array([size_x*0.9, size_y*0.5])  # door in the horizontal wall
        #self.wall2 = numpy.array([[size_x*0.3, size_y*0.5], [size_x*0.3, size_y]])
        self.door2 = numpy.array([size_x*0.3, size_y*0.75])  # door in the vertical wall
        self.goal = self.fixPoint(self.goal)
        self.domain_name = "Continuous MultiRoom Gridworld by Will Dabney"

    def fixPoint(self, point):
        """Return point moved to the nearest legal (non-wall) location.

        Points within 0.5 (Chebyshev distance) of either door are left
        unchanged; otherwise the point is clipped into whichever room it
        belongs to, keeping it at least 0.51 away from the walls.
        """
        if numpy.abs(self.door1 - point).max() <= 0.5 or numpy.abs(self.door2 - point).max() <= 0.5:
            return point

        cond1 = point[1] <= self.door1[1]  # below the horizontal wall?
        cond2 = point[0] <= self.door2[0]  # left of the vertical wall?

        if cond1: # Bottom room
            return point.clip([0.0, 0.0], [self.size[0], self.door1[1]-0.51])
        else:
            if cond2: # Top left room
                return point.clip([0.0, self.door1[1]+0.51], [self.door2[0]-0.51, self.size[1]])
            else: # Top right room
                return point.clip([self.door2[0]+0.51, self.door1[1]+0.51], self.size)

    def isPointInWall(self, point):
        """Return True if point lies inside a wall (i.e. fixPoint would move it)."""
        if (self.fixPoint(point) == point).all():
            return False
        else:
            return True

    def reset(self):
        """Reset the agent position: random legal point, or the origin."""
        if self.random_start:
            self.pos = self.fixPoint(numpy.random.random((2,)) * self.size)
        else:
            self.pos[:] = 0.0

    def takeAction(self, action):
        """Step via the parent gridworld, then push the agent out of any wall."""
        reward = gridworld.Gridworld.takeAction(self, action)
        self.pos = self.fixPoint(self.pos)
        return reward
@register_environment
class PuddleWorld(gridworld.Gridworld):
    """Continuous gridworld with Gaussian-shaped puddles that penalize the agent.

    Each puddle is given by a mean (its center) and a flattened 2x2
    covariance matrix; every step adds puddle_penalty scaled by each
    puddle's density at the agent's position.
    """
    name = "Puddle World"
    # NOTE(review): puddle_means/puddle_var are mutable default arguments.
    # They are only read (mapped over), never mutated, so this is currently
    # harmless, but a None-sentinel would be safer.
    def __init__(self, size_x=10, size_y=10, goal_x=10, goal_y=10, puddle_penalty=-100.0,
                 puddle_means=[(0.35, 0.5), (0.5, 0.35)], puddle_var=[(1.2, 1.e-5, 1.e-5, 0.5), (0.5, 1.e-5, 1.e-5, 1.2)],
                 noise=0.0, reward_noise=0.0, random_start=False, fudge=1.4143):

        gridworld.Gridworld.__init__(self, size_x=size_x, size_y=size_y, goal_x=goal_x,
                                     goal_y=goal_y, noise=noise, reward_noise=reward_noise, random_start=random_start, fudge=fudge)
        self.puddle_penalty = puddle_penalty
        self.puddle_means = map(numpy.array, puddle_means)
        # Pre-invert the covariance matrices, since mvnpdf expects inverses.
        self.puddle_var = map(lambda cov: numpy.linalg.inv(numpy.array(cov).reshape((2,2))), puddle_var)
        self.domain_name = "Continuous PuddleWorld"

    def reset(self):
        """Reset the agent position: random point, or the origin."""
        if self.random_start:
            self.pos = numpy.random.random((2,)) * self.size
        else:
            self.pos = numpy.array([0., 0.])

    def takeAction(self, action):
        """Step via the parent gridworld and add the puddle penalties."""
        base_reward = gridworld.Gridworld.takeAction(self, action)
        for mu, inv_cov in zip(self.puddle_means, self.puddle_var):
            base_reward += mvnpdf(self.pos, mu, inv_cov) * self.puddle_penalty
        return base_reward
" + \ 57 | "Where mean specifies the center of the puddle and cov specifies the " + \ 58 | "covariance matrix of the multivariate normal distribution that describes " + \ 59 | "the puddle's depth.") 60 | parser.add_argument("--puddle_penalty", type=float, default=-100, 61 | help="The reward penalty scale for walking through puddles.") 62 | args = parser.parse_args() 63 | kwargs = {} 64 | if args.puddle is not None: 65 | means = [] 66 | covs = [] 67 | for puddle in args.puddle: 68 | means.append(tuple(puddle[:2])) 69 | covs.append(tuple(puddle[2:])) 70 | kwargs['puddle_means'] = means 71 | kwargs['puddle_var'] = covs 72 | 73 | if args.size_x: 74 | kwargs['size_x'] = args.size_x 75 | if args.size_y: 76 | kwargs['size_y'] = args.size_y 77 | if args.goal_x: 78 | kwargs['goal_x'] = args.goal_x 79 | if args.goal_y: 80 | kwargs['goal_y'] = args.goal_y 81 | if args.noise: 82 | kwargs['noise'] = args.noise 83 | if args.fudge: 84 | kwargs['fudge'] = args.fudge 85 | if args.random_restarts: 86 | kwargs['random_start'] = args.random_restarts 87 | 88 | EnvironmentLoader.loadEnvironment(PuddleWorld(**kwargs)) 89 | -------------------------------------------------------------------------------- /pyrl/environments/skeleton_environment.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2007, Mark Lee 3 | # 4 | #http://rl-glue-ext.googlecode.com/ 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@register_environment
class skeleton_environment(Environment):
    """A 21-state chain environment with states {0, 1, ..., 20}.

    The agent starts in state 10. Action 0 decrements the state, action 1
    increments it. The episode ends in state 0 (reward -1) or state 20
    (reward +1); every other step gives reward 0.
    """
    name = "Skeleton environment"

    # Current chain state; re-initialized to 10 at the start of each episode.
    currentState=10

    def env_init(self):
        """Return the RL-Glue task specification string."""
        return "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1.0 OBSERVATIONS INTS (0 20) ACTIONS INTS (0 1) REWARDS (-1.0 1.0) EXTRA skeleton_environment(Python) by Brian Tanner."

    def env_start(self):
        """Begin an episode in the middle of the chain; return the first observation."""
        self.currentState = 10
        return self._observe()

    def env_step(self, thisAction):
        """Apply the given action and return reward, observation and terminal flag."""
        move = thisAction.intArray[0]
        if move == 0:
            self.currentState = self.currentState - 1
        if move == 1:
            self.currentState = self.currentState + 1

        theReward = 0
        episodeOver = 0
        if self.currentState <= 0:
            self.currentState = 0
            theReward = -1
            episodeOver = 1
        if self.currentState >= 20:
            self.currentState = 20
            theReward = 1
            episodeOver = 1

        result = Reward_observation_terminal()
        result.r = theReward
        result.o = self._observe()
        result.terminal = episodeOver
        return result

    def env_cleanup(self):
        """Nothing to release."""
        pass

    def env_message(self, inMessage):
        """Answer simple text queries from the experiment program."""
        if inMessage == "what is your name?":
            return "my name is skeleton_environment, Python edition!"
        return "I don't know how to respond to your message"

    def _observe(self):
        """Build an Observation carrying the current state."""
        obs = Observation()
        obs.intArray = [self.currentState]
        return obs
@register_environment
class WindyGridworld(gridworld.Gridworld):
    """Continuous gridworld where a wind pushes the agent in the +y direction.

    The push at each step is wind_power times the Gaussian pdf of the
    current x-coordinate (mean wind_center, std wind_stdev), so it is
    strongest near x == wind_center.
    """
    name = "Windy Gridworld"
    # The effect of the wind is always positive in the y dimension, and
    # is equal to the wind_power multiplied with the pdf of the current x-coordinate on a Gaussian distribution
    # with mean wind_center and standard deviation wind_stdev.
    def __init__(self, size_x=10, size_y=10, goal_x=10, goal_y=10, wind_center=7., wind_stdev=1.0, wind_power=2.0, noise=0.0, random_start=False, fudge=1.4143):
        gridworld.Gridworld.__init__(self, size_x=size_x, size_y=size_y, goal_x=goal_x, goal_y=goal_y, noise=noise, random_start=random_start, fudge=fudge)
        self.wind_center = wind_center
        self.wind_stdev = wind_stdev
        self.wind_power = wind_power
        self.domain_name = "Continuous Windy Gridworld by Will Dabney"

    def reset(self):
        """Start at the left edge halfway up, or at a random position."""
        if self.random_start:
            self.pos = numpy.random.random((2,)) * self.size
        else:
            self.pos = numpy.array([0.0, self.size[1]*0.5])

    def takeAction(self, action):
        """Apply the wind displacement, then take a normal gridworld step."""
        self.pos[1] += norm.pdf(self.pos[0], self.wind_center, self.wind_stdev) * self.wind_power
        return gridworld.Gridworld.takeAction(self, action)
mode.') 58 | gridworld.addGridworldArgs(parser) 59 | parser.add_argument("--wind_center", type=float, default=7, help="Center, or strongest point, in the x-direction of the wind") 60 | parser.add_argument("--wind_scale", type=float, default=1.0, help="Scale, or width, of the wind effects around the center.") 61 | parser.add_argument("--wind_power", type=float, default=2.0, help="The power, or strength, of the wind.") 62 | args = parser.parse_args() 63 | EnvironmentLoader.loadEnvironment( 64 | WindyGridworld(args.size_x, args.size_y, args.goal_x, args.goal_y, wind_center=args.wind_center, 65 | wind_stdev=args.wind_scale, wind_power=args.wind_power, noise=args.noise, 66 | random_start=args.random_restarts, fudge=args.fudge)) 67 | -------------------------------------------------------------------------------- /pyrl/experiments/.gitignore: -------------------------------------------------------------------------------- 1 | */*.py[co] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | var 12 | sdist 13 | develop-eggs 14 | .installed.cfg 15 | */*~ 16 | # Installer logs 17 | pip-log.txt 18 | 19 | # Unit test / coverage reports 20 | .coverage 21 | .tox 22 | 23 | #Translations 24 | *.mo 25 | 26 | #Mr Developer 27 | .mr.developer.cfg 28 | -------------------------------------------------------------------------------- /pyrl/experiments/README.md: -------------------------------------------------------------------------------- 1 | python-rl.experiments 2 | ========= 3 | 4 | Experiment scripts in python are used to run an agent on an environment in RLGlue. 
@register_experiment
class Episodic(object):
    """Episodic RL-Glue experiment.

    Runs num_runs trials of num_episodes episodes each, recording
    (episode index, steps, runtime, return, termination flag) per episode,
    either printed to stdout or appended to a CSV file.
    """
    name = "Episodic"

    def __init__(self, config, **kwargs):
        # maxsteps: per-episode step cutoff, forwarded to RL_episode.
        self.maxsteps = kwargs.setdefault('maxsteps', 5000)
        self.num_episodes = kwargs.setdefault('num_episodes', 10)
        self.num_runs = kwargs.setdefault('num_runs', 1)
        self.timed = kwargs.setdefault('timed', True)
        self.configuration = config

        # If explicit agent and environment objects are supplied, run
        # in-process via RLGlueLocal; otherwise use the networked RL-Glue.
        if kwargs.has_key('agent') and kwargs.has_key('environment'):
            self.agent = kwargs['agent']
            self.environment = kwargs['environment']
            self.rlglue = RLGlueLocal.LocalGlue(self.environment, self.agent)
        else:
            self.rlglue = rl_glue

    def run_episode(self):
        """Run one episode; return (terminal, total steps, total reward, runtime).

        Returns steps == -1 (with zero reward/runtime) if the agent reports
        that it has diverged, so callers can detect divergence in the log.
        """
        terminal = 0
        runtime = 0
        # Query the agent whether or not it has diverged
        if self.hasAgentDiverged():
            return 0, -1, 0.0, 0.0 # -1 number of steps, signals that divergence.
        if self.timed:
            timer = Timer()
            with timer:
                terminal = self.rlglue.RL_episode(self.maxsteps)
            runtime = timer.duration_in_seconds()
        else:
            terminal = self.rlglue.RL_episode(self.maxsteps)
        totalSteps = self.rlglue.RL_num_steps()
        totalReward = self.rlglue.RL_return()

        return terminal, totalSteps, totalReward, runtime

    def run_trial(self, filename=None):
        """Run num_episodes episodes, printing results or appending them to a CSV file."""
        self.rlglue.RL_init()
        for i in range(self.num_episodes):
            term, steps, reward, runtime = self.run_episode()
            if filename is None:
                print i, steps, runtime, reward, term
            else:
                # Append so that multiple trials accumulate in one file.
                with open(filename, "a") as f:
                    csvwrite = csv.writer(f)
                    csvwrite.writerow([i, steps, runtime, reward, term])
        self.rlglue.RL_cleanup()

    def run_experiment(self, filename=None):
        """Run num_runs independent trials."""
        if filename is None:
            print 'trial, number of steps, runtime, accumulated reward, termination'
        for run in range(self.num_runs):
            self.run_trial(filename=filename)

    def hasAgentDiverged(self):
        """Sends an rl-glue message to the agent asking if it has diverged or not.
        The message is exactly: agent_diverged?
        The expected response is: True (if it has), False (if it has not)
        The responses are not case sensitive, and anything other than true or false
        will be treated as a false (to support agents which do not have this implemented).
        """
        return self.rlglue.RL_agent_message("agent_diverged?").lower() == "true"
43 | tmp_file = "rndtrial" + str(numpy.random.randint(1.e10)) + ".dat" 44 | Episodic.run_experiment(self, filename = tmp_file) 45 | 46 | # Collect results 47 | locs, means, std = plotExperiment.processFile(tmp_file, self.evaluate, verbose=False, method=self.eval_reduce, kmeans_k=self.k) 48 | json_out = copy.deepcopy(self.configuration) 49 | json_out['agent']['params'] = parameters 50 | json_out['experiment']['episodes'] = locs.tolist() 51 | json_out['experiment']['returns'] = means.tolist() 52 | json_out['experiment']['deviations'] = std.tolist() 53 | 54 | if filename is None: 55 | print json.dumps(json_out) 56 | else: 57 | with open(filename, "a") as f: 58 | f.write(json.dumps(json_out) + "\n") 59 | os.remove(tmp_file) 60 | 61 | -------------------------------------------------------------------------------- /pyrl/misc/.gitignore: -------------------------------------------------------------------------------- 1 | */*.py[co] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | var 12 | sdist 13 | develop-eggs 14 | .installed.cfg 15 | */*~ 16 | # Installer logs 17 | pip-log.txt 18 | 19 | # Unit test / coverage reports 20 | .coverage 21 | .tox 22 | 23 | #Translations 24 | *.mo 25 | 26 | #Mr Developer 27 | .mr.developer.cfg 28 | -------------------------------------------------------------------------------- /pyrl/misc/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | __all__ = ["matrix", "timer", "json", "parameter"] 3 | 4 | 5 | -------------------------------------------------------------------------------- /pyrl/misc/json.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Cast all the unicode strings recieved from a json load into strings 4 | # http://stackoverflow.com/questions/956867/how-to-get-string-objects-instead-unicode-ones-from-json-in-python 5 | def convert(input): 6 | if isinstance(input, dict): 7 | return 
{convert(key): convert(value) for key, value in input.iteritems()} 8 | elif isinstance(input, list): 9 | return [convert(element) for element in input] 10 | elif isinstance(input, unicode): 11 | return input.encode('utf-8') 12 | else: 13 | return input 14 | -------------------------------------------------------------------------------- /pyrl/misc/matrix.py: -------------------------------------------------------------------------------- 1 | import numpy, itertools, math 2 | 3 | # Compute the value of (A + uv^T)^-1 given A^-1, u, and v. 4 | # Uses the Sherman-Morrison formula 5 | def SMInv(Ainv, u, v, e): 6 | u = u.reshape((len(u),1)) 7 | v = v.reshape((len(v),1)) 8 | if e is not None: 9 | g = numpy.dot(Ainv, u) / (e + numpy.dot(v.T, numpy.dot(Ainv, u))) 10 | return (Ainv / e) - numpy.dot(g, numpy.dot(v.T, Ainv/e)) 11 | else: 12 | return Ainv - numpy.dot(Ainv, numpy.dot(numpy.dot(u,v.T), Ainv)) / ( 1 + numpy.dot(v.T, numpy.dot(Ainv, u))) 13 | 14 | def vector_angle(u, v): 15 | return numpy.arccos(numpy.dot(u,v)/(numpy.linalg.norm(u)*numpy.linalg.norm(v)))*180.0/numpy.pi 16 | 17 | # Modified version of this solution: 18 | # http://stackoverflow.com/questions/11615664/multivariate-normal-density-in-python 19 | # Takes the inverse of the covariance matrix instead of the covariance matrix 20 | def mvnpdf(x, mu, sigma_inv): 21 | size = len(x) 22 | if size == len(mu) and sigma_inv.shape == (size, size): 23 | det = 1./numpy.linalg.det(sigma_inv) 24 | norm_const = 1.0/ ( math.pow((2*numpy.pi),float(size)/2) * math.pow(det,0.5) ) 25 | x_mu = x - mu 26 | result = math.pow(math.e, -0.5 * numpy.dot(x_mu, numpy.dot(sigma_inv, x_mu))) 27 | return norm_const * result 28 | else: 29 | raise NameError("The dimensions of the input don't match") 30 | 31 | -------------------------------------------------------------------------------- /pyrl/misc/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | class Timer(object): 4 | 
"""Context manager to time a block of code. 5 | 6 | From http://stackoverflow.com/a/1685337/1306923 7 | Thanks to Corey Porter! 8 | 9 | """ 10 | def __enter__(self): 11 | self.__start = time.time() 12 | 13 | def __exit__(self, type, value, traceback): 14 | # Error handling here 15 | self.__finish = time.time() 16 | 17 | def duration_in_seconds(self): 18 | return self.__finish - self.__start 19 | -------------------------------------------------------------------------------- /pyrl/rlglue/.gitignore: -------------------------------------------------------------------------------- 1 | */*.py[co] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | var 12 | sdist 13 | develop-eggs 14 | .installed.cfg 15 | */*~ 16 | # Installer logs 17 | pip-log.txt 18 | 19 | # Unit test / coverage reports 20 | .coverage 21 | .tox 22 | 23 | #Translations 24 | *.mo 25 | 26 | #Mr Developer 27 | .mr.developer.cfg 28 | -------------------------------------------------------------------------------- /pyrl/rlglue/RLGlueLocal.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013, Will Dabney 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
#
#
# $Revision: 1 $
# $Date: 2013-01-24 $
# $Author: Will Dabney (amarack) $

from rlglue.agent.Agent import Agent
from rlglue.environment.Environment import Environment

from rlglue.types import Action
from rlglue.types import Observation

from rlglue.types import Observation_action
from rlglue.types import Reward_observation_action_terminal
from rlglue.types import Reward_observation_terminal

# This class provides a seemless way of running python RLGlue experiments locally without
# the use of sockets/network. I have no idea why this was not included in the python codec,
# but I really need this functionality. Maybe it will help you as well.
class LocalGlue:
    """In-process replacement for the RL-Glue network glue.

    Wires one environment and one agent together directly, mirroring the
    RL_* API of rlglue.RLGlue so experiments can run without sockets.
    """
    def __init__(self,theEnvironment,theAgent):
        self.env = theEnvironment
        self.agent = theAgent
        # Action returned by the agent on the previous step; None marks
        # "no episode in progress".
        self.prevact = None
        self.reward_return = 0.0
        self.step_count = 0
        self.episode_count = 0
        # Terminal flag of the last step (1 once the episode has ended).
        self.exitStatus = 0

    def RL_init(self):
        # Initialize environment first: its task spec is handed to the agent.
        taskSpecResponse = self.env.env_init()
        self.agent.agent_init(taskSpecResponse)
        self.prevact = None
        self.reward_return = 0.0
        self.step_count = 0
        self.episode_count = 0
        return taskSpecResponse

    def RL_start(self):
        # Begin a new episode: reset per-episode counters, then get the
        # first observation and the agent's first action.
        self.reward_return = 0.0
        self.step_count = 1
        self.episode_count += 1
        self.prevact = None
        self.exitStatus = 0
        obs = self.env.env_start()
        action = self.agent.agent_start(obs)
        obsact = Observation_action()
        obsact.o = obs
        obsact.a = action
        self.prevact = action
        return obsact

    def RL_step(self):
        # Lazily start an episode if none is in progress.
        if self.prevact is None:
            self.RL_start()
        self.step_count += 1
        rot = self.env.env_step(self.prevact)
        roat = Reward_observation_action_terminal()
        roat.terminal = rot.terminal
        self.exitStatus = rot.terminal

        if rot.terminal == 1:
            # Terminal transition: notify the agent and clear prevact so the
            # next RL_step starts a fresh episode.
            self.agent.agent_end(rot.r)
            roat.a = self.prevact
            self.prevact = None
        else:
            self.prevact = self.agent.agent_step(rot.r, rot.o)
            roat.a = self.prevact

        self.reward_return += rot.r
        roat.r = rot.r
        roat.o = rot.o
        return roat

    def RL_cleanup(self):
        self.env.env_cleanup()
        self.agent.agent_cleanup()

    def RL_agent_message(self, message):
        # NOTE(review): `is None` would be the idiomatic test here.
        if message == None:
            message=""
        return self.agent.agent_message(message)

    def RL_env_message(self, message):
        if message == None:
            message=""
        return self.env.env_message(message)

    def RL_return(self):
        # Accumulated reward of the current (or just-finished) episode.
        return self.reward_return

    def RL_num_steps(self):
        return self.step_count

    def RL_num_episodes(self):
        return self.episode_count

    def RL_episode(self, num_steps):
        # Run one episode to termination or until num_steps steps.
        self.RL_start()
        while self.exitStatus != 1:
            # If num_steps is zero (or less) then treat as unlimited
            if (num_steps > 0) and self.step_count >= num_steps:
                break
            roat = self.RL_step()
            self.exitStatus = roat.terminal
        # 1 when the episode terminated naturally, 0 when the step limit hit.
        return self.exitStatus

-------------------------------------------------------------------------------- /pyrl/rlglue/TaskSpecRLGlue.py: --------------------------------------------------------------------------------

# There didn't appear to be any python class in place in RLGlue to allow you to
# easily create the Task Spec string like you can in java and c++. So this is
# my substitute so that we can be as cool as those languages.
5 | 6 | # VERSION PROBLEMTYPE DISCOUNTFACTOR 7 | # OBSERVATIONS INTS ([times-to-repeat-this-tuple=1] )* DOUBLES 8 | # ([times-to-repeat-this-tuple=1] )* CHARCOUNT ACTIONS INTS 9 | # ([times-to-repeat-this-tuple=1] )* DOUBLES ([times-to-repeat-this-tuple=1] 10 | # )* CHARCOUNT REWARDS ( ) EXTRA 11 | # [extra text of your choice goes here]"; 12 | 13 | class TaskSpec: 14 | def __init__(self, discount_factor=1.0, reward_range=(-1,1)): 15 | self.version = "RL-Glue-3.0" 16 | self.actions = {} 17 | self.observations = {} 18 | self.prob_type = "episodic" 19 | self.disc_factor = discount_factor 20 | self.extras = "" 21 | self.act_charcount = 0 22 | self.obs_charcount = 0 23 | self.reward_range = reward_range 24 | 25 | def toTaskSpec(self): 26 | ts_list = ["VERSION " + self.version, 27 | "PROBLEMTYPE " + self.prob_type, 28 | "DISCOUNTFACTOR " + str(self.disc_factor)] 29 | 30 | # Observations 31 | if len(self.observations.keys()) > 0: 32 | ts_list += ["OBSERVATIONS"] 33 | if self.observations.has_key("INTS"): 34 | ts_list += ["INTS"] + self.observations["INTS"] 35 | if self.observations.has_key("DOUBLES"): 36 | ts_list += ["DOUBLES"] + self.observations["DOUBLES"] 37 | if self.observations.has_key("CHARCOUNT"): 38 | ts_list += ["CHARCOUNT"] + self.observations["CHARCOUNT"] 39 | 40 | # Actions 41 | if len(self.actions.keys()) > 0: 42 | ts_list += ["ACTIONS"] 43 | if self.actions.has_key("INTS"): 44 | ts_list += ["INTS"] + self.actions["INTS"] 45 | if self.actions.has_key("DOUBLES"): 46 | ts_list += ["DOUBLES"] + self.actions["DOUBLES"] 47 | if self.actions.has_key("CHARCOUNT"): 48 | ts_list += ["CHARCOUNT"] + self.actions["CHARCOUNT"] 49 | 50 | ts_list += ["REWARDS", "(" + str(self.reward_range[0]) + " " + str(self.reward_range[1]) + ")"] 51 | if self.extras != "": 52 | ts_list += ["EXTRAS", self.extras] 53 | return ' '.join(ts_list) 54 | 55 | 56 | def addAction(self, dRange, repeat=1, type="INTS"): 57 | rept = "" if repeat<= 1 else str(repeat) + " " 58 | 
self.actions.setdefault(type, []).append("(" + rept + str(dRange[0]) + " " + str(dRange[1]) + ")") 59 | 60 | def addContinuousAction(self, dRange, repeat=1): 61 | self.addAction(dRange, repeat, "DOUBLES") 62 | 63 | def addDiscreteAction(self, dRange, repeat=1): 64 | self.addAction(map(int, dRange), repeat, "INTS") 65 | 66 | def addObservation(self, dRange, repeat=1, type="INTS"): 67 | rept = "" if repeat<= 1 else str(repeat) + " " 68 | self.observations.setdefault(type, []).append("(" + rept + str(dRange[0]) + " " + str(dRange[1]) + ")") 69 | 70 | def addContinuousObservation(self, dRange, repeat=1): 71 | self.addObservation(dRange, repeat, "DOUBLES") 72 | 73 | def addDiscreteObservation(self, dRange, repeat=1): 74 | self.addObservation(map(int, dRange), repeat, "INTS") 75 | 76 | def setActionCharLimit(self, charLimit): 77 | self.actions["CHARCOUNT"] = [str(charLimit)] 78 | 79 | def setObservationCharLimit(self, charLimit): 80 | self.observations["CHARCOUNT"] = [str(charLimit)] 81 | 82 | def setContinuing(self): 83 | self.prob_type = "continuing" 84 | 85 | def setEpisodic(self): 86 | self.prob_type = "episodic" 87 | 88 | def setDiscountFactor(self, factor): 89 | self.disc_factor = factor 90 | 91 | def setExtra(self, strExtra): 92 | self.extras = strExtra 93 | 94 | def setProblemTypeCustom(self, strProbType): 95 | self.prob_type = strProbType 96 | 97 | def setRewardRange(self, low, high): 98 | self.reward_range = (low, high) 99 | 100 | -------------------------------------------------------------------------------- /pyrl/rlglue/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /pyrl/rlglue/registry.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. 
module:: RLGlueRegistry
    :platform: Unix, Windows
    :synopsis: Registry for rl-glue agents, environments and experiments

.. moduleauthor:: Pierre-Luc Bacon

"""

class RLGlueRegistry(object):
    # Maps each registered class's `name` attribute to the class itself,
    # one dict per rl-glue role.
    def __init__(self):
        self.agents = {}
        self.environments = {}
        self.experiments = {}

    def register_agent(self, cls):
        # Returns cls unchanged so this can be used as a class decorator.
        self.agents[cls.name] = cls
        return cls

    def register_environment(self, cls):
        # Returns cls unchanged so this can be used as a class decorator.
        self.environments[cls.name] = cls
        return cls

    def register_experiment(self, cls):
        # Returns cls unchanged so this can be used as a class decorator.
        self.experiments[cls.name] = cls
        return cls

# Module-level singleton registry and decorator aliases used across pyrl.
rlglue_registry = RLGlueRegistry()
register_agent = rlglue_registry.register_agent
register_environment = rlglue_registry.register_environment
register_experiment = rlglue_registry.register_experiment
-------------------------------------------------------------------------------- /pyrl/rlglue/run.py: --------------------------------------------------------------------------------

# Author: Will Dabney
# Author: Pierre-Luc Bacon

# Runs an experiment by starting up rl_glue,
# and letting the user choose from a set of
# agents, environments, and experiments.
8 | 9 | import json 10 | from multiprocessing import Process 11 | from subprocess import Popen 12 | 13 | from pyrl.agents import * 14 | from pyrl.environments import * 15 | from pyrl.experiments import * 16 | from pyrl.rlglue.registry import rlglue_registry 17 | from pyrl.misc.json import convert 18 | 19 | from rlglue.agent import AgentLoader as AgentLoader 20 | from rlglue.environment import EnvironmentLoader as EnvironmentLoader 21 | 22 | 23 | def fromjson(filename): 24 | with open(filename, 'r') as f: 25 | config = json.load(f, object_hook=convert) 26 | 27 | # Process the environment 28 | environment = rlglue_registry.environments[config['environment']['name']] 29 | environment_params = config['environment']['params'] 30 | # Process the agent 31 | agent = rlglue_registry.agents[config['agent']['name']] 32 | agent_params = config['agent']['params'] 33 | # Process the experiment 34 | experiment = rlglue_registry.experiments[config['experiment']['name']] 35 | experiment_params = config['experiment']['params'] 36 | 37 | return agent, agent_params, environment, environment_params, experiment, experiment_params 38 | 39 | def tojson(agent, a_args, env, env_args, exp, exp_args, local=None): 40 | config = {'agent': {'name': agent.name, 'params': a_args}, 41 | 'environment': {'name': env.name, 'params': env_args}, 42 | 'experiment': {'name': exp.name, 'params': exp_args}} 43 | return json.dumps(config) 44 | 45 | def fromuser(): 46 | environment = interactive_choose(rlglue_registry.environments, 47 | "Choose an environment.") 48 | agent = interactive_choose(rlglue_registry.agents, "Choose an agent.") 49 | experiment = interactive_choose(rlglue_registry.experiments, 50 | "Choose an experiment.") 51 | return agent, {}, environment, {}, experiment, {} 52 | 53 | 54 | def interactive_choose(choices, prompt): 55 | print(prompt) 56 | sortkeys = sorted(choices.keys()) 57 | 58 | for ix, a_key in enumerate(sortkeys): 59 | print(" ({:d}): {}".format(ix + 1, a_key)) 60 | 61 | choice = 
None 62 | while choice not in range(1, len(sortkeys) + 1): 63 | choice = raw_input("Enter number (1 - {:d}): ".format( 64 | len(sortkeys))) 65 | try: 66 | choice = int(choice) 67 | except: 68 | pass 69 | 70 | return choices[sortkeys[choice - 1]] 71 | 72 | 73 | def run(agent, a_args, env, env_args, exp, exp_args, local=None, result_file=None): 74 | if local is None: 75 | ans = raw_input("Run locally? [y/n]: ") 76 | if ans.lower() == 'y' or ans.lower() == 'yes': 77 | local = True 78 | else: 79 | local = False 80 | 81 | config = {'agent': {'name': agent.name, 'params': a_args}, 82 | 'environment': {'name': env.name, 'params': env_args}, 83 | 'experiment': {'name': exp.name, 'params': exp_args}} 84 | if local: 85 | experiment = exp(config, agent=agent(**a_args), 86 | environment=env(**env_args), **exp_args) 87 | experiment.run_experiment(filename=result_file) 88 | else: 89 | experiment = exp(config, **exp_args) 90 | # TODO: Figure out if rl_glue is running, don't start it in that case 91 | rlglue_p = Popen('rl_glue') 92 | agent_p = Process(target=AgentLoader.loadAgent, 93 | args=(agent(**a_args),)) 94 | agent_p.start() 95 | env_p = Process(target=EnvironmentLoader.loadEnvironment, 96 | args=(env(**env_args),)) 97 | env_p.start() 98 | experiment.run_experiment(filename=result_file, **a_args) 99 | env_p.terminate() 100 | agent_p.terminate() 101 | rlglue_p.terminate() 102 | 103 | 104 | def addRunExpArgs(parser): 105 | json_group = parser.add_mutually_exclusive_group() 106 | json_group.add_argument("--load", type=str, help="Load an experimental configuration from a JSON file.") 107 | json_group.add_argument("--genjson", action='store_true', help="Generate an experimental configuration JSON file from " + \ 108 | "interactive selections. 
Only generates, does not run.") 109 | group = parser.add_mutually_exclusive_group() 110 | group.add_argument("--local", action='store_true', default="True", help="Run experiment locally") 111 | group.add_argument("--network", action='store_true', help="Run experiment through network sockets") 112 | parser.add_argument("--output", type=str, help="Save the results to a file.") 113 | return parser 114 | 115 | if __name__ == '__main__': 116 | import argparse 117 | parser = argparse.ArgumentParser(description='Run a reinforcement learning experiment. Defaults to interactive experiment.') 118 | addRunExpArgs(parser) 119 | args = parser.parse_args() 120 | 121 | if args.load is None: 122 | config = fromuser() 123 | if args.genjson: 124 | print tojson(*config) 125 | else: 126 | run(*config,local=args.local, result_file=args.output) 127 | else: 128 | config = fromjson(args.load) 129 | run(*config, local=args.local, result_file=args.output) 130 | -------------------------------------------------------------------------------- /pyrl/visualizers/.gitignore: -------------------------------------------------------------------------------- 1 | */*.py[co] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | var 12 | sdist 13 | develop-eggs 14 | .installed.cfg 15 | */*~ 16 | # Installer logs 17 | pip-log.txt 18 | 19 | # Unit test / coverage reports 20 | .coverage 21 | .tox 22 | 23 | #Translations 24 | *.mo 25 | 26 | #Mr Developer 27 | .mr.developer.cfg 28 | -------------------------------------------------------------------------------- /pyrl/visualizers/README.md: -------------------------------------------------------------------------------- 1 | pyrl.visualizers 2 | ========= 3 | 4 | Scripts and modules related to visualizing the output from the agents, environments and experiments for the pyrl project. 
5 | -------------------------------------------------------------------------------- /pyrl/visualizers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /pyrl/visualizers/compareParameters.py: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # plotParameters.py 3 | # Author: Will Dabney 4 | # 5 | # A script to plot the output of the collected results of a randomized 6 | # parameter search experiment. 7 | # 8 | # Using the --parameter index name, argument you can plot a single parameter or 9 | # pair of parameters against their collected performance values. Specifying no 10 | # parameter will result in a low dimensionality embedding of all the parameters 11 | # to be plotted against their values. 12 | # Thus, with one parameter specified we get the usual results graphs used to 13 | # illustrate how an algorithm performs as a parameter varries. With two parameters 14 | # specified we get this same conceptual view but for the interaction of the two 15 | # parameters. And finally with three or more, we get something more interesting 16 | # which shows the overal behavior pattern with respect to parameter change that 17 | # the algorithm exibits (at least on the given domain). 
18 | # 19 | # Example: python -m pyrl.visualizers.plotParameters --file exp.dat --parameter 1 alpha 20 | ################################################################################ 21 | 22 | import numpy 23 | import matplotlib.pyplot as plt 24 | import sys 25 | import argparse 26 | from scipy.interpolate import griddata 27 | 28 | def loadParameterData(filename, param_index): 29 | data = numpy.genfromtxt(filename, delimiter=',')[:,(0,1,param_index)] # Grab the mean, std, and parameter 30 | data = data[numpy.lexsort((data[:,2],)),:] 31 | 32 | if data[:,2].std() <= 1.e-10: 33 | xs = numpy.linspace(0, 1.0) 34 | ys = xs.copy() 35 | ys.fill(data[:,0].mean()) 36 | stdv = xs.copy() 37 | xs.fill(0.0) 38 | return numpy.array([ys, stdv, xs]).T 39 | else: 40 | return data 41 | 42 | 43 | if __name__=="__main__": 44 | import argparse 45 | parser = argparse.ArgumentParser(description='Plot a comparison of algorithms parameter ' + \ 46 | 'exploration for a singe parameter.') 47 | parser.add_argument("--file", type=str, action='append', nargs=3, required=True, 48 | help="Parameter exploration algorithm name, results file, and " + \ 49 | "the index of the parameter to display. Ex: Alg algfile.dat 2") 50 | parser.add_argument("--title", type=str, help="Title for the figure.", 51 | default="Parameter Exploration") 52 | parser.add_argument("--xlabel", type=str, help="Name of parameter being compared, label for x-axis", 53 | default="Parameter") 54 | parser.add_argument("--ylabel", type=str, help="Name of evaluation metric for algorithms. 
" + \ 55 | "This is the label for the y-axis", default="Total Return") 56 | parser.add_argument("--output", type=str, help="Filename to save the resulting figure.") 57 | parser.add_argument("--nobars", action='store_true', default=False, help="Disable plotting of error bars for standard deviations.") 58 | args = parser.parse_args() 59 | 60 | for (name, file, index) in args.file: 61 | data = loadParameterData(file, index) 62 | if not args.nobars: 63 | plt.errorbar(data[:,2], data[:,0], yerr=data[:,1]) 64 | else: 65 | plt.plot(data[:,2], data[:,0]) 66 | plt.hold(True) 67 | 68 | plt.legend(map(lambda k: k[0], args.file), loc='best') 69 | plt.title(args.title) 70 | plt.xlabel(args.xlabel) 71 | plt.ylabel(args.ylabel) 72 | if args.output: 73 | plt.savefig(args.output) 74 | else: 75 | plt.show() 76 | 77 | -------------------------------------------------------------------------------- /scripts/generate_spearmint.sh: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | # 3 | # Generate Spearmint configuration/experiment 4 | # to optimize parameters for some pyRL experiment file. 5 | # 6 | # Run with: 7 | # sh scripts/generate_spearmint.sh /path/to/output experimentfile.json 8 | # 9 | # This will create all the necessary files in /path/to/output 10 | # spearmint needs to run the optimization. It will use experimentfile.json 11 | # as the template for the experiment. 12 | # 13 | # Note: Make sure *at least* one parameter for the algorithm 14 | # is *NOT* specified in the experiment json file. This code 15 | # will use whatever values are specified in the json file and 16 | # will *ONLY* optimize the parameters that are not given in the json. 
17 | # 18 | # Then, to run the experiment go to spearmint directory and run: 19 | # python spearmint_sync.py --method=GPEIOptChooser --method-args=noiseless=1 /path/to/output/ 20 | ##################################################################### 21 | 22 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )"/../ && pwd )" 23 | OUTDIR=$1 24 | EXPFILE=$2 25 | 26 | AGENT_NAME=`cat $EXPFILE | tr "\n" " " | sed -e 's/{.*"agent"[ ]*:.*"name"[ ]*:[ ]*"\([^"]*\)".*$/\1/g'` 27 | mkdir $OUTDIR 28 | 29 | cat ${DIR}/scripts/spearmint_template.py | sed -e s:'pyrl_path = "###"':'pyrl_path = "'${DIR}'"':g > ${OUTDIR}/"${AGENT_NAME}.py" 30 | cp $EXPFILE ${OUTDIR}/experiment.json 31 | 32 | python ${DIR}/scripts/spearmint_config.py "${AGENT_NAME}" $EXPFILE > ${OUTDIR}/config.pb 33 | 34 | 35 | -------------------------------------------------------------------------------- /scripts/spearmint_config.py: -------------------------------------------------------------------------------- 1 | 2 | # Author: Will Dabney 3 | 4 | #import csv, os, json, numpy, copy 5 | import numpy, sys 6 | sys.path.append("..") 7 | from pyrl.misc.parameter import * 8 | 9 | def gen_numeric_pbvar(name, size, type, min, max): 10 | config = ["variable {", ' name: "' + name + '"'] 11 | if type is int: 12 | config.append(" type: INT") 13 | else: 14 | config.append(" type: FLOAT") 15 | config.append(" size: " + str(size)) 16 | config.append(" min: " + str(min)) 17 | config.append(" max: " + str(max)) 18 | config += ["}", ""] 19 | return config 20 | 21 | def gen_enum_pbvar(name, size, options): 22 | config = ["variable {", ' name: "' + name + '"'] 23 | config.append(" type: ENUM") 24 | config.append(" size: " + str(size)) 25 | for entry in options: 26 | config.append(' options: "' + str(entry) + '"') 27 | config += ["}", ""] 28 | return config 29 | 30 | def gen_config(agent_name, param_parser, fixed_params): 31 | config_contents = ["language: PYTHON", 'name: "' + agent_name + '"', ""] 32 | 33 | opt_grp = 
get_optimize_group(param_parser) 34 | opt_pnames = set(fixed_params.keys()) 35 | for param in opt_grp._group_actions: 36 | if (param.dest in opt_pnames): 37 | continue 38 | opt_pnames.add(param.dest) 39 | 40 | var_size = param.nargs if param.nargs is not None else 1 41 | if param.type is bool: 42 | config_contents += gen_enum_pbvar(param.dest, var_size, ["true", "false"]) 43 | elif param.choices.__class__ is ValueRange: 44 | config_contents += gen_numeric_pbvar(param.dest, var_size, param.type, 45 | param.choices.min(), param.choices.max()) 46 | else: 47 | config_contents += gen_enum_pbvar(param.dest, var_size, param.choices) 48 | 49 | return config_contents 50 | 51 | if __name__ == "__main__": 52 | from pyrl.agents import * 53 | from pyrl.rlglue.registry import rlglue_registry 54 | from pyrl.rlglue import run 55 | 56 | agent_name = sys.argv[1] 57 | agent = rlglue_registry.agents[agent_name] 58 | param_parser = agent.agent_parameters() 59 | fixed_params = run.fromjson(sys.argv[2])[1] # Grabs agent parameters from experiment file 60 | 61 | # Produce a config.pb file based upon parameter parser 62 | config_contents = gen_config(agent_name, param_parser, fixed_params) 63 | for line in config_contents: 64 | print line 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /scripts/spearmint_template.py: -------------------------------------------------------------------------------- 1 | 2 | # Author: Will Dabney 3 | 4 | import sys 5 | pyrl_path = "###" 6 | sys.path.append(pyrl_path) 7 | 8 | import os, numpy 9 | from pyrl.misc.timer import Timer 10 | from pyrl.rlglue import RLGlueLocal as RLGlueLocal 11 | from pyrl.rlglue.registry import register_experiment 12 | import rlglue.RLGlue as rl_glue 13 | from pyrl.experiments.episodic import Episodic 14 | import pyrl.visualizers.plotExperiment as plotExperiment 15 | from pyrl.misc.parameter import * 16 | from pyrl.rlglue.run import fromjson 17 | 18 | def 
main(job_id, params): 19 | print 'Anything printed here will end up in the output directory for job #:', str(job_id) 20 | parameters = {} 21 | for key in params: 22 | if isinstance(key, unicode): 23 | parameters[key.encode('utf-8')] = params[key] 24 | else: 25 | parameters[key] = params[key] 26 | 27 | for key in parameters: 28 | parameters[key] = map(lambda k: k.encode('utf-8') if isinstance(k, unicode) else k, parameters[key]) 29 | if len(parameters[key]) == 1: 30 | value = parameters[key][0] 31 | try: 32 | value = float(value) 33 | except: 34 | if value.lower() == "false": 35 | value = False 36 | elif value.lower() == "true": 37 | value = True 38 | 39 | parameters[key] = value 40 | print parameters 41 | 42 | my_path = os.path.dirname(os.path.abspath(__file__)) 43 | tmp_file = os.path.join(my_path, "rndtrial" + str(numpy.random.randint(1.e10)) + ".dat") 44 | my_path = os.path.abspath(os.path.join(my_path, "experiment.json")) 45 | agent, a_args, env, env_args, exp, exp_args = fromjson(my_path) 46 | 47 | for key in parameters: 48 | a_args.setdefault(key, parameters[key]) 49 | 50 | config = {'agent': {'name': agent.name, 'params': a_args}, 51 | 'environment': {'name': env.name, 'params': env_args}, 52 | 'experiment': {'name': exp.name, 'params': exp_args}} 53 | 54 | experiment = Episodic(config, agent=agent(**a_args), 55 | environment=env(**env_args), **exp_args) 56 | 57 | # Using this try/except makes debugging in spearmint 1mil times easier 58 | try: 59 | experiment.run_experiment(filename=tmp_file) 60 | except Exception as ex: 61 | import traceback 62 | traceback.print_exc() 63 | 64 | locs, means, std = plotExperiment.processFile(tmp_file, "reward", verbose=False, method="sum") 65 | os.remove(tmp_file) 66 | print "Result:", -means[0] 67 | return -means[0] 68 | 69 | 70 | --------------------------------------------------------------------------------