├── .github └── workflows │ ├── python_formatting.yml │ └── python_lint.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── data ├── MontezumaRevengeDeterministic-v4 │ ├── config.json │ └── constants.json ├── ai2thornav │ ├── config.json │ └── constants.json ├── base_config.json ├── base_constants.json ├── combolock │ ├── config.json │ └── constants.json ├── diabcombolock │ ├── config.json │ └── constants.json ├── gridworld-canonical │ ├── config.json │ └── constants.json ├── gridworld-empty │ ├── config.json │ └── constants.json ├── gridworld-randomized-small │ ├── config.json │ └── constants.json ├── gridworld-randomized │ ├── config.json │ └── constants.json ├── gridworld1 │ ├── config.json │ └── constants.json ├── gridworld2 │ ├── config.json │ └── constants.json ├── matterport │ ├── config.json │ └── constants.json ├── newtonianmotion │ ├── config.json │ └── constants.json ├── objectnav │ ├── config.json │ └── constants.json ├── safetyworld │ ├── config.json │ └── constants.json ├── safetyworld2 │ ├── config.json │ └── constants.json ├── simplelqr │ ├── config.json │ └── constants.json ├── slotfactoredmdp │ ├── config.json │ └── constants.json ├── stochcombolock │ ├── config.json │ └── constants.json ├── temporal_combolock │ ├── config.json │ └── constants.json └── temporal_diabcombolock │ ├── config.json │ └── constants.json ├── local_runs ├── test_factorl.sh ├── test_homer.sh ├── test_id.sh └── test_ppe.sh ├── pyproject.toml ├── requirements.txt └── src ├── analysis_tools ├── __init__.py └── visualize_latent_dynamics.py ├── environments ├── __init__.py ├── ai2thorenv │ ├── __init__.py │ ├── ai2thor_exo_util.py │ ├── navai2thor.py │ └── objectnav.py ├── app_simulator │ ├── __init__.py │ ├── run_interactive.py │ ├── toggle_switches.py │ ├── tree_nav.py │ └── ui.py ├── control_env │ ├── __init__.py │ ├── newtonian_motion.py │ └── simple_lqr.py ├── gym_env │ ├── __init__.py │ └── gym_wrapper.py ├── intrepid_env_meta │ ├── __init__.py │ ├── action_type.py │ ├── environment_keys.py │ ├── environment_wrapper.py │ ├── gym_compatible.py │ ├── intrepid_env_interface.py │ └── make_env.py ├── matterport │ ├── __init__.py │ └── matterport.py ├── minigrid │ ├── __init__.py │ ├── exogenous_noise_util.py │ ├── gridworld1.py │ ├── gridworld2.py │ ├── gridworld_canonical.py │ ├── gridworld_empty.py │ ├── gridworld_randomized.py │ ├── gridworld_randomized_small.py │ └── gridworld_wrapper.py ├── rl_acid_env │ ├── __init__.py │ ├── combolock.py │ ├── diabolical_combolock.py │ ├── grid_world.py │ ├── noise_gen.py │ ├── rl_acid_wrapper.py │ ├── safety_world.py │ ├── safety_world2.py │ ├── slot_factored_mdp.py │ ├── temporal_combolock.py │ ├── temporal_diabcombolock.py │ └── visual_combolock.py └── robot_car │ ├── __init__.py │ ├── client │ ├── __init__.py │ ├── alex_inference.py │ ├── client_base.py │ ├── client_utils.py │ ├── closed_loop_client.py │ ├── inference.py │ ├── random_actions_client.py │ ├── state.py │ ├── state_capture_client.py │ └── tests.py │ ├── server │ ├── __init__.py │ ├── mock_pi_libraries.py │ └── server.py │ └── utils │ ├── __init__.py │ ├── check_corrupted_images.py │ ├── dataset.py │ ├── join_logs.py │ └── rc_car_data_processing.py ├── experiments ├── __init__.py ├── experiment_header.py ├── experiment_save.py ├── experimental_setup.py ├── run_factorl.py ├── run_homer.py ├── run_id.py ├── run_interactive_agent.py ├── run_mbrl_oracle.py ├── run_ppe.py ├── run_psdp.py ├── run_rep_learn_video.py ├── run_richid.py ├── run_robot_car.py 
├── run_sabre.py ├── run_sysid.py └── run_visualize_mbrl_oracle.py ├── learning ├── __init__.py ├── core_learner │ ├── __init__.py │ ├── abstract_rl_discrete_latent_state.py │ ├── abstract_video_rep_learner.py │ ├── acro_rep.py │ ├── factorl.py │ ├── fqi_oracle_decoder.py │ ├── homer.py │ ├── ik_learner.py │ ├── mbrl_oracle_decoder.py │ ├── ppe.py │ ├── ppe_util.py │ ├── prediction_video.py │ ├── richid.py │ ├── sabre.py │ └── temporal_contrastive_video.py ├── datastructures │ ├── __init__.py │ ├── abstract_tabular_mdp.py │ ├── count_conditional_probability.py │ ├── count_probability.py │ ├── elliptic_potential.py │ ├── episode.py │ └── transition.py ├── learning_utils │ ├── __init__.py │ ├── abstract_encoder_sampler.py │ ├── clustering_algorithm.py │ ├── collect_data_with_coverage.py │ ├── contextual_bandit_oracle.py │ ├── debug_train_encoding_function.py │ ├── encoder_sampler_all_random.py │ ├── encoder_sampler_bfs_reuse.py │ ├── encoder_sampler_forward_reuse.py │ ├── encoder_sampler_ik.py │ ├── encoder_sampler_reuse.py │ ├── encoder_sampler_same_policy.py │ ├── encoder_sampler_wrapper.py │ ├── entropy_decay_policy.py │ ├── evaluate_state_decoder.py │ ├── factorl_graph_identification.py │ ├── generic_learner.py │ ├── generic_train_classifier.py │ ├── homer_train_encoding_function.py │ ├── homer_train_encoding_function_utils.py │ ├── ik_train_encoding_function.py │ ├── independence_test.py │ ├── linear_disag_model.py │ ├── policy_evaluate.py │ ├── reconstruct_observation.py │ ├── ricatti_solver.py │ └── rl_discrete_latent_state_util.py ├── linear_mdp │ ├── __init__.py │ └── lsvi_ucb.py ├── model_estimation │ ├── __init__.py │ └── count_based_estimation.py ├── planning │ ├── __init__.py │ ├── cem │ │ ├── __init__.py │ │ └── cem_optimizer.py │ ├── high_level_planner │ │ ├── __init__.py │ │ └── dijkstra_planner.py │ ├── high_low_plan.py │ ├── hj_prox │ │ ├── __init__.py │ │ └── hj_prox_alg.py │ └── room_planner.py ├── policy_roll_in │ ├── __init__.py │ └── roll.py ├── policy_search │ ├── __init__.py │ ├── abstract_policy_search.py │ ├── fqi.py │ ├── greedy_policy_search.py │ ├── path_policy_search.py │ ├── policy_search_wrapper.py │ └── psdp.py ├── state_abstraction │ ├── __init__.py │ ├── abstract_state_decoder.py │ ├── autoencoder.py │ ├── generalized_inverse_kinematics.py │ ├── inverse_kinematics.py │ ├── noise_contrastive_dataset.py │ ├── noise_contrastive_global.py │ └── noise_contrastive_local.py └── tabular_rl │ ├── __init__.py │ ├── det_tabular_mdp_builder.py │ ├── q_learning_bonus.py │ ├── tabular_mdp_builder.py │ └── value_iteration.py ├── model ├── __init__.py ├── bottleneck │ ├── __init__.py │ ├── gaussian_bottleneck.py │ └── vq_bottleneck.py ├── classifiers │ ├── __init__.py │ ├── classifier_model_wrapper.py │ ├── conv2_classifier.py │ ├── conv3_classifier.py │ ├── convm_classifier.py │ ├── feedforward_classifier.py │ └── linear_classifier.py ├── decoder │ ├── __init__.py │ ├── conv_decoder.py │ ├── conv_decoder2.py │ ├── conv_decoder_ai2thor.py │ ├── decoder_wrapper.py │ └── feedforward_decoder.py ├── encoder │ ├── __init__.py │ ├── conv2_encoder.py │ ├── conv3_encoder.py │ ├── conv4_encoder.py │ ├── conv_encoder.py │ ├── encoder_wrapper.py │ └── feedforward_encoder.py ├── forward_model │ ├── __init__.py │ ├── conv_forward_model.py │ └── forward_model_wrapper.py ├── inverse_dynamics │ ├── __init__.py │ ├── encoded_mlp.py │ ├── inverse_dynamics_wrapper.py │ ├── simple_feed_forward.py │ └── tensor_inverse_dynamics.py ├── inverse_dynamics_model │ ├── __init__.py │ ├── 
action_predictor.py │ └── action_predictor_wrapper.py ├── misc │ ├── __init__.py │ ├── independence_test_model.py │ ├── lqr_model.py │ ├── richid_decoder.py │ └── robot_car │ │ ├── __init__.py │ │ ├── autoencoder_embeddings.py │ │ ├── autoencoder_test.py │ │ ├── autoencoder_train.py │ │ ├── dist_pred_model.py │ │ ├── ema_pytorch.py │ │ ├── emprical_mdp.py │ │ ├── latent_forward.py │ │ ├── latent_inverse.py │ │ ├── mixer.py │ │ ├── models.py │ │ ├── pl_vae.py │ │ └── positional_encoding.py ├── model_wrapper.py ├── policy │ ├── __init__.py │ ├── abstract_nonstationary.py │ ├── abstract_stationary.py │ ├── nonstationary_composed_policy.py │ ├── open_loop.py │ ├── stationary_action_condition_policy.py │ ├── stationary_constant_policy.py │ ├── stationary_decoder_dictionary_policy.py │ ├── stationary_deterministic_policy.py │ ├── stationary_dictionary_policy.py │ └── stationary_stochastic_policy.py └── transition_encoders │ ├── __init__.py │ ├── compositional_encoder_model.py │ ├── encoder_model.py │ ├── encoder_model_wrapper.py │ └── factorl_encoder.py ├── setup_validator ├── __init__.py ├── config_key_registry.py └── core_validator.py ├── unit_test ├── __init__.py ├── dynamic_arguments.py ├── gridworld_test.py ├── lp_solver.py ├── make_env.py ├── manual_control.py ├── matterport_exo.py ├── matterport_test.py ├── module_multiprocessing.py ├── montezuma_human_interactive.py ├── multiprocessing_different_gpu.py ├── shared_replay_memory.py ├── test_matterport.py └── test_slot_factored_mdp.py └── utils ├── __init__.py ├── average.py ├── beautify_time.py ├── conv_util.py ├── cuda.py ├── generic_policy.py ├── gumbel.py ├── leaky_softmax.py ├── multiprocess_logger.py ├── normalizer.py ├── shared_replay_memory.py ├── simclr_transform.py ├── telemetry.py └── tensorboard.py /.github/workflows/python_formatting.yml: -------------------------------------------------------------------------------- 1 | name: Python Formatting 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - 'releases/**' 8 | pull_request: 9 | branches: 10 | - '*' 11 | 12 | jobs: 13 | python-formatting: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | python-version: ["3.10"] 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v3 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install black black[jupyter] 30 | 31 | - name: Check formatting 32 | run: | 33 | python -m black --check . 34 | 35 | - name: How to fix errors 36 | if: ${{ failure() }} 37 | shell: bash 38 | run: | 39 | echo "To fix formatting errors, run:" 40 | echo "python3 -m black ." 
41 | exit 1 42 | -------------------------------------------------------------------------------- /.github/workflows/python_lint.yml: -------------------------------------------------------------------------------- 1 | name: Python Lint 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - 'releases/**' 8 | pull_request: 9 | branches: 10 | - '*' 11 | 12 | jobs: 13 | python-lint: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | python-version: ["3.10"] 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v3 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | 26 | - uses: chartboost/ruff-action@v1 27 | with: 28 | # ignore error E501: line too long 29 | args: --ignore E501 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # PyCharm 2 | __pycache__ 3 | .DS_Store 4 | 5 | # Executables 6 | *.o 7 | *.a 8 | bin 9 | *.tgz 10 | *.pc 11 | *.pyc 12 | *~ 13 | 14 | # VS files 15 | *.so 16 | 17 | # IntelliJ 18 | *.idea 19 | 20 | # Java 21 | java/pom.xml 22 | 23 | # Ruff linter 24 | .ruff_cache 25 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | 9 | For help and questions about using this project, please raise an issue and add a [Questions] or [Help] tag in the title. 10 | 11 | ## Microsoft Support Policy 12 | 13 | Support for this project is limited to the resources listed above. 14 | -------------------------------------------------------------------------------- /data/MontezumaRevengeDeterministic-v4/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 18, 3 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], 4 | "horizon": 30, 5 | "obs_dim": [84, 84, 1], 6 | "feature_type": "image", 7 | "openai_state_type": "ram", 8 | "gamma": 1.0, 9 | "num_repeat_action": 4 10 | } 11 | -------------------------------------------------------------------------------- /data/MontezumaRevengeDeterministic-v4/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 48, 4 | "encoder_training_num_samples": 4000, 5 | "encoder_training_epoch": 1000, 6 | "encoder_training_lr": 0.00025, 7 | "encoder_training_batch_size": 256, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 8000, 10 | "cb_oracle_epoch": 20, 11 | "cb_oracle_lr": 0.00025, 12 | "cb_oracle_batch_size": 32, 13 | "cb_validation_pct": 0.2, 14 | "cb_patience": 5, 15 | "eval_homing_policy_sample_size": 50, 16 | "n_feature_maps": 64, 17 | "n_hidden": 512, 18 | "p_dropout": 0.0, 19 | "phi_layer_size": 25, 20 | "entropy_reg_coeff": 0.075, 21 | "bootstrap_encoder_model": false, 22 | "failed_homing_policy_filter": false, 23 | "encoder_sampling_style": "reuse", 24 | "data_aggregation": false, 25 | "reward_free_planner": "gps", 26 | "reward_sensitive_planner": "fqi", 27 | "patience": 40, 28 | "bias_homing_policy": false, 29 | "entropy_policy": "none", 30 | "filter_unreachable_abstract_states": true, 31 | "filter_old_abstract_states": true, 32 | "use_l1_penalty": false, 33 | "expected_optima": 0.0, 34 | "max_try": 1, 35 | "reward_type": "deterministic", 36 | "count_type": "state" 37 | } 38 | -------------------------------------------------------------------------------- /data/ai2thornav/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "horizon": 7, 5 | "scene_name": "FloorPlan201", 6 | "headless": -1, 7 | "obs_dim": [56, 112, 3], 8 | "feature_type": "image", 9 | "gamma": 1.0 10 | } 11 | -------------------------------------------------------------------------------- /data/ai2thornav/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 48, 4 |
"encoder_training_num_samples": 2000, 5 | "encoder_training_epoch": 100, 6 | "encoder_training_lr": 0.00025, 7 | "encoder_training_batch_size": 32, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 8000, 10 | "decoder_type": "conv-ai2thor", 11 | "cb_oracle_epoch": 20, 12 | "cb_oracle_lr": 0.00025, 13 | "cb_oracle_batch_size": 32, 14 | "cb_validation_pct": 0.2, 15 | "cb_patience": 5, 16 | "eval_homing_policy_sample_size": 50, 17 | "n_feature_maps": 64, 18 | "n_hidden": 512, 19 | "p_dropout": 0.0, 20 | "phi_layer_size": 25, 21 | "entropy_reg_coeff": 0.075, 22 | "bootstrap_encoder_model": false, 23 | "failed_homing_policy_filter": false, 24 | "encoder_sampling_style": "reuse", 25 | "data_aggregation": false, 26 | "reward_free_planner": "pps", 27 | "reward_sensitive_planner": "psdp", 28 | "patience": 20, 29 | "bias_homing_policy": false, 30 | "entropy_policy": "none", 31 | "filter_unreachable_abstract_states": true, 32 | "filter_old_abstract_states": true, 33 | "use_l1_penalty": false, 34 | "expected_optima": 0.0, 35 | "max_try": 1 36 | } 37 | -------------------------------------------------------------------------------- /data/base_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 10, 3 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 4 | "horizon": 20, 5 | "obs_dim": -1, 6 | "enable_exo": 1, 7 | "exo_type": "pixel", 8 | "pixel_size": 5, 9 | "num_exo_var": 100, 10 | "exo_reward": 1, 11 | "color_map": -1, 12 | "swap_prob": 0.0, 13 | "spawn_prob": 0.0, 14 | "optimal_reward": 5.0, 15 | "anti_shaping_reward": 0.0, 16 | "anti_shaping_reward2": 1.0, 17 | "feature_type": "feature", 18 | "noise_type": "hadamhardg", 19 | "return_state": true, 20 | "exo_dim": -1, 21 | "gamma": 1.0, 22 | "num_repeat_action": 1, 23 | "scene_name": "FloorPlan201", 24 | "headless": -1, 25 | "det_start": 1, 26 | "ego_centric": 1, 27 | "freeze": 0, 28 | "encoder_path": "none" 29 | } 30 | -------------------------------------------------------------------------------- /data/base_constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "samples": 1000, 3 | "lr": 0.001, 4 | "batch_size": 32, 5 | "bs": 256, 6 | "grad_clip": 20, 7 | "sabre_n": 10, 8 | "sabre_b": 3, 9 | "sabre_m": 100, 10 | "sabre_eval": 100, 11 | "is_autoencoder": 1, 12 | "rep_alg": "none", 13 | "data_type": "random", 14 | "sabre_finetune": -1, 15 | "max_episodes": 500, 16 | "max_epoch": 50, 17 | "rnd_bonus": 0, 18 | "entropy_coeff": 0.001, 19 | "num_ppo_updates": 4, 20 | "eps_clip": 0.1, 21 | "classifier_type": "conv2", 22 | "encoder_type": "conv", 23 | "decoder_type": "conv", 24 | "do_rl": -1, 25 | "temperature": 1.0, 26 | "apply_aug": -1, 27 | "hidden_dim": 56, 28 | "learning_rate": 0.001, 29 | "num_homing_policy": 2, 30 | "elim_param": -1, 31 | "encoder_training_num_samples": 20000, 32 | "encoder_training_epoch": 200, 33 | "num_processes": 1, 34 | "forward_model_type": "forwardmodel", 35 | "backward_model_type": "backwardmodel", 36 | "model_type": "encoder", 37 | "nce_from_dataset": true, 38 | "discretization": true, 39 | "policy_type": "linear", 40 | "encoder_training_lr": 0.001, 41 | "encoder_training_batch_size": 32, 42 | "validation_data_percent": 0.2, 43 | "psdp_training_num_samples": 20000, 44 | "cb_oracle_epoch": 40, 45 | "cb_oracle_lr": 0.001, 46 | "cb_oracle_batch_size": 32, 47 | "cb_validation_pct": 0.2, 48 | "cb_patience": 5, 49 | "eval_homing_policy_sample_size": 100, 50 | "n_hidden": 56, 51 | "entropy_reg_coeff": 
0.075, 52 | "bootstrap_encoder_model": false, 53 | "failed_homing_policy_filter": false, 54 | "encoder_sampling_style": "reuse", 55 | "data_aggregation": false, 56 | "reward_free_planner": "gps", 57 | "reward_sensitive_planner": "fqi", 58 | "patience": 10, 59 | "bias_homing_policy": false, 60 | "entropy_policy": "none", 61 | "filter_unreachable_abstract_states": false, 62 | "filter_old_abstract_states": false, 63 | "use_l1_penalty": false, 64 | "expected_optima": 0.685, 65 | "max_try": 10, 66 | "reward_type": "stochastic", 67 | "count_type": "state-action", 68 | "clustering_threshold": 0.15, 69 | "vq_dim": 256, 70 | "vq_codebook_size" : 512, 71 | "vq_decay" : 0.8, 72 | "vq_commitment_weight" : 0.1, 73 | "vq_orthogonal_reg_weight" : 100, 74 | "vq_orthogonal_reg_max_codes" : 128, 75 | "vq_orthogonal_reg_active_codes_only" : false, 76 | "vq_heads" : 4, 77 | "vq_separate_codebook_per_head": false, 78 | "vq_codebook_dim" : 32, 79 | "vq_sample_codebook_temp": 0, 80 | "vq_kmeans_init": false, 81 | "vq_kmeans_iters": 10, 82 | "use_vq": 1, 83 | "use_gb": 0, 84 | "max_k": 1 85 | } 86 | -------------------------------------------------------------------------------- /data/combolock/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 2, 3 | "actions": [0, 1], 4 | "horizon": 20, 5 | "obs_dim": -1, 6 | "feature_type": "feature", 7 | "gamma": 1.0 8 | } 9 | -------------------------------------------------------------------------------- /data/combolock/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "encoder_training_num_samples": 2000, 5 | "encoder_training_epoch": 100, 6 | "encoder_training_lr": 0.001, 7 | "encoder_training_batch_size": 32, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 500, 10 | "cb_oracle_epoch": 20, 11 | "cb_oracle_lr": 0.001, 12 | "cb_oracle_batch_size": 32, 13 | "eval_homing_policy_sample_size": 500, 14 | "n_hidden": 56, 15 | "entropy_reg_coeff": 0.075, 16 | "bootstrap_encoder_model": true, 17 | "failed_homing_policy_filter": false, 18 | "encoder_sampling_style": "reuse", 19 | "data_aggregation": false, 20 | "policy_search": "gps", 21 | "patience": 100, 22 | "bias_homing_policy": false, 23 | "entropy_policy": "smart", 24 | "filter_unreachable_abstract_states": false, 25 | "filter_old_abstract_states": false, 26 | "use_l1_penalty": false, 27 | "expected_optima": 0.62, 28 | "max_try": 3 29 | } 30 | -------------------------------------------------------------------------------- /data/diabcombolock/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 10, 3 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 4 | "horizon": 20, 5 | "obs_dim": -1, 6 | "swap_prob": 0.0, 7 | "spawn_prob": 0.0, 8 | "optimal_reward": 5.0, 9 | "anti_shaping_reward": 0.0, 10 | "anti_shaping_reward2": 1.0, 11 | "feature_type": "feature", 12 | "return_state": true, 13 | "gamma": 1.0 14 | } 15 | -------------------------------------------------------------------------------- /data/diabcombolock/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "encoder_training_num_samples": 20000, 5 | "encoder_training_epoch": 200, 6 | "num_processes": 1, 7 | "forwardmodel": "forwardmodel", 8 | "backwardmodel": "backwardmodel", 9 | "discretization": true, 10 | 
"policy_type": "linear", 11 | "encoder_training_lr": 0.001, 12 | "encoder_training_batch_size": 32, 13 | "validation_data_percent": 0.2, 14 | "psdp_training_num_samples": 20000, 15 | "cb_oracle_epoch": 40, 16 | "cb_oracle_lr": 0.001, 17 | "cb_oracle_batch_size": 32, 18 | "cb_validation_pct": 0.2, 19 | "cb_patience": 5, 20 | "eval_homing_policy_sample_size": 100, 21 | "n_hidden": 56, 22 | "entropy_reg_coeff": 0.075, 23 | "bootstrap_encoder_model": false, 24 | "failed_homing_policy_filter": false, 25 | "encoder_sampling_style": "reuse", 26 | "data_aggregation": false, 27 | "reward_free_planner": "gps", 28 | "reward_sensitive_planner": "fqi", 29 | "patience": 10, 30 | "bias_homing_policy": false, 31 | "entropy_policy": "none", 32 | "filter_unreachable_abstract_states": false, 33 | "filter_old_abstract_states": false, 34 | "use_l1_penalty": false, 35 | "expected_optima": 0.685, 36 | "max_try": 10, 37 | "reward_type": "stochastic", 38 | "count_type": "state-action", 39 | "clustering_threshold": 0.15 40 | } 41 | -------------------------------------------------------------------------------- /data/gridworld-canonical/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "horizon": 10, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | "width": 7, 8 | "height": 7, 9 | "tile_size": 8, 10 | "enable_exo": false, 11 | "exo_type": "pixel", 12 | "num_exo_var": 0, 13 | "circle_width": 1, 14 | "circle_motion": 0.05, 15 | "feature_type": "image", 16 | "noise_type": "none", 17 | "return_state": true, 18 | "gamma": 1.0 19 | } 20 | -------------------------------------------------------------------------------- /data/gridworld-canonical/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 13, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "encoder_type": "conv", 7 | "decoder_type": "conv2", 8 | "elim_param": 0.02, 9 | "lr": 3e-4, 10 | "sample_size": 40000, 11 | "max_epoch": 100, 12 | "batch_size": 32, 13 | "patience": 20, 14 | "bootstrap_action_predictor": "False", 15 | "validation_data_percent": 0.2, 16 | "hidden_dim": 256, 17 | "grad_clip": 2.5, 18 | "shared_action_predictor": "False", 19 | "reward_free_planner": "pps", 20 | "reward_sensitive_planner": "psdp", 21 | "encoder_training_epoch": 200, 22 | "encoder_training_lr": 0.00025, 23 | "encoder_training_batch_size": 128, 24 | "psdp_training_num_samples": 500, 25 | "cb_oracle_epoch": 20, 26 | "cb_oracle_lr": 0.001, 27 | "cb_oracle_batch_size": 32, 28 | "cb_patience": 5, 29 | "cb_validation_pct": 0.2, 30 | "eval_homing_policy_sample_size": 500, 31 | "n_hidden": 56, 32 | "entropy_reg_coeff": 0.005, 33 | "bootstrap_encoder_model": false, 34 | "clustering_threshold": 0.05, 35 | "nce_from_dataset": true, 36 | "failed_homing_policy_filter": false, 37 | "encoder_sampling_style": "reuse", 38 | "data_aggregation": false, 39 | "policy_search": "pps", 40 | "bias_homing_policy": false, 41 | "entropy_policy": "linear", 42 | "filter_unreachable_abstract_states": false, 43 | "filter_old_abstract_states": false, 44 | "use_l1_penalty": false, 45 | "expected_optima": 0.0, 46 | "max_try": 1 47 | } 48 | -------------------------------------------------------------------------------- /data/gridworld-empty/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 5, 3 | 
"actions": [0, 1, 2, 3, 4], 4 | "horizon": 10, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | "width": 7, 8 | "height": 7, 9 | "tile_size": 8, 10 | "enable_exo": false, 11 | "exo_type": "pixel", 12 | "num_exo_var": 0, 13 | "circle_width": 1, 14 | "circle_motion": 0.05, 15 | "feature_type": "image", 16 | "noise_type": "none", 17 | "return_state": true, 18 | "gamma": 1.0 19 | } 20 | -------------------------------------------------------------------------------- /data/gridworld-empty/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 13, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "encoder_type": "conv", 7 | "decoder_type": "conv2", 8 | "elim_param": 0.02, 9 | "lr": 3e-4, 10 | "sample_size": 40000, 11 | "max_epoch": 100, 12 | "batch_size": 32, 13 | "patience": 20, 14 | "bootstrap_action_predictor": "False", 15 | "validation_data_percent": 0.2, 16 | "hidden_dim": 256, 17 | "grad_clip": 2.5, 18 | "shared_action_predictor": "False", 19 | "reward_free_planner": "pps", 20 | "reward_sensitive_planner": "psdp", 21 | "encoder_training_epoch": 200, 22 | "encoder_training_lr": 0.00025, 23 | "encoder_training_batch_size": 128, 24 | "psdp_training_num_samples": 500, 25 | "cb_oracle_epoch": 20, 26 | "cb_oracle_lr": 0.001, 27 | "cb_oracle_batch_size": 32, 28 | "cb_patience": 5, 29 | "cb_validation_pct": 0.2, 30 | "eval_homing_policy_sample_size": 500, 31 | "n_hidden": 56, 32 | "entropy_reg_coeff": 0.005, 33 | "bootstrap_encoder_model": false, 34 | "clustering_threshold": 0.05, 35 | "nce_from_dataset": true, 36 | "failed_homing_policy_filter": false, 37 | "encoder_sampling_style": "reuse", 38 | "data_aggregation": false, 39 | "policy_search": "pps", 40 | "bias_homing_policy": false, 41 | "entropy_policy": "linear", 42 | "filter_unreachable_abstract_states": false, 43 | "filter_old_abstract_states": false, 44 | "use_l1_penalty": false, 45 | "expected_optima": 0.0, 46 | "max_try": 1 47 | } 48 | -------------------------------------------------------------------------------- /data/gridworld-randomized-small/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 3, 3 | "actions": [0, 1, 2], 4 | "horizon": 10, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | "width": 12, 8 | "height": 12, 9 | "tile_size": 8, 10 | "color_map": 1, 11 | "exo_type": "pixel", 12 | "num_exo_var": 10, 13 | "pixel_size": 5, 14 | "circle_width": 1, 15 | "circle_motion": 0.05, 16 | "feature_type": "image", 17 | "noise_type": "none", 18 | "return_state": true, 19 | "gamma": 1.0, 20 | "ego_centric": 1 21 | } 22 | -------------------------------------------------------------------------------- /data/gridworld-randomized-small/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 13, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "encoder_type": "conv3", 7 | "decoder_type": "conv", 8 | "policy_type": "conv1", 9 | "model_type": "conv1", 10 | "elim_param": 0.02, 11 | "lr": 3e-4, 12 | "sample_size": 40000, 13 | "max_epoch": 100, 14 | "batch_size": 32, 15 | "patience": 20, 16 | "bootstrap_action_predictor": "False", 17 | "validation_data_percent": 0.2, 18 | "hidden_dim": 256, 19 | "grad_clip": 2.5, 20 | "shared_action_predictor": "False", 21 | "reward_free_planner": "pps", 22 | 
"reward_sensitive_planner": "psdp", 23 | "encoder_training_epoch": 100, 24 | "encoder_training_lr": 0.001, 25 | "encoder_training_batch_size": 128, 26 | "psdp_training_num_samples": 500, 27 | "cb_oracle_epoch": 20, 28 | "cb_oracle_lr": 0.001, 29 | "cb_oracle_batch_size": 32, 30 | "cb_patience": 5, 31 | "cb_validation_pct": 0.2, 32 | "eval_homing_policy_sample_size": 500, 33 | "n_hidden": 56, 34 | "entropy_reg_coeff": 0.005, 35 | "bootstrap_encoder_model": false, 36 | "clustering_threshold": 0.05, 37 | "nce_from_dataset": true, 38 | "failed_homing_policy_filter": false, 39 | "encoder_sampling_style": "reuse", 40 | "data_aggregation": false, 41 | "policy_search": "pps", 42 | "bias_homing_policy": false, 43 | "entropy_policy": "linear", 44 | "filter_unreachable_abstract_states": false, 45 | "filter_old_abstract_states": false, 46 | "use_l1_penalty": false, 47 | "expected_optima": 0.0, 48 | "max_try": 1 49 | } 50 | -------------------------------------------------------------------------------- /data/gridworld-randomized/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 3, 3 | "actions": [0, 1, 2], 4 | "horizon": 10, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | "width": 15, 8 | "height": 15, 9 | "tile_size": 8, 10 | "num_exo_var": 0, 11 | "circle_width": 1, 12 | "circle_motion": 0.05, 13 | "feature_type": "image", 14 | "noise_type": "none", 15 | "return_state": true, 16 | "gamma": 1.0 17 | } 18 | -------------------------------------------------------------------------------- /data/gridworld-randomized/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 13, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "encoder_type": "conv", 7 | "decoder_type": "conv", 8 | "policy_type": "conv1", 9 | "model_type": "conv1", 10 | "elim_param": 0.02, 11 | "lr": 3e-4, 12 | "sample_size": 40000, 13 | "max_epoch": 100, 14 | "batch_size": 32, 15 | "patience": 20, 16 | "bootstrap_action_predictor": "False", 17 | "validation_data_percent": 0.2, 18 | "hidden_dim": 256, 19 | "grad_clip": 2.5, 20 | "shared_action_predictor": "False", 21 | "reward_free_planner": "pps", 22 | "reward_sensitive_planner": "psdp", 23 | "encoder_training_epoch": 200, 24 | "encoder_training_lr": 0.00025, 25 | "encoder_training_batch_size": 512, 26 | "psdp_training_num_samples": 500, 27 | "cb_oracle_epoch": 20, 28 | "cb_oracle_lr": 0.001, 29 | "cb_oracle_batch_size": 32, 30 | "cb_patience": 5, 31 | "cb_validation_pct": 0.2, 32 | "eval_homing_policy_sample_size": 500, 33 | "n_hidden": 56, 34 | "entropy_reg_coeff": 0.005, 35 | "bootstrap_encoder_model": false, 36 | "clustering_threshold": 0.05, 37 | "nce_from_dataset": true, 38 | "failed_homing_policy_filter": false, 39 | "encoder_sampling_style": "reuse", 40 | "data_aggregation": false, 41 | "policy_search": "pps", 42 | "bias_homing_policy": false, 43 | "entropy_policy": "linear", 44 | "filter_unreachable_abstract_states": false, 45 | "filter_old_abstract_states": false, 46 | "use_l1_penalty": false, 47 | "expected_optima": 0.0, 48 | "max_try": 1 49 | } 50 | -------------------------------------------------------------------------------- /data/gridworld1/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 5, 3 | "actions": [0, 1, 2, 3, 4], 4 | "horizon": 18, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | 
"width": 7, 8 | "height": 7, 9 | "tile_size": 8, 10 | "num_exo_var": 0, 11 | "feature_type": "image", 12 | "noise_type": "none", 13 | "return_state": true, 14 | "gamma": 1.0 15 | } 16 | -------------------------------------------------------------------------------- /data/gridworld1/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "elim_param": 0.02, 7 | "lr": 3e-4, 8 | "sample_size": 40000, 9 | "max_epoch": 100, 10 | "batch_size": 32, 11 | "patience": 5, 12 | "bootstrap_action_predictor": "False", 13 | "validation_data_percent": 0.2, 14 | "hidden_dim": 512, 15 | "grad_clip": 100, 16 | "shared_action_predictor": "False", 17 | "reward_free_planner": "pps", 18 | "reward_sensitive_planner": "psdp", 19 | "encoder_training_epoch": 50, 20 | "encoder_training_lr": 0.001, 21 | "encoder_training_batch_size": 256, 22 | "psdp_training_num_samples": 500, 23 | "cb_oracle_epoch": 20, 24 | "cb_oracle_lr": 0.001, 25 | "cb_oracle_batch_size": 32, 26 | "cb_patience": 5, 27 | "cb_validation_pct": 0.2, 28 | "eval_homing_policy_sample_size": 500, 29 | "n_hidden": 56, 30 | "entropy_reg_coeff": 0.075, 31 | "bootstrap_encoder_model": false, 32 | "clustering_threshold": 0.05, 33 | "nce_from_dataset": true, 34 | "failed_homing_policy_filter": false, 35 | "encoder_sampling_style": "reuse", 36 | "data_aggregation": false, 37 | "policy_search": "pps", 38 | "bias_homing_policy": false, 39 | "entropy_policy": "none", 40 | "filter_unreachable_abstract_states": false, 41 | "filter_old_abstract_states": false, 42 | "use_l1_penalty": false, 43 | "expected_optima": 0.0, 44 | "max_try": 1 45 | } 46 | -------------------------------------------------------------------------------- /data/gridworld2/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 5, 3 | "actions": [0, 1, 2, 3, 4], 4 | "horizon": 10, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | "width": 7, 8 | "height": 7, 9 | "tile_size": 8, 10 | "num_exo_var": 0, 11 | "circle_width": 1, 12 | "circle_motion": 0.05, 13 | "feature_type": "image", 14 | "noise_type": "none", 15 | "return_state": true, 16 | "gamma": 1.0 17 | } 18 | -------------------------------------------------------------------------------- /data/gridworld2/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 13, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "encoder_type": "conv", 7 | "decoder_type": "conv", 8 | "elim_param": 0.02, 9 | "lr": 3e-4, 10 | "sample_size": 40000, 11 | "max_epoch": 100, 12 | "batch_size": 32, 13 | "patience": 20, 14 | "bootstrap_action_predictor": "False", 15 | "validation_data_percent": 0.2, 16 | "hidden_dim": 256, 17 | "grad_clip": 2.5, 18 | "shared_action_predictor": "False", 19 | "reward_free_planner": "pps", 20 | "reward_sensitive_planner": "psdp", 21 | "encoder_training_epoch": 200, 22 | "encoder_training_lr": 0.00025, 23 | "encoder_training_batch_size": 512, 24 | "psdp_training_num_samples": 500, 25 | "cb_oracle_epoch": 20, 26 | "cb_oracle_lr": 0.001, 27 | "cb_oracle_batch_size": 32, 28 | "cb_patience": 5, 29 | "cb_validation_pct": 0.2, 30 | "eval_homing_policy_sample_size": 500, 31 | "n_hidden": 56, 32 | "entropy_reg_coeff": 0.005, 33 | "bootstrap_encoder_model": false, 34 | 
"clustering_threshold": 0.05, 35 | "nce_from_dataset": true, 36 | "failed_homing_policy_filter": false, 37 | "encoder_sampling_style": "reuse", 38 | "data_aggregation": false, 39 | "policy_search": "pps", 40 | "bias_homing_policy": false, 41 | "entropy_policy": "linear", 42 | "filter_unreachable_abstract_states": false, 43 | "filter_old_abstract_states": false, 44 | "use_l1_penalty": false, 45 | "expected_optima": 0.0, 46 | "max_try": 1 47 | } 48 | -------------------------------------------------------------------------------- /data/matterport/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "horizon": 30, 5 | "obs_dim": [60, 80, 3], 6 | "feature_type": "image", 7 | "gamma": 1.0, 8 | "use_exo": true, 9 | "height": 480, 10 | "width": 640, 11 | "vfov": 60, 12 | "dataset": "/root/mount/Matterport3DSimulator/data/v1/scans", 13 | "connectivity": "/root/mount/Matterport3DSimulator/connectivity/" 14 | } 15 | -------------------------------------------------------------------------------- /data/matterport/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 48, 4 | "encoder_training_num_samples": 4000, 5 | "encoder_training_epoch": 100, 6 | "encoder_training_lr": 0.00025, 7 | "encoder_training_batch_size": 32, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 8000, 10 | "cb_oracle_epoch": 20, 11 | "cb_oracle_lr": 0.00025, 12 | "cb_oracle_batch_size": 32, 13 | "cb_validation_pct": 0.2, 14 | "cb_patience": 5, 15 | "eval_homing_policy_sample_size": 50, 16 | "n_feature_maps": 64, 17 | "n_hidden": 512, 18 | "p_dropout": 0.0, 19 | "phi_layer_size": 25, 20 | "entropy_reg_coeff": 0.075, 21 | "bootstrap_encoder_model": false, 22 | "failed_homing_policy_filter": false, 23 | "encoder_sampling_style": "reuse", 24 | "data_aggregation": false, 25 | "reward_free_planner": "pps", 26 | "reward_sensitive_planner": "psdp", 27 | "patience": 30, 28 | "bias_homing_policy": false, 29 | "entropy_policy": "none", 30 | "filter_unreachable_abstract_states": true, 31 | "filter_old_abstract_states": true, 32 | "use_l1_penalty": false, 33 | "expected_optima": 0.0, 34 | "max_try": 1, 35 | "reward_type": "deterministic", 36 | "count_type": "state" 37 | } 38 | -------------------------------------------------------------------------------- /data/newtonianmotion/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "world_dim": 2, 3 | "state_dim": 4, 4 | "act_dim": 2, 5 | "actions": ["continuous"], 6 | "horizon": 1000, 7 | "noise": 0.1, 8 | "obs_dim": 4, 9 | "feature_type": "feature", 10 | "acc_penalty": 0.5, 11 | "gamma": 1.0 12 | } 13 | -------------------------------------------------------------------------------- /data/newtonianmotion/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "max_iter": 10000, 3 | "learning_rate": 0.001, 4 | "k0": 1, 5 | "batch_size": 32, 6 | "grad_clip": 2.5, 7 | "samples": 1000, 8 | "max_epoch": 100, 9 | "validation_data_percent": 0.2, 10 | "failed_homing_policy_filter": false, 11 | "encoder_sampling_style": "reuse", 12 | "data_aggregation": false, 13 | "reward_free_planner": "gps", 14 | "reward_sensitive_planner": "fqi", 15 | "patience": 100, 16 | "bias_homing_policy": false, 17 | "entropy_policy": "none", 18 | "filter_unreachable_abstract_states": false, 19 | 
"filter_old_abstract_states": false, 20 | "expected_optima": 0.685, 21 | "max_try": 10 22 | } 23 | -------------------------------------------------------------------------------- /data/objectnav/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "horizon": 7, 5 | "scene_name": "FloorPlan201", 6 | "headless": -1, 7 | "obs_dim": [1000, 1000, 3], 8 | "feature_type": "image", 9 | "gamma": 1.0 10 | } 11 | -------------------------------------------------------------------------------- /data/objectnav/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 48, 4 | "encoder_training_num_samples": 2000, 5 | "encoder_training_epoch": 100, 6 | "encoder_training_lr": 0.00025, 7 | "encoder_training_batch_size": 32, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 8000, 10 | "decoder_type": "conv-ai2thor", 11 | "cb_oracle_epoch": 20, 12 | "cb_oracle_lr": 0.00025, 13 | "cb_oracle_batch_size": 32, 14 | "cb_validation_pct": 0.2, 15 | "cb_patience": 5, 16 | "eval_homing_policy_sample_size": 50, 17 | "n_feature_maps": 64, 18 | "n_hidden": 512, 19 | "p_dropout": 0.0, 20 | "phi_layer_size": 25, 21 | "entropy_reg_coeff": 0.075, 22 | "bootstrap_encoder_model": false, 23 | "failed_homing_policy_filter": false, 24 | "encoder_sampling_style": "reuse", 25 | "data_aggregation": false, 26 | "reward_free_planner": "pps", 27 | "reward_sensitive_planner": "psdp", 28 | "patience": 20, 29 | "bias_homing_policy": false, 30 | "entropy_policy": "none", 31 | "filter_unreachable_abstract_states": true, 32 | "filter_old_abstract_states": true, 33 | "use_l1_penalty": false, 34 | "expected_optima": 0.0, 35 | "max_try": 1 36 | } 37 | -------------------------------------------------------------------------------- /data/safetyworld/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "stop_action": 3, 5 | "horizon": 20, 6 | "obs_dim": -1, 7 | "swap_prob": 0.0, 8 | "spawn_prob": 0.0, 9 | "optimal_reward": 5.0, 10 | "anti_shaping_reward": 0.0, 11 | "anti_shaping_reward2": 1.0, 12 | "feature_type": "feature", 13 | "return_state": true, 14 | "gamma": 1.0 15 | } 16 | -------------------------------------------------------------------------------- /data/safetyworld/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "batch_size": 32, 5 | "max_episodes": 500, 6 | "sabre_finetune": 1, 7 | "model_type": "safe_ff", 8 | "eps_clip": 0.1, 9 | "num_ppo_updates": 10, 10 | "rnd_bonus_coeff": 0, 11 | "entropy_coeff": 0.01, 12 | "encoder_training_num_samples": 20000, 13 | "encoder_training_epoch": 200, 14 | "num_processes": 1, 15 | "forwardmodel": "forwardmodel", 16 | "backwardmodel": "backwardmodel", 17 | "discretization": true, 18 | "policy_type": "safe_ff", 19 | "encoder_training_lr": 0.001, 20 | "encoder_training_batch_size": 32, 21 | "validation_data_percent": 0.2, 22 | "psdp_training_num_samples": 20000, 23 | "cb_oracle_epoch": 40, 24 | "cb_oracle_lr": 0.001, 25 | "cb_oracle_batch_size": 32, 26 | "cb_validation_pct": 0.2, 27 | "cb_patience": 5, 28 | "eval_homing_policy_sample_size": 100, 29 | "n_hidden": 56, 30 | "entropy_reg_coeff": 0.075, 31 | "bootstrap_encoder_model": false, 32 | 
"failed_homing_policy_filter": false, 33 | "encoder_sampling_style": "reuse", 34 | "data_aggregation": false, 35 | "reward_free_planner": "gps", 36 | "reward_sensitive_planner": "fqi", 37 | "patience": 10, 38 | "bias_homing_policy": false, 39 | "entropy_policy": "none", 40 | "filter_unreachable_abstract_states": false, 41 | "filter_old_abstract_states": false, 42 | "use_l1_penalty": false, 43 | "expected_optima": 0.685, 44 | "max_try": 10, 45 | "reward_type": "stochastic", 46 | "count_type": "state-action", 47 | "clustering_threshold": 0.15 48 | } 49 | -------------------------------------------------------------------------------- /data/safetyworld2/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "stop_action": 3, 5 | "horizon": 20, 6 | "obs_dim": -1, 7 | "swap_prob": 0.0, 8 | "spawn_prob": 0.0, 9 | "optimal_reward": 5.0, 10 | "anti_shaping_reward": 0.0, 11 | "anti_shaping_reward2": 1.0, 12 | "feature_type": "feature", 13 | "return_state": true, 14 | "gamma": 1.0 15 | } 16 | -------------------------------------------------------------------------------- /data/safetyworld2/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "batch_size": 32, 5 | "max_episodes": 500, 6 | "sabre_finetune": 1, 7 | "model_type": "safe_ff", 8 | "eps_clip": 0.1, 9 | "num_ppo_updates": 10, 10 | "rnd_bonus_coeff": 0, 11 | "entropy_coeff": 0.01, 12 | "encoder_training_num_samples": 20000, 13 | "encoder_training_epoch": 200, 14 | "num_processes": 1, 15 | "forwardmodel": "forwardmodel", 16 | "backwardmodel": "backwardmodel", 17 | "discretization": true, 18 | "policy_type": "linear", 19 | "encoder_training_lr": 0.001, 20 | "encoder_training_batch_size": 32, 21 | "validation_data_percent": 0.2, 22 | "psdp_training_num_samples": 20000, 23 | "cb_oracle_epoch": 40, 24 | "cb_oracle_lr": 0.001, 25 | "cb_oracle_batch_size": 32, 26 | "cb_validation_pct": 0.2, 27 | "cb_patience": 5, 28 | "eval_homing_policy_sample_size": 100, 29 | "n_hidden": 56, 30 | "entropy_reg_coeff": 0.075, 31 | "bootstrap_encoder_model": false, 32 | "failed_homing_policy_filter": false, 33 | "encoder_sampling_style": "reuse", 34 | "data_aggregation": false, 35 | "reward_free_planner": "gps", 36 | "reward_sensitive_planner": "fqi", 37 | "patience": 10, 38 | "bias_homing_policy": false, 39 | "entropy_policy": "none", 40 | "filter_unreachable_abstract_states": false, 41 | "filter_old_abstract_states": false, 42 | "use_l1_penalty": false, 43 | "expected_optima": 0.685, 44 | "max_try": 10, 45 | "reward_type": "stochastic", 46 | "count_type": "state-action", 47 | "clustering_threshold": 0.15 48 | } 49 | -------------------------------------------------------------------------------- /data/simplelqr/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "state_dim": 2, 3 | "act_dim": 1, 4 | "actions": ["continuous"], 5 | "horizon": 500, 6 | "obs_dim": 2, 7 | "feature_type": "feature", 8 | "gamma": 1.0 9 | } 10 | -------------------------------------------------------------------------------- /data/simplelqr/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "max_iter": 10000, 3 | "learning_rate": 0.01, 4 | "k0": 50, 5 | "batch_size": 32, 6 | "samples": 4000, 7 | "max_epoch": 50, 8 | "validation_data_percent": 0.2, 9 | 
"failed_homing_policy_filter": false, 10 | "encoder_sampling_style": "reuse", 11 | "data_aggregation": false, 12 | "reward_free_planner": "gps", 13 | "reward_sensitive_planner": "fqi", 14 | "patience": 100, 15 | "bias_homing_policy": false, 16 | "entropy_policy": "none", 17 | "filter_unreachable_abstract_states": false, 18 | "filter_old_abstract_states": false, 19 | "expected_optima": 0.685, 20 | "max_try": 10 21 | } 22 | -------------------------------------------------------------------------------- /data/slotfactoredmdp/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 10, 3 | "state_dim": 10, 4 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 5 | "horizon": 10, 6 | "obs_dim": -1, 7 | "atom_dim": 1, 8 | "feature_type": "feature", 9 | "return_state": true, 10 | "gamma": 1.0 11 | } 12 | -------------------------------------------------------------------------------- /data/slotfactoredmdp/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "max_parents": 1, 5 | "num_factor_vals": 2, 6 | "encoder_training_num_samples": 10000, 7 | "encoder_training_epoch": 200, 8 | "encoder_training_lr": 0.001, 9 | "encoder_training_batch_size": 32, 10 | "validation_data_percent": 0.2, 11 | "psdp_training_num_samples": 20000, 12 | "cb_oracle_epoch": 40, 13 | "cb_oracle_lr": 0.001, 14 | "cb_oracle_batch_size": 32, 15 | "cb_validation_pct": 0.2, 16 | "cb_patience": 5, 17 | "eval_homing_policy_sample_size": 100, 18 | "n_hidden": 56, 19 | "entropy_reg_coeff": 0.075, 20 | "bootstrap_encoder_model": false, 21 | "failed_homing_policy_filter": false, 22 | "encoder_sampling_style": "reuse", 23 | "data_aggregation": false, 24 | "reward_free_planner": "gps", 25 | "reward_sensitive_planner": "fqi", 26 | "patience": 10, 27 | "bias_homing_policy": false, 28 | "entropy_policy": "none", 29 | "filter_unreachable_abstract_states": false, 30 | "filter_old_abstract_states": false, 31 | "use_l1_penalty": false, 32 | "expected_optima": 0.42, 33 | "max_try": 1, 34 | "reward_type": "stochastic", 35 | "count_type": "state-action", 36 | "clustering_threshold": 0.15 37 | } 38 | -------------------------------------------------------------------------------- /data/stochcombolock/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 2, 3 | "actions": [0, 1], 4 | "horizon": 20, 5 | "obs_dim": -1, 6 | "feature_type": "feature", 7 | "gamma": 1.0 8 | } 9 | -------------------------------------------------------------------------------- /data/stochcombolock/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.0001, 3 | "num_homing_policy": 2, 4 | "encoder_training_num_samples": 3000, 5 | "encoder_training_epoch": 200, 6 | "encoder_training_lr": 0.001, 7 | "encoder_training_batch_size": 32, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 2000, 10 | "cb_oracle_epoch": 20, 11 | "cb_oracle_lr": 0.001, 12 | "cb_oracle_batch_size": 32, 13 | "eval_homing_policy_sample_size": 500, 14 | "n_hidden": 56, 15 | "entropy_reg_coeff": 0.075, 16 | "bootstrap_encoder_model": false, 17 | "failed_homing_policy_filter": false, 18 | "encoder_sampling_style": "reuse", 19 | "data_aggregation": false, 20 | "policy_search": "gps", 21 | "patience": 100, 22 | "bias_homing_policy": false, 23 | "entropy_policy": "none", 24 | 
"filter_unreachable_abstract_states": false, 25 | "filter_old_abstract_states": false, 26 | "use_l1_penalty": false, 27 | "expected_optima": 0.675, 28 | "max_try": 3 29 | } 30 | -------------------------------------------------------------------------------- /data/temporal_combolock/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 10, 3 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 4 | "horizon": 10, 5 | "obs_dim": -1, 6 | "feature_type": "feature", 7 | "gamma": 1.0, 8 | "noise_type": "hadamhardg", 9 | "optimal_reward": 1.0, 10 | "anti_shaping_reward": 0.0, 11 | "anti_shaping_reward2": 0.0, 12 | "exo_flip_prob": 0.1, 13 | "exo_dim": 10, 14 | "seed": 1234 15 | } 16 | -------------------------------------------------------------------------------- /data/temporal_combolock/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "encoder_training_num_samples": 5000, 5 | "elim_param": 0.1, 6 | "grad_clip": 10, 7 | "eps_clip": 0.1, 8 | "batch_size": 100, 9 | "num_ppo_updates": 4, 10 | "entropy_coeff": 0.001, 11 | "rnd_bonus_coeff": 0, 12 | "forward_model_type": "forwardmodel", 13 | "backward_model_type": "backwardmodel", 14 | "discretization": true, 15 | "rnd_obs_norm_init_episode": 100, 16 | "classifier_type": "ff", 17 | "reward_free_planner": "pps", 18 | "reward_sensitive_planner": "psdp", 19 | "encoder_training_epoch": 50, 20 | "encoder_training_lr": 0.001, 21 | "encoder_training_batch_size": 256, 22 | "validation_data_percent": 0.2, 23 | "psdp_training_num_samples": 500, 24 | "cb_oracle_epoch": 20, 25 | "cb_oracle_lr": 0.001, 26 | "cb_oracle_batch_size": 32, 27 | "cb_patience": 5, 28 | "cb_validation_pct": 0.2, 29 | "eval_homing_policy_sample_size": 10, 30 | "n_hidden": 56, 31 | "entropy_reg_coeff": 0.075, 32 | "bootstrap_encoder_model": false, 33 | "clustering_threshold": 0.05, 34 | "nce_from_dataset": true, 35 | "failed_homing_policy_filter": false, 36 | "encoder_sampling_style": "reuse", 37 | "data_aggregation": false, 38 | "policy_search": "pps", 39 | "patience": 20, 40 | "bias_homing_policy": false, 41 | "entropy_policy": "none", 42 | "filter_unreachable_abstract_states": false, 43 | "filter_old_abstract_states": false, 44 | "use_l1_penalty": false, 45 | "expected_optima": 0.0, 46 | "max_try": 1 47 | } 48 | -------------------------------------------------------------------------------- /data/temporal_diabcombolock/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 10, 3 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 4 | "horizon": 10, 5 | "obs_dim": -1, 6 | "feature_type": "feature", 7 | "gamma": 1.0, 8 | "noise_type": "hadamhardg", 9 | "optimal_reward_a": 1.0, 10 | "optimal_reward_b": 0.1, 11 | "anti_shaping_reward": 0.0, 12 | "anti_shaping_reward2": 0.0, 13 | "exo_flip_prob": 0.1, 14 | "exo_dim": 10, 15 | "seed": 1234 16 | } 17 | -------------------------------------------------------------------------------- /data/temporal_diabcombolock/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 3, 4 | "encoder_training_num_samples": 5000, 5 | "elim_param": 0.1, 6 | "grad_clip": 10, 7 | "eps_clip": 0.1, 8 | "batch_size": 100, 9 | "num_ppo_updates": 4, 10 | "entropy_coeff": 0.001, 11 | "rnd_bonus_coeff": 0, 12 | "forward_model_type": "forwardmodel", 13 
| "backward_model_type": "backwardmodel", 14 | "discretization": true, 15 | "rnd_obs_norm_init_episode": 100, 16 | "classifier_type": "ff", 17 | "reward_free_planner": "pps", 18 | "reward_sensitive_planner": "psdp", 19 | "encoder_training_epoch": 50, 20 | "encoder_training_lr": 0.001, 21 | "encoder_training_batch_size": 256, 22 | "validation_data_percent": 0.2, 23 | "psdp_training_num_samples": 500, 24 | "cb_oracle_epoch": 20, 25 | "cb_oracle_lr": 0.001, 26 | "cb_oracle_batch_size": 32, 27 | "cb_patience": 5, 28 | "cb_validation_pct": 0.2, 29 | "eval_homing_policy_sample_size": 10, 30 | "n_hidden": 56, 31 | "entropy_reg_coeff": 0.075, 32 | "bootstrap_encoder_model": false, 33 | "clustering_threshold": 0.05, 34 | "nce_from_dataset": true, 35 | "failed_homing_policy_filter": false, 36 | "encoder_sampling_style": "reuse", 37 | "data_aggregation": false, 38 | "policy_search": "pps", 39 | "patience": 20, 40 | "bias_homing_policy": false, 41 | "entropy_policy": "none", 42 | "filter_unreachable_abstract_states": false, 43 | "filter_old_abstract_states": false, 44 | "use_l1_penalty": false, 45 | "expected_optima": 0.0, 46 | "max_try": 1 47 | } 48 | -------------------------------------------------------------------------------- /local_runs/test_factorl.sh: -------------------------------------------------------------------------------- 1 | cd .. 2 | export PYTHONPATH=$$PYTHONPATH:src 3 | python3 src/experiments/run_factorl.py --env slotfactoredmdp --encoder_training_num_samples 5000 --horizon 5 --noise hadamhardg --save_path ./results --seed 1234 --name test-factorl 4 | -------------------------------------------------------------------------------- /local_runs/test_homer.sh: -------------------------------------------------------------------------------- 1 | cd .. 2 | export PYTHONPATH=$$PYTHONPATH:src 3 | python3 src/experiments/run_homer.py --env diabcombolock --encoder_training_num_samples 5000 --horizon 5 --debug -1 --noise hadamhardg --save_path ./results --seed 1234 --name test-homer 4 | -------------------------------------------------------------------------------- /local_runs/test_id.sh: -------------------------------------------------------------------------------- 1 | cd .. 2 | export PYTHONPATH=$$PYTHONPATH:src 3 | python3 src/experiments/run_id.py --env temporal_combolock --encoder_training_num_samples 5000 --horizon 5 --exo_dim -1 --noise hadamhardg --classifier_type ff --save_path ./results --seed 1234 --name test-ppe 4 | -------------------------------------------------------------------------------- /local_runs/test_ppe.sh: -------------------------------------------------------------------------------- 1 | cd .. 2 | export PYTHONPATH=$$PYTHONPATH:src 3 | python3 src/experiments/run_ppe.py --env temporal_combolock --encoder_training_num_samples 5000 --horizon 5 --exo_dim -1 --noise hadamhardg --classifier_type ff --save_path ./results --seed 1234 --name test-ppe 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 127 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ## Requirements file for INTREPID: Interactive Representation Discovery 2 | ## Note that not all algorithms may need all of the packages below. 
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 127 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ## Requirements file for INTREPID: Interactive Representation Discovery 2 | ## Note that not all algorithms need all of the packages below. 3 | ## If you are unable to install a particular package, it may be 4 | ## worth commenting it out below and installing the others. 5 | 6 | ## Requirements without Version Specifiers 7 | scikit-learn 8 | scikit-image 9 | scipy 10 | imageio 11 | tensorboardX 12 | matplotlib 13 | ai2thor 14 | Pillow 15 | vector-quantize-pytorch 16 | tqdm 17 | wandb 18 | lightning 19 | 20 | ## Requirements with Version Specifiers 21 | torch>=1.8.1 22 | torchvision>=0.9.1 23 | gym-minigrid>=1.0.3 24 | gym>=0.17.3 25 | -------------------------------------------------------------------------------- /src/analysis_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/analysis_tools/__init__.py -------------------------------------------------------------------------------- /src/environments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/__init__.py -------------------------------------------------------------------------------- /src/environments/ai2thorenv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/ai2thorenv/__init__.py -------------------------------------------------------------------------------- /src/environments/app_simulator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/app_simulator/__init__.py -------------------------------------------------------------------------------- /src/environments/app_simulator/run_interactive.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def run_interactive(app): 6 | """ 7 | Run app in human interactive mode. 8 | This will open a window that allows the user to click on UI elements. 9 | Actions can also be taken by providing strings in the command prompt. 10 | """ 11 | 12 | def on_click(event): 13 | if event.xdata is None or event.ydata is None: 14 | # clicked outside of plot 15 | return 16 | 17 | obs = app.get_observation() 18 | action = obs.get_action_at_click(event.xdata, event.ydata) 19 | if action: 20 | print(f"\nClicked on: {action}") 21 | obs, reward, done, info = app.step(action) 22 | plt.imshow(obs.get_screenshot()) 23 | plt.show() 24 | 25 | plt.ion() 26 | plt.axis("off") 27 | plt.connect("button_press_event", on_click) 28 | plt.connect("close_event", lambda _: sys.exit(0)) 29 | 30 | obs, info = app.reset() 31 | while True: 32 | plt.imshow(obs.get_screenshot()) 33 | plt.show() 34 | 35 | available_actions = info["valid_actions"] 36 | print(f"\nAvailable actions: {available_actions}") 37 | action = input("Enter action: ") 38 | 39 | if not action or action == "quit": 40 | print("Quitting app") 41 | break 42 | elif action == "reset": 43 | print("Resetting app") 44 | obs, info = app.reset() 45 | elif action not in available_actions: 46 | print(f"Invalid action: {action}") 47 | else: 48 | obs, reward, done, info = app.step(action) 49 |
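A minimal usage sketch for the interactive runner above. The ToggleSwitchesApp import is an assumption: the real class lives somewhere in src/environments/app_simulator/toggle_switches.py and may be named differently; any app object exposing the reset()/step() API and screenshot-capable observations used above would work.

# Hypothetical demo; ToggleSwitchesApp is an assumed name, not a confirmed export.
from environments.app_simulator.run_interactive import run_interactive
from environments.app_simulator.toggle_switches import ToggleSwitchesApp  # assumed name

app = ToggleSwitchesApp()
run_interactive(app)  # opens a matplotlib window; click UI elements or type actions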
-------------------------------------------------------------------------------- /src/environments/control_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/control_env/__init__.py -------------------------------------------------------------------------------- /src/environments/gym_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/gym_env/__init__.py -------------------------------------------------------------------------------- /src/environments/intrepid_env_meta/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/intrepid_env_meta/__init__.py -------------------------------------------------------------------------------- /src/environments/intrepid_env_meta/action_type.py: -------------------------------------------------------------------------------- 1 | class ActionType: 2 | Discrete = "discrete" 3 | Continuous = "continuous" 4 | Structured = "structured" 5 | Variable = "variable" 6 | 7 | @staticmethod 8 | def get_action_type_from_name(act_type_name): 9 | if act_type_name == "discrete": 10 | return ActionType.Discrete 11 | 12 | elif act_type_name == "continuous": 13 | return ActionType.Continuous 14 | 15 | elif act_type_name == "structured": 16 | return ActionType.Structured 17 | 18 | elif act_type_name == "variable": 19 | return ActionType.Variable 20 | 21 | else: 22 | raise AssertionError("No action type found for %r" % act_type_name) 23 | -------------------------------------------------------------------------------- /src/environments/intrepid_env_meta/environment_keys.py: -------------------------------------------------------------------------------- 1 | class EnvKeys: 2 | """ 3 | Meta-information returned by environments can use the following keys to make the code more 4 | generalizable across different environments. 5 | """ 6 | 7 | # Counter from which the time steps in an episode start 8 | # Designed to deal with the 0-based vs 1-based indexing issue 9 | INITIAL_TIME_STEP = 0 10 | 11 | # Overall state 12 | STATE = "state" 13 | 14 | # Endogenous state 15 | ENDO_STATE = "endogenous_state" 16 | 17 | # Time step 18 | TIME_STEP = "timestep" 19 |
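A short sketch of how these keys are read from the info dictionaries that environments return, mirroring their use in src/learning/policy_roll_in/roll.py later in this repository; env stands in for any environment implementing the interface defined in the next file:

from environments.intrepid_env_meta.environment_keys import EnvKeys


def first_step_endo_state(env, action):
    # reset/step return info dicts keyed by the EnvKeys constants
    obs, info = env.reset()
    obs, reward, done, info = env.step(action)
    return info[EnvKeys.ENDO_STATE]  # endogenous part of the latent state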
-------------------------------------------------------------------------------- /src/environments/intrepid_env_meta/gym_compatible.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | 4 | class GymCompatible(gym.Env): 5 | def __init__(self, cerebral_env): 6 | self.cerebral_env = cerebral_env 7 | # Define action and observation space 8 | # They must be gym.spaces objects 9 | # Example when using discrete actions: 10 | self.action_space = None # TODO spaces.Discrete(N_DISCRETE_ACTIONS) 11 | 12 | # Example for using image as input: 13 | self.observation_space = None 14 | # self.observation_space = spaces.Box(low=0, high=255, shape= 15 | # (HEIGHT, WIDTH, N_CHANNELS), dtype=np.uint8) # TODO 16 | 17 | def step(self, action): 18 | # Execute one time step within the environment 19 | return self.cerebral_env.step(action) 20 | 21 | def reset(self): 22 | # Reset the state of the environment to an initial state 23 | obs, info = self.cerebral_env.reset() 24 | return obs 25 | 26 | def render(self, mode="human", close=False): 27 | # Render the environment to the screen 28 | raise NotImplementedError() 29 | -------------------------------------------------------------------------------- /src/environments/intrepid_env_meta/intrepid_env_interface.py: -------------------------------------------------------------------------------- 1 | class IntrepidEnvInterface: 2 | """Any environment using the Intrepid Env Interface must support the following API""" 3 | 4 | def reset(self): 5 | """ 6 | :return: 7 | obs: Agent observation. No assumption made on the structure of observation. 8 | info: Dictionary containing relevant information such as latent state, etc. 9 | """ 10 | 11 | raise NotImplementedError() 12 | 13 | def step(self, action): 14 | """ 15 | :param action: 16 | :return: 17 | obs: Agent observation. No assumption made on the structure of observation. 18 | reward: Reward received by the agent. No Markov assumption is made. 19 | done: True if the episode has terminated and False otherwise. 20 | info: Dictionary containing relevant information such as latent state, etc. 21 | """ 22 | raise NotImplementedError() 23 | 24 | def get_action_type(self): 25 | """ 26 | :return: 27 | action_type: Return the type of action space the agent is using 28 | """ 29 | raise NotImplementedError() 30 | 31 | def save(self, save_path, fname=None): 32 | """ 33 | Save the environment 34 | :param save_path: Save directory 35 | :param fname: Additionally, a file name can be provided. If the save target is a single file, then this 36 | will be used; otherwise it can be ignored. 37 | :return: None 38 | """ 39 | raise NotImplementedError() 40 | 41 | def load(self, load_path, fname=None): 42 | """ 43 | Load the environment 44 | :param load_path: Load directory 45 | :param fname: Additionally, a file name can be provided. If the load target is a single file, then only 46 | the file with the given fname will be used. 47 | :return: Environment 48 | """ 49 | raise NotImplementedError() 50 | 51 | def is_episodic(self): 52 | """ 53 | :return: True if the environment is episodic and False otherwise. 54 | """ 55 | raise NotImplementedError() 56 | 57 | def act_to_str(self, action): 58 | """ 59 | :param action: a given action 60 | :return: the action in string representation 61 | """ 62 | 63 | return "%r" % action 64 |
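To make the contract above concrete, a minimal sketch of an implementation; the one-step dynamics and reward are invented for illustration and this class is not part of the repository:

from environments.intrepid_env_meta.action_type import ActionType
from environments.intrepid_env_meta.environment_keys import EnvKeys
from environments.intrepid_env_meta.intrepid_env_interface import IntrepidEnvInterface


class OneStepToyEnv(IntrepidEnvInterface):
    """Hypothetical horizon-1 environment with two actions; for illustration only."""

    def reset(self):
        self._t = 0
        return [0.0], {EnvKeys.TIME_STEP: self._t}

    def step(self, action):
        self._t += 1
        reward = 1.0 if action == 1 else 0.0  # action 1 is the "correct" action here
        done = self._t >= 1
        return [float(action)], reward, done, {EnvKeys.TIME_STEP: self._t}

    def get_action_type(self):
        return ActionType.Discrete

    def is_episodic(self):
        return True

save and load fall back to the base class and raise NotImplementedError, which is acceptable for a toy environment.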
54 | """ 55 | raise NotImplementedError() 56 | 57 | def act_to_str(self, action): 58 | """ 59 | :param: given an action 60 | :return: action in string representation 61 | """ 62 | 63 | return "%r" % action 64 | -------------------------------------------------------------------------------- /src/environments/matterport/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/matterport/__init__.py -------------------------------------------------------------------------------- /src/environments/minigrid/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/minigrid/__init__.py -------------------------------------------------------------------------------- /src/environments/rl_acid_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/rl_acid_env/__init__.py -------------------------------------------------------------------------------- /src/environments/rl_acid_env/noise_gen.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | 5 | # Generate Hadamhard matrix of atleast a given size using Sylvester's method 6 | def generated_hadamhard_matrix(lower_bound): 7 | dim = 1 8 | h = np.array([[1.0]], dtype=float) 9 | 10 | while dim < lower_bound: 11 | h = np.block([[h, h], [h, -h]]) 12 | dim = 2 * dim 13 | 14 | # Trim the columns of the matrix to match the lower bound 15 | return h[:, :lower_bound] 16 | 17 | 18 | # Size of the smallest Hadamhard matrix which is greater than lower bound, as generated by Sylvester's method. 
19 | def get_sylvester_hadamhard_matrix_dim(lower_bound): 20 | return int(math.pow(2, math.ceil(math.log(lower_bound, 2)))) 21 | -------------------------------------------------------------------------------- /src/environments/robot_car/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/robot_car/__init__.py -------------------------------------------------------------------------------- /src/environments/robot_car/client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/robot_car/client/__init__.py -------------------------------------------------------------------------------- /src/environments/robot_car/client/client_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 as cv 2 | import sys 3 | from time import time, strftime, localtime 4 | 5 | 6 | def get_timestamp_str(): 7 | return strftime("%Y-%m-%d-%H-%M-%S", localtime(time())) 8 | 9 | 10 | # Initialize webcams 11 | # devnames: list of strings containing numbers ["0", "1"] or names (["/dev/video0", "/dev/video1"]) 12 | def init_cameras(devnames): 13 | cameras = [] 14 | for i, devname in enumerate(devnames): 15 | print(f"Opening camera device {devname}") 16 | try: 17 | # Open device as int index 18 | c = cv.VideoCapture(int(devname)) 19 | except ValueError: 20 | # Open device as string like "/dev/video0" 21 | c = cv.VideoCapture(devname) 22 | if not c.isOpened(): 23 | print(f"Error: Could not open camera {devname}") 24 | for c in cameras: 25 | c.release() 26 | sys.exit(-1) 27 | 28 | # Test camera by taking a picture 29 | print(f"Testing camera {devname}") 30 | ret, pic = c.read() 31 | if not ret: 32 | print(f"Error: Could not read from camera {devname}") 33 | for c in cameras: 34 | c.release() 35 | sys.exit(-1) 36 | print(f"Got image of size {pic.shape}") 37 | 38 | # Add camera to list 39 | cameras.append(c) 40 | print(f"Initialized camera at index {i}: {devname}") 41 | return cameras 42 | -------------------------------------------------------------------------------- /src/environments/robot_car/client/state_capture_client.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import os 4 | 5 | from environments.robot_car.client.client_base import CarClient 6 | from environments.robot_car.client.client_utils import get_timestamp_str, init_cameras 7 | from environments.robot_car.client.state import CarState 8 | 9 | 10 | async def do_capture(host, port, output_dir, cameras): 11 | car = CarClient(host, port) 12 | await car.connect() 13 | 14 | car_state = CarState() 15 | await car_state.capture_from_cameras(car, cameras) 16 | car_state.save_to_files(output_dir) 17 | print(f"Saved images to {output_dir}") 18 | 19 | 20 | if __name__ == "__main__": 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument("--host", type=str, default="localhost") 23 | parser.add_argument("--port", type=int, default=21219) 24 | parser.add_argument("--output_dir", type=str, default=os.path.join(os.getcwd(), get_timestamp_str())) 25 | parser.add_argument("--cameras", type=str, nargs="+", default=[]) 26 | args = parser.parse_args() 27 | 28 | assert len(args.cameras) > 0, "Must specify at least one camera" 29 | 30 | # Create 
output directory 31 | os.makedirs(args.output_dir, exist_ok=True) 32 | 33 | cameras = init_cameras(args.cameras) 34 | try: 35 | asyncio.run(do_capture(args.host, args.port, args.output_dir, cameras)) 36 | finally: 37 | for c in cameras: 38 | c.release() 39 | -------------------------------------------------------------------------------- /src/environments/robot_car/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/robot_car/server/__init__.py -------------------------------------------------------------------------------- /src/environments/robot_car/server/mock_pi_libraries.py: -------------------------------------------------------------------------------- 1 | # Mock versions of raspberry pi specific libraries 2 | 3 | 4 | class Picarx: 5 | def forward(self, speed): 6 | print(f"[mock picarx] forward(speed={speed})") 7 | 8 | def backward(self, speed): 9 | print(f"[mock picarx] backward(speed={speed})") 10 | 11 | def set_dir_servo_angle(self, angle): 12 | print(f"[mock picarx] set_dir_servo_angle(angle={angle})") 13 | 14 | def stop(self): 15 | print("[mock picarx] stop()") 16 | 17 | 18 | class PiCamera: 19 | def start_preview(self): 20 | print("[mock picamera] start_preview()") 21 | 22 | def capture(self, output_file, **kwargs): 23 | print(f"[mock picamera] capture(output_file={output_file})") 24 | -------------------------------------------------------------------------------- /src/environments/robot_car/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/robot_car/utils/__init__.py -------------------------------------------------------------------------------- /src/environments/robot_car/utils/check_corrupted_images.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import cv2 as cv 4 | from tqdm import tqdm 5 | 6 | 7 | def load_pic(filename, resize=(256, 256)): 8 | pic = cv.imread(filename) 9 | return pic 10 | 11 | 12 | def run_check(root_dir): 13 | # get all subdirectories 14 | # each should contain a file called actions.txt 15 | subdirs = [ 16 | d 17 | for d in os.listdir(root_dir) 18 | if os.path.isdir(os.path.join(root_dir, d)) and os.path.isfile(os.path.join(root_dir, d, "actions.txt")) 19 | ] 20 | assert len(subdirs) > 0, f"No subdirectories found in {root_dir}" 21 | 22 | for dir in tqdm(subdirs): 23 | # read log file for this subdirectory 24 | with open(os.path.join(root_dir, dir, "actions.txt")) as f: 25 | log = f.readlines() 26 | log = [json.loads(a) for a in log if a.strip() != ""] 27 | 28 | # load a single trajectory 29 | traj_pics = [] 30 | for line in log: 31 | traj_pics.append( 32 | [ 33 | os.path.join(root_dir, dir, line["cam0"]), 34 | os.path.join(root_dir, dir, line["cam1"]), 35 | os.path.join(root_dir, dir, line["cam_car"]), 36 | ] 37 | ) 38 | 39 | for traj in tqdm(traj_pics): 40 | for pic in traj: 41 | if not os.path.isfile(pic): 42 | print(f"File {pic} does not exist") 43 | loaded = load_pic(pic) 44 | if loaded is None: 45 | print(f"File {pic} could not be loaded") 46 | 47 | 48 | if __name__ == "__main__": 49 | import argparse 50 | 51 | parser = argparse.ArgumentParser() 52 | parser.add_argument("root_dir", help="Root directory containing trajectories in subdirectories") 53 | args = 
parser.parse_args() 54 | 55 | run_check(args.root_dir) 56 | -------------------------------------------------------------------------------- /src/experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/experiments/__init__.py -------------------------------------------------------------------------------- /src/experiments/experiment_save.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | 4 | 5 | def terminate(performance, exp_setup, seeds): 6 | setting = dict() 7 | 8 | for k, v in exp_setup.config.items(): 9 | setting["config/%s" % k] = v 10 | 11 | for k, v in exp_setup.constants.items(): 12 | setting["constants/%s" % k] = v 13 | 14 | for k, v in exp_setup.args.__dict__.items(): 15 | setting["args/%s" % k] = v 16 | 17 | results = {"setting": setting, "performance": performance, "seeds": seeds} 18 | 19 | # Save performance 20 | with open("%s/results.pickle" % exp_setup.experiment, "wb") as f: 21 | pickle.dump(results, f) 22 | 23 | if len(performance) > 0: 24 | for key in performance[0]: # Assumes the keys are the same across all runs 25 | if not isinstance(performance[0][key], int) and not isinstance(performance[0][key], float): 26 | continue 27 | 28 | metrics = [performance_[key] for performance_ in performance] 29 | 30 | exp_setup.logger.log( 31 | "%r: Mean %f, Std %f, Median %f, Min %f, Max %f, Num runs %d, All performance %r" 32 | % ( 33 | key, 34 | np.mean(metrics), 35 | np.std(metrics), 36 | np.median(metrics), 37 | np.min(metrics), 38 | np.max(metrics), 39 | len(metrics), 40 | metrics, 41 | ) 42 | ) 43 | 44 | exp_setup.logger.log("Experiment Completed.") 45 | 46 | # Cleanup 47 | exp_setup.logger_manager.cleanup() 48 | -------------------------------------------------------------------------------- /src/experiments/experimental_setup.py: -------------------------------------------------------------------------------- 1 | class ExperimentalSetup: 2 | def __init__( 3 | self, 4 | config, 5 | constants, 6 | experiment, 7 | exp_name, 8 | env_name, 9 | args, 10 | debug, 11 | logger, 12 | logger_manager, 13 | ): 14 | """ 15 | :param config: Dictionary containing values for the environment 16 | :param constants: Dictionary containing hyperparameters for the algorithm 17 | :param experiment: the full experiment folder where all contents should be saved 18 | :param exp_name: name of the main experiment log file 19 | :param env_name: name of the environment 20 | :param args: command line arguments 21 | :param debug: if set to true, then run the code in debug mode 22 | :param logger: Logger for logging data 23 | :param logger_manager: Logger Manager 24 | """ 25 | 26 | self.config = config 27 | self.constants = constants 28 | self.experiment = experiment 29 | self.exp_name = exp_name 30 | self.env_name = env_name 31 | self.base_env_name = env_name.split("/")[-1] 32 | self.args = args 33 | self.logger = logger 34 | self.debug = debug 35 | self.logger_manager = logger_manager 36 | -------------------------------------------------------------------------------- /src/experiments/run_factorl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from experiments.experiment_header import get_header 7 | from experiments.experiment_save import terminate
8 | from learning.core_learner.factorl import FactoRL 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | # Save the environment for reproducibility 42 | # env.save_environment(experiment, trial_name=exp_id) 43 | # print("Saving Environment...") 44 | 45 | learning_alg = FactoRL(exp_setup) 46 | 47 | policy_result = learning_alg.train(env=env, exp_id=exp_id, opt_reward=True) 48 | 49 | performance.append(policy_result) 50 | 51 | terminate(performance, exp_setup, seeds) 52 | 53 | 54 | if __name__ == "__main__": 55 | print("SETTING THE START METHOD ") 56 | mp.freeze_support() 57 | mp.set_start_method("spawn") 58 | main() 59 | -------------------------------------------------------------------------------- /src/experiments/run_homer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from learning.core_learner.homer import Homer 7 | from experiments.experiment_save import terminate 8 | from experiments.experiment_header import get_header 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | cover_validator = env.generate_homing_policy_validation_fn() 42 | 43 | learning_alg = Homer(exp_setup) 44 | policy_result = learning_alg.train( 45 | env=env, 46 | env_name=exp_setup.env_name, 47 | homing_policy_validator=cover_validator, 48 | exp_id=exp_id, 49 | opt_reward=False, 50 | num_processes=exp_setup.constants["num_processes"], 51 | ) 52 | 53 | performance.append(policy_result) 54 | 55 | terminate(performance, 
exp_setup, seeds) 56 | 57 | 58 | if __name__ == "__main__": 59 | print("SETTING THE START METHOD ") 60 | mp.freeze_support() 61 | mp.set_start_method("spawn") 62 | main() 63 | -------------------------------------------------------------------------------- /src/experiments/run_id.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from experiments.experiment_save import terminate 7 | from experiments.experiment_header import get_header 8 | from learning.core_learner.ik_learner import IDLearning 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | # # Save the environment for reproducibility 42 | # env.save_environment(experiment, trial_name=exp_id) 43 | # print("Saving Environment...") 44 | cover_validator = None # env.generate_homing_policy_validation_fn() 45 | 46 | learning_alg = IDLearning(exp_setup) 47 | policy_result = learning_alg.train( 48 | env=env, 49 | env_name=exp_setup.env_name, 50 | homing_policy_validator=cover_validator, 51 | exp_id=exp_id, 52 | opt_reward=True, 53 | num_processes=exp_setup.constants["num_processes"], 54 | ) 55 | 56 | performance.append(policy_result) 57 | 58 | terminate(performance, exp_setup, seeds) 59 | 60 | 61 | if __name__ == "__main__": 62 | print("SETTING THE START METHOD ") 63 | mp.freeze_support() 64 | mp.set_start_method("spawn") 65 | main() 66 | -------------------------------------------------------------------------------- /src/experiments/run_mbrl_oracle.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from experiments.experiment_header import get_header 7 | from environments.intrepid_env_meta.make_env import MakeEnvironment 8 | from experiments.experiment_save import terminate 9 | from learning.core_learner.mbrl_oracle_decoder import MBRLOracleDecoder 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | 
# Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | # # Save the environment for reproducibility 42 | # env.save_environment(experiment, trial_name=exp_id) 43 | # print("Saving Environment...") 44 | 45 | learning_alg = MBRLOracleDecoder(exp_setup) 46 | 47 | policy_result = learning_alg.train(env=env, exp_id=exp_id) 48 | 49 | performance.append(policy_result) 50 | 51 | terminate(performance, exp_setup, seeds) 52 | 53 | 54 | if __name__ == "__main__": 55 | print("SETTING THE START METHOD ") 56 | mp.freeze_support() 57 | mp.set_start_method("spawn") 58 | main() 59 | -------------------------------------------------------------------------------- /src/experiments/run_ppe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from learning.core_learner.ppe import PPE 7 | from experiments.experiment_save import terminate 8 | from experiments.experiment_header import get_header 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | learning_alg = PPE(exp_setup) 42 | policy_result = learning_alg.train(env=env, exp_id=exp_id, opt_reward=True) 43 | 44 | performance.append(policy_result) 45 | 46 | terminate(performance, exp_setup, seeds) 47 | 48 | 49 | if __name__ == "__main__": 50 | print("SETTING THE START METHOD ") 51 | mp.freeze_support() 52 | mp.set_start_method("spawn") 53 | main() 54 | -------------------------------------------------------------------------------- /src/experiments/run_richid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from learning.core_learner.richid import RichId 7 | from experiments.experiment_save import terminate 8 | from experiments.experiment_header import get_header 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = 
[exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | learning_alg = RichId(exp_setup) 42 | policy_result = learning_alg.train(env=env, latent_lqr=env.env.get_latent_lqr().copy()) 43 | 44 | performance.append(policy_result) 45 | 46 | terminate(performance, exp_setup, seeds) 47 | 48 | 49 | if __name__ == "__main__": 50 | print("SETTING THE START METHOD ") 51 | mp.freeze_support() 52 | mp.set_start_method("spawn") 53 | main() 54 | -------------------------------------------------------------------------------- /src/experiments/run_sysid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from learning.core_learner.richid import SysID 7 | from experiments.experiment_save import terminate 8 | from experiments.experiment_header import get_header 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | learning_alg = SysID(exp_setup) 42 | policy_result = learning_alg.train(env=env) 43 | 44 | performance.append(policy_result) 45 | 46 | terminate(performance, exp_setup, seeds) 47 | 48 | 49 | if __name__ == "__main__": 50 | print("SETTING THE START METHOD ") 51 | mp.freeze_support() 52 | mp.set_start_method("spawn") 53 | main() 54 | -------------------------------------------------------------------------------- /src/learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/__init__.py -------------------------------------------------------------------------------- /src/learning/core_learner/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/core_learner/__init__.py -------------------------------------------------------------------------------- /src/learning/core_learner/ik_learner.py: -------------------------------------------------------------------------------- 1 | from learning.core_learner.abstract_rl_discrete_latent_state import ( 2 | AbstractRLDiscreteLatentState, 3 | ) 4 | from learning.learning_utils.encoder_sampler_ik import EncoderSamplerIK 5 | from learning.learning_utils.ik_train_encoding_function import IKTrainEncodingFunction 6 | 7 | 8 | class IDLearning(AbstractRLDiscreteLatentState): 9 | """ 10 | An algorithm similar to Homer that instead relies on inverse kinematics: 11 | max_\theta p(a_h | x_h, \phi(x_{h+1})) 12 | """ 13 | 14 | def __init__(self, exp_setup): 15 | super(IDLearning, self).__init__(exp_setup) 16 | 17 | self.config = exp_setup.config 18 | self.constants = exp_setup.constants 19 | self.logger = exp_setup.logger 20 | self.experiment = exp_setup.experiment 21 | self.actions = self.config["actions"] 22 | 23 | # Train encoding function 24 | self.train_encoder = IKTrainEncodingFunction(self.config, self.constants) 25 | 26 | # Sampler for generating data for training the encoding function 27 | self.encoder_sampler = EncoderSamplerIK() 28 | 29 | def gather_dataset(self, env, step, homing_policies, num_samples, dataset): 30 | """ 31 | Collect a dataset from the given environment. Return a tuple of: 32 | - a dataset, which can be arbitrary and is used by the specific implementation to train the encoder 33 | - the list of episodes generated by the agent in the process 34 | Every implementation needs to provide this. 35 | """ 36 | 37 | dataset = self.encoder_sampler.gather_samples(num_samples, env, self.actions, step, homing_policies) 38 | return dataset, dataset 39 | 40 | def train_discrete_encoder( 41 | self, 42 | dataset, 43 | logger, 44 | tensorboard, 45 | debug, 46 | bootstrap_model, 47 | undiscretized_initialization=True, 48 | ): 49 | """ 50 | Returns: 51 | - encoding_function: a function that maps an observation to one of s values, where s is a natural number 52 | - num_state_budget: the natural number s 53 | """ 54 | 55 | encoding_function, num_state_budget = self.train_encoder.train_model( 56 | dataset=dataset, 57 | logger=self.logger, 58 | tensorboard=tensorboard, 59 | discretized=True, 60 | bootstrap_model=None, 61 | ) 62 | 63 | return encoding_function, num_state_budget 64 | -------------------------------------------------------------------------------- /src/learning/datastructures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/datastructures/__init__.py -------------------------------------------------------------------------------- /src/learning/datastructures/abstract_tabular_mdp.py: -------------------------------------------------------------------------------- 1 | class AbstractTabularMDP: 2 | def __init__(self, actions, horizon, gamma=1.0): 3 | self.actions = actions 4 | self.horizon = horizon 5 | self.gamma = gamma 6 | 7 | def get_states(self, timestep): 8 | raise NotImplementedError() 9 | 10 | def get_transitions(self, state, action): 11 | raise NotImplementedError() 12 | 13 | def get_reward(self, state, action, next_state, step): 14 | raise NotImplementedError() 15 |
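To make the abstract interface above concrete, a minimal hand-built instance; the deterministic two-step chain, and in particular the convention that get_transitions returns (next_state, probability) pairs, are assumptions for illustration and not part of the repository:

from learning.datastructures.abstract_tabular_mdp import AbstractTabularMDP


class ChainMDP(AbstractTabularMDP):
    """Hypothetical two-step deterministic chain MDP; for illustration only."""

    def __init__(self):
        super().__init__(actions=[0, 1], horizon=2)

    def get_states(self, timestep):
        return ["start"] if timestep == 0 else ["good", "bad"]

    def get_transitions(self, state, action):
        # assumed convention: list of (next_state, probability) pairs
        return [("good", 1.0)] if action == 1 else [("bad", 1.0)]

    def get_reward(self, state, action, next_state, step):
        return 1.0 if next_state == "good" else 0.0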
-------------------------------------------------------------------------------- /src/learning/datastructures/count_conditional_probability.py: -------------------------------------------------------------------------------- 1 | from learning.datastructures.count_probability import CountProbability 2 | 3 | 4 | class CountConditionalProbability: 5 | """ 6 | A simple class to estimate conditional probabilities based on counts 7 | """ 8 | 9 | def __init__(self): 10 | self._conditions = {} 11 | 12 | def add(self, entry, condition): 13 | if condition not in self._conditions: 14 | self._conditions[condition] = CountProbability() 15 | 16 | self._conditions[condition].add(entry) 17 | 18 | def get_conditions(self): 19 | return self._conditions 20 | 21 | def get_entry(self, condition): 22 | if condition not in self._conditions: 23 | return None 24 | else: 25 | return self._conditions[condition] 26 | 27 | def total_count(self, condition): 28 | if condition not in self._conditions: 29 | return 0 30 | else: 31 | return self._conditions[condition].total_count() 32 | 33 | def get_prob_entry(self, entry, condition): 34 | if condition not in self._conditions: 35 | return None 36 | else: 37 | return self._conditions[condition].get_prob_entry(entry) 38 | 39 | def __str__(self): 40 | return "{%s}" % ( 41 | "; ".join(["%r -> %s" % (condition, str(prob)) for (condition, prob) in sorted(self._conditions.items())]) 42 | ) 43 | -------------------------------------------------------------------------------- /src/learning/datastructures/count_probability.py: -------------------------------------------------------------------------------- 1 | from utils.generic_policy import sample_action_from_prob 2 | 3 | 4 | class CountProbability: 5 | """ 6 | A simple class to estimate probabilities based on counts 7 | """ 8 | 9 | def __init__(self): 10 | self._total_count = 0 11 | self._values = {} 12 | 13 | def add(self, entry): 14 | self._total_count += 1 15 | 16 | if entry in self._values: 17 | self._values[entry] += 1.0 18 | else: 19 | self._values[entry] = 1.0 20 | 21 | def get_probability(self): 22 | z = float(max(1.0, self._total_count)) 23 | prob = [(key, value / z) for (key, value) in sorted(self._values.items())] 24 | 25 | return prob 26 | 27 | def get_probability_dict(self): 28 | z = float(max(1.0, self._total_count)) 29 | return {key: value / z for (key, value) in self._values.items()} 30 | 31 | def get_prob_entry(self, entry): 32 | if entry in self._values: 33 | return self._values[entry] / float(max(1.0, self._total_count)) 34 | else: 35 | return 0.0 36 | 37 | def sample(self): 38 | key_prob = self.get_probability() 39 | prob = [key_prob_[1] for key_prob_ in key_prob] 40 | ix = sample_action_from_prob(prob) 41 | 42 | return key_prob[ix][0] 43 | 44 | def total_count(self): 45 | return self._total_count 46 | 47 | def get_entries(self): 48 | return self._values.keys() 49 | 50 | def total_variation(self, other_prob): 51 | union_keys = set(self._values.keys()).union(set(other_prob._values.keys())) 52 | 53 | tv = 0.0 54 | for key in union_keys: 55 | tv += abs(self.get_prob_entry(key) - other_prob.get_prob_entry(key)) 56 | 57 | return 0.5 * tv 58 | 59 | def __str__(self): 60 | prob = self.get_probability() 61 | 62 | return "{%s}" % ("; ".join(["%r: %f" % (entry_, prob_) for entry_, prob_ in prob])) 63 | -------------------------------------------------------------------------------- /src/learning/datastructures/elliptic_potential.py: -------------------------------------------------------------------------------- 1 | 
import torch 2 | 3 | 4 | class EllipticPotential: 5 | 6 | def __init__(self, lam=1.0): 7 | """ 8 | A class to keep track of the matrix 9 | Lambda = lambda * I + \sum_{i=1}^t v_i v_i^T 10 | and return Lambda^{-1} and det(Lambda^{-1}) efficiently. 11 | 12 | Supports features in both numpy and torch format. 13 | """ 14 | self.lam = lam 15 | self._inv_matrix = None 16 | self._det_inv_matrix = None 17 | 18 | def reset(self): 19 | self._inv_matrix = None 20 | self._det_inv_matrix = None 21 | 22 | def add_feature(self, feature): 23 | """ 24 | :param feature: A torch tensor or a numpy ndarray of 1-d or 2-d (of type 1xd or dx1) 25 | Sherman-Morrison update: 26 | (A + uv^T)^{-1} = A^{-1} - (A^{-1} u v^T A^{-1}) / (1 + v^T A^{-1} u) 27 | """ 28 | 29 | if feature.ndim == 1: 30 | feature = feature.reshape(-1, 1) # treat a 1-d feature as a dx1 column vector 31 | elif feature.ndim == 2: 32 | feature = feature.reshape(-1, 1) # normalize 1xd or dx1 input to dx1 33 | else: 34 | raise AssertionError("Feature dimension must be either 1-D or 2D of type 1xd or dx1") 35 | 36 | if self._inv_matrix is None: 37 | self._inv_matrix = (1.0 / self.lam) * torch.eye(feature.shape[0]) 38 | self._det_inv_matrix = self.lam ** (-feature.shape[0]) 39 | 40 | rho = 1.0 / (1.0 + feature.T @ self._inv_matrix @ feature) 41 | self._inv_matrix = self._inv_matrix - (self._inv_matrix @ feature @ feature.T @ self._inv_matrix) * rho 42 | self._det_inv_matrix = self._det_inv_matrix * float(rho) # det(Lambda^{-1}) shrinks by the Sherman-Morrison factor 43 | 44 | def get_inverse(self): 45 | return self._inv_matrix 46 | 47 | def get_inv_mat_det(self): 48 | return self._det_inv_matrix 49 | 50 | def get_elliptic_bonus(self, features): 51 | """ 52 | :param features: Given a feature of size either dim or batch x dim 53 | :return: Bonus which is either scalar if input is 1-d or batch if 2-d 54 | """ 55 | 56 | single = features.ndim == 1 57 | if single: 58 | features = features.reshape(1, -1) # promote to 1 x dim so the batched expression below applies 59 | 60 | bonus = torch.sqrt(torch.diagonal(features @ self._inv_matrix @ features.T)) 61 | 62 | if single: 63 | bonus = bonus[0] 64 | 65 | return bonus 66 | -------------------------------------------------------------------------------- /src/learning/learning_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/learning_utils/__init__.py -------------------------------------------------------------------------------- /src/learning/learning_utils/abstract_encoder_sampler.py: -------------------------------------------------------------------------------- 1 | class AbstractEncoderSampler: 2 | def __init__(self): 3 | pass 4 | 5 | @staticmethod 6 | def gather_samples(num_samples, env, actions, step, homing_policies): 7 | """Gather samples given the environment, action space, the step at which the sample has to be 8 | gathered and the homing policies for the given step""" 9 | raise NotImplementedError() 10 | -------------------------------------------------------------------------------- /src/learning/learning_utils/generic_train_classifier.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from learning.learning_utils.generic_learner import GenericLearner 4 | from utils.cuda import cuda_var 5 | 6 | 7 | class GenericTrainClassifier(GenericLearner): 8 | """Class for training a classifier.
Fairly generic with minimal assumption""" 9 | 10 | def __init__(self, exp_setup): 11 | GenericLearner.__init__(self, exp_setup) 12 | 13 | @staticmethod 14 | def calc_prob(model, batch): 15 | obs = cuda_var(torch.cat([torch.from_numpy(np.array(pt[0])).view(1, -1) for pt in batch], dim=0)).float() 16 | 17 | prob, info_dict = model.gen_prob(obs) # Batch x Num Classes 18 | 19 | return prob, info_dict 20 | 21 | def calc_loss(self, model, batch, test=False): 22 | obs = cuda_var(torch.cat([torch.from_numpy(np.array(pt[0])).view(1, -1) for pt in batch], dim=0)).float() 23 | y = cuda_var(torch.LongTensor([pt[1] for pt in batch]).view(-1)) 24 | 25 | log_prob, info_dict = model.gen_log_prob(obs) # Batch x Num Classes 26 | 27 | selected_log_prob = log_prob.gather(1, y.view(-1, 1)) # Batch 28 | loss = -selected_log_prob.mean() 29 | 30 | return loss, info_dict 31 | 32 | def get_class_mean_prob(self, model, dataset): 33 | """ 34 | :param model: A classification model f for mapping input space X to distribution over K classes. Given input 35 | x in X, the model f(j | x) denotes the probability of class j. 36 | :param dataset: A list of tuples where first dimension of tuple is input x in X, and second is class label, 37 | rest dimensions are ignored and can be used for adding meta-information. 38 | :return: A pytorch cpu matrix of size dataset_size x N where (i, j)^{th} value denote 39 | 1/N f(j | x_i) where N is the size of dataset and x_i is the i^{th} input in the dataset. 40 | """ 41 | 42 | dataset_size = len(dataset) 43 | batches = [dataset[i : i + self.batch_size] for i in range(0, dataset_size, self.batch_size)] 44 | all_prob = [] 45 | 46 | for batch in batches: 47 | prob, info_dict = self.calc_prob(model, batch) 48 | prob = prob.detach().data.cpu() # batch x num_class 49 | 50 | all_prob.append(prob) 51 | 52 | all_prob = torch.cat(all_prob, dim=0) # Dataset x num_class 53 | 54 | return all_prob / float(all_prob.size(0)) 55 | -------------------------------------------------------------------------------- /src/learning/learning_utils/ricatti_solver.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | from utils.beautify_time import beautify 5 | 6 | 7 | class RicattiSolver: 8 | def __init__(self, logger, max_it=1000, min_change=0.000001): 9 | self.max_it = max_it 10 | self.logger = logger 11 | self.min_change = min_change 12 | 13 | def solve(self, A, B, Q, R): 14 | time_start = time.time() 15 | self.logger.debug("Performing Ricatti Iterations") 16 | P = np.eye(A.shape[0]) 17 | 18 | for it in range(0, self.max_it): 19 | inv_term = np.linalg.inv(R + B.T @ P @ B) 20 | new_P = A.T @ P @ A + Q - A.T @ P @ B @ inv_term @ B.T @ P @ A 21 | 22 | change = np.linalg.norm(P - new_P) 23 | 24 | if it % 10 == 0: 25 | self.logger.debug("Ricatti Solver: Iteration=%d, Change in P %f" % (it, change)) 26 | 27 | P = new_P 28 | 29 | if change < self.min_change: 30 | break 31 | 32 | K = np.linalg.inv(R + B.T @ P @ B) @ B.T @ P @ A 33 | self.logger.debug("Ricatti Iterations Performed. 
Time taken %s" % beautify(time.time() - time_start)) 34 | 35 | return P, K 36 | -------------------------------------------------------------------------------- /src/learning/linear_mdp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/linear_mdp/__init__.py -------------------------------------------------------------------------------- /src/learning/model_estimation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/model_estimation/__init__.py -------------------------------------------------------------------------------- /src/learning/model_estimation/count_based_estimation.py: -------------------------------------------------------------------------------- 1 | class CountBasedEstimation: 2 | def __init__(self, stationary=False): 3 | self.stationary = stationary 4 | 5 | def estimate_all(self, replay_memory, decoder): 6 | raise NotImplementedError() 7 | 8 | def estimate_step(self, mdp, replay_memory, step, decoders): 9 | if not isinstance(replay_memory, list): 10 | raise AssertionError("Replay memory must be a list") 11 | 12 | transitions = [episode.get_transitions_at_step(step - 1) for episode in replay_memory] 13 | 14 | latent_transitions = [ 15 | ( 16 | decoders[step - 1].encode_observations(x), 17 | a, 18 | r, 19 | decoders[step].encode_observations(next_x), 20 | ) 21 | for (x, a, r, next_x) in transitions 22 | ] 23 | 24 | abs_states = set([lt[3] for lt in latent_transitions]) 25 | 26 | for abs_state in abs_states: 27 | mdp.add_state(abs_state, step) 28 | 29 | for abs_state, action, reward, next_abs_state in latent_transitions: 30 | mdp.add_transition(abs_state, action, next_abs_state) 31 | mdp.add_reward(abs_state, action, next_abs_state, reward) 32 | -------------------------------------------------------------------------------- /src/learning/planning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/planning/__init__.py -------------------------------------------------------------------------------- /src/learning/planning/cem/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/planning/cem/__init__.py -------------------------------------------------------------------------------- /src/learning/planning/cem/cem_optimizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributions as dist 3 | import time 4 | 5 | 6 | class CEM_Optimizer: 7 | def __init__(self, cost_fcn, x_min=None, x_max=None) -> None: 8 | self.cost_fcn = cost_fcn 9 | self.x_min, self.x_max = x_min, x_max 10 | 11 | def cem_iter(self, x_init, num_samples=500, num_iter=10, elite_ratio=0.2, sigma=0.2): 12 | # x_init has size [1, horizon, nu] 13 | _, horizon, nu = x_init.size() 14 | device = x_init.device 15 | 16 | dim = horizon * nu 17 | 18 | # we have fixed mean and cov initialization 19 | mean = torch.zeros(dim).to(device) 20 | cov = torch.eye(dim).to(device) 21 | 22 | cost_fcn = self.cost_fcn 23 | # initialize mean and cov 24 | for i in 
range(num_iter): 25 | start_time = time.time() 26 | x_samples = dist.MultivariateNormal(mean, cov).sample((num_samples,)) 27 | input_samples = x_samples.view((-1, 1, horizon, nu)) 28 | if self.x_min is not None: 29 | input_samples = torch.clamp(input_samples, min=self.x_min) 30 | 31 | if self.x_max is not None: 32 | input_samples = torch.clamp(input_samples, max=self.x_max) 33 | 34 | scores = cost_fcn(input_samples).view(-1) 35 | 36 | # minimize the cost function 37 | _, elite_idx = torch.topk(scores, int(num_samples * elite_ratio), largest=False) 38 | elite_samples = x_samples[elite_idx] 39 | mean = elite_samples.mean(dim=0) 40 | cov = torch.diag(elite_samples.var(dim=0)) 41 | run_time = time.time() - start_time 42 | print( 43 | "cem iter {:d} takes {:.2f} secs. cost min: {:.2f}, cost max: {:.2f}".format( 44 | i, run_time, scores.min().item(), scores.max().item() 45 | ) 46 | ) 47 | 48 | best_x = elite_samples[0].view((1, horizon, nu)) 49 | best_score = cost_fcn(best_x) 50 | return best_x, best_score 51 | -------------------------------------------------------------------------------- /src/learning/planning/high_level_planner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/planning/high_level_planner/__init__.py -------------------------------------------------------------------------------- /src/learning/planning/hj_prox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/planning/hj_prox/__init__.py -------------------------------------------------------------------------------- /src/learning/policy_roll_in/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/policy_roll_in/__init__.py -------------------------------------------------------------------------------- /src/learning/policy_roll_in/roll.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from learning.datastructures.episode import Episode 4 | from environments.intrepid_env_meta.environment_keys import EnvKeys 5 | 6 | 7 | class Roll: 8 | def __init__(self, env, actions): 9 | self._env = env 10 | self.eps = None 11 | self.actions = actions 12 | 13 | def roll_in(self, policy, t): 14 | """ 15 | Roll-in in the environment using the above policy till time step t 16 | :param policy: A policy for taking actions 17 | :param t: Number of actions taken by the policy. 
18 | :return: 19 | """ 20 | 21 | obs, info = self._env.reset() 22 | self.eps = Episode(state=info[EnvKeys.ENDO_STATE], observation=obs, gamma=1.0) 23 | 24 | for h in range(0, t): 25 | action = policy.sample_action(obs, h) 26 | obs, reward, done, info = self._env.step(action) 27 | self.eps.add( 28 | action=action, 29 | reward=reward, 30 | new_obs=obs, 31 | new_state=info[EnvKeys.ENDO_STATE], 32 | ) 33 | 34 | return self 35 | 36 | def take_random(self, k): 37 | for t in range(0, k): 38 | action = random.choice(self.actions) 39 | self.take_action(action) 40 | 41 | return self 42 | 43 | def take_action(self, action): 44 | obs, reward, done, info = self._env.step(action) 45 | 46 | self.eps.add( 47 | action=action, 48 | reward=reward, 49 | new_obs=obs, 50 | new_state=info[EnvKeys.ENDO_STATE], 51 | ) 52 | 53 | return self 54 | 55 | def roll_out(self, policy, t): 56 | raise NotImplementedError() 57 | 58 | def terminate(self): 59 | """Terminate the roll-out""" 60 | 61 | self.eps.terminate() 62 | return self 63 | 64 | def retrieve(self, pattern=None): 65 | """Retrieve the details""" 66 | 67 | if pattern is not None: 68 | raise NotImplementedError() 69 | else: 70 | return self.eps 71 | -------------------------------------------------------------------------------- /src/learning/policy_search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/policy_search/__init__.py -------------------------------------------------------------------------------- /src/learning/state_abstraction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/state_abstraction/__init__.py -------------------------------------------------------------------------------- /src/learning/state_abstraction/abstract_state_decoder.py: -------------------------------------------------------------------------------- 1 | class AbstractStateDecoder: 2 | def __init__(self): 3 | pass 4 | 5 | def calc_loss(self, model, sample, **kwargs): 6 | """ 7 | :model 8 | :sample 9 | :**kwargs 10 | """ 11 | raise NotImplementedError() 12 | -------------------------------------------------------------------------------- /src/learning/state_abstraction/autoencoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from utils.cuda import cuda_var 5 | from learning.learning_utils.entropy_decay_policy import EntropyDecayPolicy 6 | 7 | 8 | class Autoencoder: 9 | """ 10 | State abstraction using autoencoder 11 | """ 12 | 13 | def __init__(self, constants, epoch): 14 | self.entropy_decay_policy = EntropyDecayPolicy(constants, epoch) 15 | self.entropy_coeff = constants["entropy_reg_coeff"] 16 | 17 | def calc_loss(self, model, batch, epoch, discretized, test_set_errors=None, past_entropy=None): 18 | observations = cuda_var( 19 | torch.cat( 20 | [torch.from_numpy(np.array(point.get_next_obs())).view(1, -1) for point in batch], 21 | dim=0, 22 | ) 23 | ).float() 24 | 25 | # Compute loss given by L2 distance between the observation and reconstructed observation. 26 | # The returned observation is flattened. 
27 | reconstructed_obs, meta_dict = model.reconstruct( 28 | observations=observations, discretized=discretized 29 | ) # outputs of size batch x -1 30 | 31 | reconstruction_loss = torch.norm(observations - reconstructed_obs) 32 | 33 | if discretized: 34 | # For discretized models, there is an internal classification step represented by a probability 35 | # distribution that can be controlled using an entropy bonus 36 | # NOT SUPPORTED AT THE MOMENT 37 | decay_coeff = self.entropy_decay_policy.get_entropy_coeff(epoch, test_set_errors, past_entropy) 38 | loss = reconstruction_loss - self.entropy_coeff * decay_coeff * meta_dict["mean_entropy"] 39 | else: 40 | decay_coeff = None 41 | loss = reconstruction_loss 42 | 43 | info_dict = dict() 44 | 45 | info_dict["reconstruction_loss"] = reconstruction_loss 46 | 47 | if discretized: 48 | info_dict["mean_entropy"] = meta_dict["mean_entropy"] 49 | info_dict["entropy_coeff"] = self.entropy_coeff * decay_coeff 50 | else: 51 | info_dict["mean_entropy"] = -1 52 | info_dict["entropy_coeff"] = 0.0 53 | 54 | return loss, info_dict 55 | -------------------------------------------------------------------------------- /src/learning/state_abstraction/generalized_inverse_kinematics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from utils.cuda import cuda_var 5 | from learning.learning_utils.entropy_decay_policy import EntropyDecayPolicy 6 | 7 | 8 | class GeneralizedInverseKinematics: 9 | """ 10 | State abstraction using generalized inverse kinematics 11 | """ 12 | 13 | def __init__(self, constants, epoch): 14 | self.entropy_decay_policy = EntropyDecayPolicy(constants, epoch) 15 | self.entropy_coeff = constants["entropy_reg_coeff"] 16 | 17 | def calc_loss(self, model, batch, epoch, discretized, test_set_errors=None, past_entropy=None): 18 | past_observations = cuda_var( 19 | torch.cat( 20 | [torch.from_numpy(np.array(point.get_curr_obs())).view(1, -1) for point in batch], 21 | dim=0, 22 | ) 23 | ).float() 24 | past_actions = cuda_var( 25 | torch.cat( 26 | [torch.from_numpy(np.array(point.get_action())).view(1, -1) for point in batch], 27 | dim=0, 28 | ) 29 | ).long() 30 | observations = cuda_var( 31 | torch.cat( 32 | [torch.from_numpy(np.array(point.get_next_obs())).view(1, -1) for point in batch], 33 | dim=0, 34 | ) 35 | ).float() 36 | 37 | # Compute loss 38 | log_probs, meta_dict = model.gen_log_prob( 39 | prev_observations=past_observations, 40 | observations=observations, 41 | discretized=discretized, 42 | ) # outputs of size batch x num_actions 43 | classification_loss = -torch.mean(log_probs.gather(1, past_actions.view(-1, 1))) 44 | 45 | if discretized: 46 | # For discretized models, there is an internal classification step represented by a probability 47 | # distribution that can be controlled using an entropy bonus 48 | # NOT SUPPORTED AT THE MOMENT 49 | decay_coeff = self.entropy_decay_policy.get_entropy_coeff(epoch, test_set_errors, past_entropy) 50 | loss = classification_loss - self.entropy_coeff * decay_coeff * meta_dict["mean_entropy"] 51 | else: 52 | decay_coeff = None 53 | loss = classification_loss 54 | 55 | info_dict = dict() 56 | 57 | info_dict["classification_loss"] = classification_loss 58 | 59 | if discretized: 60 | info_dict["mean_entropy"] = meta_dict["mean_entropy"] 61 | info_dict["entropy_coeff"] = self.entropy_coeff * decay_coeff 62 | else: 63 | info_dict["mean_entropy"] = -1 64 | info_dict["entropy_coeff"] = 0.0 65 | 66 | return loss, info_dict 67 |
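Note: the state-abstraction objectives in this directory (autoencoder, generalized inverse kinematics above, and the inverse-kinematics and noise-contrastive variants below) expose a compatible calc_loss interface, so one generic training loop can drive any of them. A minimal sketch of such a loop follows; the make_model and sample_batch helpers, the optimizer choice, and the constants values are illustrative assumptions, not part of this repository.

import torch
from learning.state_abstraction.inverse_kinematics import InverseKinematics

constants = {"entropy_reg_coeff": 0.01}  # assumed value for illustration
num_epochs = 10

model = make_model()        # hypothetical helper: builds a model exposing gen_log_prob
objective = InverseKinematics(constants, num_epochs)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(num_epochs):
    # hypothetical helper: yields transitions exposing get_curr_obs/get_action/get_next_obs
    batch = sample_batch()
    loss, info = objective.calc_loss(model, batch, epoch, discretized=False)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print("epoch %d: classification loss %.4f" % (epoch, info["classification_loss"].item()))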
-------------------------------------------------------------------------------- /src/learning/state_abstraction/inverse_kinematics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from utils.cuda import cuda_var 5 | from learning.learning_utils.entropy_decay_policy import EntropyDecayPolicy 6 | 7 | 8 | class InverseKinematics: 9 | """ 10 | State abstraction using inverse kinematics 11 | """ 12 | 13 | def __init__(self, constants, epoch): 14 | self.entropy_decay_policy = EntropyDecayPolicy(constants, epoch) 15 | self.entropy_coeff = constants["entropy_reg_coeff"] 16 | 17 | def calc_loss(self, model, batch, epoch, discretized, test_set_errors=None, past_entropy=None): 18 | prev_observations = cuda_var( 19 | torch.cat( 20 | [torch.from_numpy(np.array(point.get_curr_obs())).view(1, -1) for point in batch], 21 | dim=0, 22 | ) 23 | ).float() 24 | actions = cuda_var( 25 | torch.cat( 26 | [torch.from_numpy(np.array(point.get_action())).view(1, -1) for point in batch], 27 | dim=0, 28 | ) 29 | ).long() 30 | observations = cuda_var( 31 | torch.cat( 32 | [torch.from_numpy(np.array(point.get_next_obs())).view(1, -1) for point in batch], 33 | dim=0, 34 | ) 35 | ).float() 36 | 37 | # Compute loss 38 | log_probs, meta_dict = model.gen_log_prob( 39 | prev_observations=prev_observations, 40 | observations=observations, 41 | discretized=discretized, 42 | ) # outputs of size batch x num_actions 43 | classification_loss = -torch.mean(log_probs.gather(1, actions.view(-1, 1))) 44 | 45 | if discretized: 46 | # For discretized models, there is an internal classification step represented by a probability 47 | # distribution that can be controlled using an entropy bonus 48 | # NOT SUPPORTED AT THE MOMENT 49 | decay_coeff = self.entropy_decay_policy.get_entropy_coeff(epoch, test_set_errors, past_entropy) 50 | loss = classification_loss - self.entropy_coeff * decay_coeff * meta_dict["mean_entropy"] 51 | else: 52 | decay_coeff = None 53 | loss = classification_loss 54 | 55 | info_dict = dict() 56 | 57 | info_dict["classification_loss"] = classification_loss 58 | 59 | if discretized: 60 | info_dict["mean_entropy"] = meta_dict["mean_entropy"] 61 | info_dict["entropy_coeff"] = self.entropy_coeff * decay_coeff 62 | else: 63 | info_dict["mean_entropy"] = -1 64 | info_dict["entropy_coeff"] = 0.0 65 | 66 | return loss, info_dict 67 | -------------------------------------------------------------------------------- /src/learning/state_abstraction/noise_contrastive_global.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from utils.cuda import cuda_var 5 | from learning.learning_utils.entropy_decay_policy import EntropyDecayPolicy 6 | 7 | 8 | class NoiseContrastiveGlobal: 9 | """ 10 | State abstraction using noise contrastive learning with globally normalized probabilities 11 | """ 12 | 13 | def __init__(self, constants, epoch): 14 | self.entropy_decay_policy = EntropyDecayPolicy(constants, epoch) 15 | self.entropy_coeff = constants["entropy_reg_coeff"] 16 | 17 | @staticmethod 18 | def calc_loss(model, batch, epoch, discretized, test_set_errors=None, past_entropy=None): 19 | prev_observations = cuda_var( 20 | torch.cat( 21 | [torch.from_numpy(np.array(point[0])).view(1, -1) for point in batch], 22 | dim=0, 23 | ) 24 | ).float() 25 | actions = cuda_var( 26 | torch.cat( 27 | [torch.from_numpy(np.array(point[1])).view(1, -1) for point in batch], 28 | dim=0, 29 | ) 30
| ).long() 31 | observations = cuda_var( 32 | torch.cat( 33 | [torch.from_numpy(np.array(point[2])).view(1, -1) for point in batch], 34 | dim=0, 35 | ) 36 | ).float() 37 | 38 | # Generate a matrix M of size batch x batch where M[i, j] denotes p(y = 1 | x_i, a_i, x'_j) 39 | # diagonal elements are real transitions, non-diagonal elements are imposter candidates 40 | scores = model.gen_scores( 41 | prev_observations=prev_observations, 42 | actions=actions, 43 | observations=observations, 44 | ) 45 | 46 | classification_loss = -(torch.diagonal(scores, 0) - torch.logsumexp(scores, 1)).mean() # negative mean log-softmax of the true transitions 47 | 48 | info_dict = dict() 49 | info_dict["classification_loss"] = classification_loss 50 | info_dict["mean_entropy"] = 0.0 51 | info_dict["entropy_coeff"] = 0.0 52 | 53 | return classification_loss, info_dict 54 | -------------------------------------------------------------------------------- /src/learning/state_abstraction/noise_contrastive_local.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | 5 | from utils.cuda import cuda_var 6 | from learning.learning_utils.entropy_decay_policy import EntropyDecayPolicy 7 | 8 | 9 | class NoiseContrastiveLocal: 10 | """ 11 | State abstraction using noise contrastive learning with locally normalized probabilities 12 | """ 13 | 14 | def __init__(self, constants, epoch): 15 | self.entropy_decay_policy = EntropyDecayPolicy(constants, epoch) 16 | self.entropy_coeff = constants["entropy_reg_coeff"] 17 | 18 | @staticmethod 19 | def calc_loss(model, batch): 20 | prev_observations = cuda_var( 21 | torch.cat( 22 | [torch.from_numpy(np.array(point[0])).view(1, -1) for point in batch], 23 | dim=0, 24 | ) 25 | ).float() 26 | actions = cuda_var( 27 | torch.cat( 28 | [torch.from_numpy(np.array(point[1])).view(1, -1) for point in batch], 29 | dim=0, 30 | ) 31 | ).long() 32 | observations = cuda_var( 33 | torch.cat( 34 | [torch.from_numpy(np.array(point[2])).view(1, -1) for point in batch], 35 | dim=0, 36 | ) 37 | ).float() 38 | 39 | # Generate a matrix M of size batch x batch where M[i, j] denotes p(y = 1 | x_i, a_i, x'_j) 40 | # diagonal elements are real transitions, non-diagonal elements are imposter candidates 41 | scores = model.gen_scores( 42 | prev_observations=prev_observations, 43 | actions=actions, 44 | observations=observations, 45 | ) 46 | 47 | batch_size = len(batch) 48 | 49 | # Binary NCE: diagonal entries are positives (+1), off-diagonal entries are negatives (-1); the weights below give each class equal total mass 50 | log_probs = F.logsigmoid((2 * torch.eye(batch_size, device=scores.device) - 1) * scores) 51 | 52 | classification_loss = -torch.sum( 53 | log_probs 54 | * (torch.eye(batch_size, device=scores.device) / batch_size + (1 - torch.eye(batch_size, device=scores.device)) / batch_size / (batch_size - 1)) 55 | ) 56 | 57 | info_dict = dict() 58 | info_dict["classification_loss"] = classification_loss 59 | info_dict["mean_entropy"] = 0.0 60 | info_dict["entropy_coeff"] = 0.0 61 | 62 | return classification_loss, info_dict 63 | -------------------------------------------------------------------------------- /src/learning/tabular_rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/tabular_rl/__init__.py -------------------------------------------------------------------------------- /src/learning/tabular_rl/det_tabular_mdp_builder.py: -------------------------------------------------------------------------------- 1 | from
learning.datastructures.abstract_tabular_mdp import AbstractTabularMDP 2 | 3 | 4 | class DetTabularMDPBuilder(AbstractTabularMDP): 5 | """ 6 | Builder class to construct a deterministic tabular MDP 7 | """ 8 | 9 | def __init__(self, actions, horizon, gamma=1.0): 10 | AbstractTabularMDP.__init__(self, actions, horizon, gamma) 11 | 12 | self.actions = actions 13 | self.horizon = horizon 14 | self.gamma = gamma 15 | 16 | # States reached at different time step 17 | # timestep -> [state1, state2, ...] 18 | self._states = dict() 19 | 20 | # (state, action) -> [(new_state, 1.0)] 21 | self._transitions = dict() 22 | 23 | # (state, action) -> scalar_value 24 | self._rewards = dict() 25 | 26 | self._finalize = False 27 | 28 | def add_state(self, state, timestep): 29 | assert not self._finalize, "This MDP has been finalized so new states cannot be added to it." 30 | 31 | if timestep not in self._states: 32 | self._states[timestep] = [] 33 | 34 | self._states[timestep].append(state) 35 | 36 | def add_transition(self, state, action, new_state): 37 | assert not self._finalize, "This MDP has been finalized so new transitions cannot be added to it." 38 | 39 | if (state, action) in self._transitions: 40 | return 41 | 42 | self._transitions[(state, action)] = [(new_state, 1.0)] 43 | 44 | def add_reward(self, state, action, reward): 45 | assert not self._finalize, "This MDP has been finalized so new rewards cannot be added to it." 46 | 47 | if (state, action) in self._rewards: 48 | return 49 | 50 | self._rewards[(state, action)] = reward 51 | 52 | def finalize(self): 53 | self._finalize = True 54 | 55 | def get_states(self, timestep): 56 | return self._states[timestep] 57 | 58 | def num_states(self, timestep): 59 | return len(self._states[timestep]) 60 | 61 | def get_transitions(self, state, action): 62 | return self._transitions[(state, action)] 63 | 64 | def get_reward(self, state, action, next_state, step): 65 | return self._rewards[(state, action)] 66 | -------------------------------------------------------------------------------- /src/learning/tabular_rl/value_iteration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class ValueIteration: 5 | """ 6 | Performs Bellman Optimal Q-iteration on Tabular MDP 7 | """ 8 | 9 | def __init__(self): 10 | pass 11 | 12 | def do_value_iteration(self, tabular_mdp, reward_func=None, min_reward_val=0.0): 13 | actions = tabular_mdp.actions 14 | num_actions = len(actions) 15 | q_values = dict() 16 | 17 | for h in range(tabular_mdp.horizon, -1, -1): 18 | states = tabular_mdp.get_states(h) 19 | 20 | for state in states: 21 | state_with_timestep = (h, state) 22 | 23 | q_values[state_with_timestep] = np.repeat(min_reward_val, num_actions).astype(np.float32) 24 | 25 | for action in actions: 26 | if h == tabular_mdp.horizon: 27 | q_values[state_with_timestep][action] = 0.0 28 | else: 29 | q_val = 0.0 30 | for new_state, prob_val in tabular_mdp.get_transitions(state, action): 31 | if reward_func is None: 32 | # Use the environment reward function 33 | reward = tabular_mdp.get_reward(state, action, new_state, h) 34 | else: 35 | # Use the given reward function 36 | reward = reward_func(state, action, new_state, h) 37 | 38 | q_val += prob_val * (reward + tabular_mdp.gamma * q_values[(h + 1, new_state)].max()) 39 | 40 | q_values[state_with_timestep][action] = q_val 41 | 42 | return q_values 43 | -------------------------------------------------------------------------------- /src/model/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/__init__.py -------------------------------------------------------------------------------- /src/model/bottleneck/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/bottleneck/__init__.py -------------------------------------------------------------------------------- /src/model/bottleneck/gaussian_bottleneck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class GaussianBottleneck(nn.Module): 6 | 7 | def __init__(self, hidden_dim): 8 | super(GaussianBottleneck, self).__init__() 9 | 10 | self.hidden_dim = hidden_dim 11 | self.pre_enc = nn.Linear(self.hidden_dim, 2 * self.hidden_dim) 12 | self.post_enc = nn.Linear(self.hidden_dim, self.hidden_dim) 13 | 14 | # TODO add command line argument 15 | self.kl_weight = 0.0001 16 | 17 | self.mu_prior = nn.Parameter(torch.zeros(self.hidden_dim)) 18 | self.sigma_prior = nn.Parameter(torch.ones(self.hidden_dim)) 19 | 20 | if torch.cuda.is_available(): 21 | self.cuda() 22 | 23 | def gb_helper(self, h): 24 | 25 | h = self.pre_enc(h) 26 | mu = h[:, :self.hidden_dim] 27 | std = torch.exp(h[:, self.hidden_dim:]) + 1e-6 28 | q_z = torch.distributions.Normal(loc=mu, scale=std) 29 | 30 | if self.training: 31 | # print('std: {}, mu: {}, mu_prior: {}'.format(std.mean(), torch.abs(mu).mean(), 32 | # torch.abs(self.mu_prior).mean())) 33 | # h = mu + torch.randn_like(std) * std 34 | h = q_z.rsample() 35 | # klb_loss = (mu**2 + std**2 - 2*torch.log(std)).sum(dim=1).mean() * self.kl_weight 36 | p_z = torch.distributions.Normal(loc=self.mu_prior, scale=self.sigma_prior) 37 | klb_loss = self.kl_weight * torch.distributions.kl_divergence(q_z, p_z).sum(dim=1).mean() 38 | else: 39 | h = mu 40 | klb_loss = 0.0 41 | 42 | # h = self.post_enc(h) 43 | return h, klb_loss 44 | -------------------------------------------------------------------------------- /src/model/bottleneck/vq_bottleneck.py: -------------------------------------------------------------------------------- 1 | 2 | class VQBottleneckWrapper: 3 | 4 | def __init__(self): 5 | pass 6 | 7 | @staticmethod 8 | def get_bottleneck(model_name, constants, heads=None, codebook_size=None): 9 | 10 | if model_name == "vq": 11 | 12 | from vector_quantize_pytorch import VectorQuantize 13 | 14 | return VectorQuantize( 15 | 16 | dim=constants["vq_dim"], 17 | codebook_size=constants["vq_codebook_size"] if codebook_size is None else codebook_size, 18 | 19 | # the exponential moving average decay, lower means the dictionary will change faster 20 | decay=constants["vq_decay"], 21 | 22 | # 1., # the weight on the commitment loss 23 | commitment_weight=constants["vq_commitment_weight"], 24 | 25 | # in paper, they recommended a value of 10 26 | orthogonal_reg_weight=constants["vq_orthogonal_reg_weight"], 27 | 28 | # this would randomly sample from the codebook for the orthogonal regularization loss, 29 | # for limiting memory usage 30 | orthogonal_reg_max_codes=constants["vq_orthogonal_reg_max_codes"], 31 | 32 | # set this to True if you have a very large codebook, and would only like to enforce the 33 | # loss on the activated codes per batch 34 | 
orthogonal_reg_active_codes_only=constants["vq_orthogonal_reg_active_codes_only"], 35 | 36 | # number of heads to vector quantize, codebook shared across all heads 37 | heads=constants["vq_heads"] if heads is None else heads, 38 | 39 | # whether to have a separate codebook per head. False would mean 1 shared codebook 40 | separate_codebook_per_head=constants["vq_separate_codebook_per_head"], 41 | 42 | codebook_dim=constants["vq_codebook_dim"], 43 | sample_codebook_temp=constants["vq_sample_codebook_temp"], 44 | kmeans_init=constants["vq_kmeans_init"], # set to True 45 | 46 | # number of kmeans iterations to calculate the centroids for the codebook on init 47 | kmeans_iters=constants["vq_kmeans_iters"] 48 | ) 49 | 50 | else: 51 | raise AssertionError("Unhandled model name %r" % model_name) 52 | 53 | @staticmethod 54 | def vq_helper(vq_model, encoding): 55 | encoding = encoding.unsqueeze(0) 56 | encoding, indices, vq_loss = vq_model(encoding) # https://github.com/lucidrains/vector-quantize-pytorch 57 | vq_loss = vq_loss.sum() 58 | encoding = encoding.squeeze(0) 59 | return encoding, indices, vq_loss 60 | -------------------------------------------------------------------------------- /src/model/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/classifiers/__init__.py -------------------------------------------------------------------------------- /src/model/classifiers/classifier_model_wrapper.py: -------------------------------------------------------------------------------- 1 | from model.classifiers.convm_classifier import ConvMClassifier 2 | from model.classifiers.conv3_classifier import Conv3Classifier 3 | from model.classifiers.conv2_classifier import Conv2Classifier 4 | from model.classifiers.feedforward_classifier import FeedForwardClassifier 5 | from model.classifiers.linear_classifier import LinearClassifier 6 | 7 | 8 | class ClassifierModelWrapper: 9 | """Wrapper for classification model""" 10 | 11 | @staticmethod 12 | def get_classifier(model_name, num_class, config, constants, bootstrap_model=None): 13 | if model_name == "linear": 14 | return LinearClassifier(num_class, config, constants, bootstrap_model) 15 | 16 | elif model_name == "ff": 17 | return FeedForwardClassifier(num_class, config, constants, bootstrap_model) 18 | 19 | elif model_name == "conv2": 20 | return Conv2Classifier(num_class, config, constants, bootstrap_model) 21 | 22 | elif model_name == "conv3": 23 | return Conv3Classifier(num_class, config, constants, bootstrap_model) 24 | 25 | elif model_name == "convm": 26 | return ConvMClassifier(num_class, config, constants, bootstrap_model) 27 | 28 | else: 29 | raise NotImplementedError( 30 | "Model %s is not implemented. 
Implemented models are linear, " "ff, conv2, conv3, and convm" % model_name 31 | ) 32 | -------------------------------------------------------------------------------- /src/model/classifiers/feedforward_classifier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class FeedForwardClassifier(nn.Module): 7 | """Model for learning the forward kinematic inseparability""" 8 | 9 | NAME = "ff" 10 | 11 | def __init__(self, num_class, config, constants, bootstrap_model=None): 12 | super(FeedForwardClassifier, self).__init__() 13 | 14 | self.num_class = num_class 15 | self.config = config 16 | self.constants = constants 17 | 18 | if config["feature_type"] == "feature": 19 | self.obs_encoder = nn.Sequential( 20 | nn.Linear(config["obs_dim"], constants["n_hidden"]), 21 | nn.LeakyReLU(), 22 | nn.Linear(constants["n_hidden"], constants["n_hidden"]), 23 | nn.LeakyReLU(), 24 | nn.Linear(constants["n_hidden"], self.num_class), 25 | ) 26 | 27 | elif config["feature_type"] == "image": 28 | raise NotImplementedError() 29 | 30 | else: 31 | raise NotImplementedError() 32 | 33 | if torch.cuda.is_available(): 34 | self.cuda() 35 | 36 | if bootstrap_model is not None: 37 | self.load_state_dict(bootstrap_model.state_dict()) 38 | 39 | def _gen_logits(self, observations, return_log_prob=True): 40 | if self.config["feature_type"] == "image": 41 | raise AssertionError("Cannot handle images right now") 42 | 43 | logits = self.obs_encoder(observations) 44 | 45 | if return_log_prob: 46 | return F.log_softmax(logits, dim=1), dict() 47 | else: 48 | return F.softmax(logits, dim=1), dict() 49 | 50 | def gen_log_prob(self, observations): 51 | return self._gen_logits(observations, return_log_prob=True) 52 | 53 | def gen_prob(self, observations): 54 | return self._gen_logits(observations, return_log_prob=False) 55 | 56 | def save(self, folder_name, model_name=None): 57 | if model_name is None: 58 | torch.save(self.state_dict(), folder_name + FeedForwardClassifier.NAME) 59 | else: 60 | torch.save(self.state_dict(), folder_name + model_name) 61 | 62 | def load(self, folder_name, model_name=None): 63 | if model_name is None: 64 | self.load_state_dict(torch.load(folder_name + FeedForwardClassifier.NAME)) 65 | else: 66 | self.load_state_dict(torch.load(folder_name + model_name)) 67 | -------------------------------------------------------------------------------- /src/model/classifiers/linear_classifier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class LinearClassifier(nn.Module): 7 | """Linear classifier""" 8 | 9 | NAME = "linear" 10 | 11 | def __init__(self, num_class, config, constants, bootstrap_model=None): 12 | super(LinearClassifier, self).__init__() 13 | 14 | self.num_class = num_class 15 | self.config = config 16 | self.constants = constants 17 | 18 | if config["feature_type"] == "feature": 19 | self.obs_encoder = nn.Sequential(nn.Linear(config["obs_dim"], self.num_class)) 20 | 21 | elif config["feature_type"] == "image": 22 | raise NotImplementedError() 23 | 24 | else: 25 | raise NotImplementedError() 26 | 27 | if torch.cuda.is_available(): 28 | self.cuda() 29 | 30 | if bootstrap_model is not None: 31 | self.load_state_dict(bootstrap_model.state_dict()) 32 | 33 | def _gen_logits(self, observations, return_log_prob=True): 34 | if self.config["feature_type"] == "image":
35 | raise AssertionError("Cannot handle images right now") 36 | 37 | logits = self.obs_encoder(observations) 38 | 39 | if return_log_prob: 40 | return F.log_softmax(logits, dim=1), dict() 41 | else: 42 | return F.softmax(logits, dim=1), dict() 43 | 44 | def gen_log_prob(self, observations): 45 | return self._gen_logits(observations, return_log_prob=True) 46 | 47 | def gen_prob(self, observations): 48 | return self._gen_logits(observations, return_log_prob=False) 49 | 50 | def save(self, folder_name, model_name=None): 51 | if model_name is None: 52 | torch.save(self.state_dict(), folder_name + LinearClassifier.NAME) 53 | else: 54 | torch.save(self.state_dict(), folder_name + model_name) 55 | 56 | def load(self, folder_name, model_name=None): 57 | if model_name is None: 58 | self.load_state_dict(torch.load(folder_name + LinearClassifier.NAME)) 59 | else: 60 | self.load_state_dict(torch.load(folder_name + model_name)) 61 | -------------------------------------------------------------------------------- /src/model/decoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/decoder/__init__.py -------------------------------------------------------------------------------- /src/model/decoder/conv_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ConvDecoder(nn.Module): 6 | NAME = "conv" 7 | 8 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 9 | super(ConvDecoder, self).__init__() 10 | 11 | self.height = height 12 | self.channel = channel 13 | self.width = width 14 | 15 | self.out_dim = out_dim 16 | 17 | self.linear_layer = nn.Linear(out_dim, 32 * 2 * 2) 18 | 19 | self.model = nn.Sequential( 20 | nn.LeakyReLU(), 21 | nn.ConvTranspose2d(32, 16, (2, 2), 2, output_padding=1), 22 | nn.LeakyReLU(), 23 | nn.ConvTranspose2d(16, 16, (4, 4), 2), 24 | nn.LeakyReLU(), 25 | nn.ConvTranspose2d(16, 16, (4, 4), 2), 26 | nn.LeakyReLU(), 27 | nn.ConvTranspose2d(16, self.channel, (6, 6), 2), 28 | ) 29 | 30 | if torch.cuda.is_available(): 31 | self.cuda() 32 | 33 | if bootstrap_model is not None: 34 | self.load_state_dict(bootstrap_model.state_dict()) 35 | 36 | def forward(self, vec): 37 | return self.decode(vec) 38 | 39 | def decode(self, vec): 40 | batch_size = vec.size(0) 41 | out = self.linear_layer(vec).view(batch_size, 32, 2, 2) 42 | return self.model(out) 43 | -------------------------------------------------------------------------------- /src/model/decoder/conv_decoder2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ConvDecoder2(nn.Module): 6 | NAME = "conv2" 7 | 8 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 9 | super(ConvDecoder2, self).__init__() 10 | 11 | self.height = height 12 | self.channel = channel 13 | self.width = width 14 | 15 | self.out_dim = out_dim 16 | 17 | self.linear_layer = nn.Linear(out_dim, 32 * 2 * 2) 18 | 19 | self.model = nn.Sequential( 20 | nn.LeakyReLU(), 21 | nn.Conv2d(32, 128, 3, stride=1, padding=1), 22 | nn.Upsample(scale_factor=2), 23 | nn.BatchNorm2d(128), 24 | nn.LeakyReLU(), 25 | nn.Conv2d(128, 64, 3, stride=1, padding=1), 26 | nn.Upsample(scale_factor=2), 27 | nn.BatchNorm2d(64), 28 | nn.LeakyReLU(), 29 | nn.Conv2d(64, 32, 3, stride=1, padding=1), 30 | 
nn.Upsample(scale_factor=2), 31 | nn.BatchNorm2d(32), 32 | nn.LeakyReLU(), 33 | nn.Conv2d(32, 16, 3, stride=1, padding=1), 34 | nn.Upsample(scale_factor=2), 35 | nn.BatchNorm2d(16), 36 | nn.LeakyReLU(), 37 | nn.Conv2d(16, 8, 3, stride=1, padding=1), 38 | nn.Upsample(size=(56, 56)), 39 | nn.BatchNorm2d(8), 40 | nn.LeakyReLU(), 41 | nn.Conv2d(8, self.channel, 3, stride=1, padding=1), 42 | ) 43 | 44 | if torch.cuda.is_available(): 45 | self.cuda() 46 | 47 | if bootstrap_model is not None: 48 | self.load_state_dict(bootstrap_model.state_dict()) 49 | 50 | def forward(self, vec): 51 | return self.decode(vec) 52 | 53 | def decode(self, vec): 54 | batch_size = vec.size(0) 55 | out = self.linear_layer(vec).view(batch_size, 32, 2, 2) 56 | # print(out.shape) 57 | # raise Exception('done') 58 | return self.model(out) 59 | -------------------------------------------------------------------------------- /src/model/decoder/conv_decoder_ai2thor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ConvDecoderAI2Thor(nn.Module): 6 | NAME = "conv-ai2thor" 7 | 8 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 9 | super(ConvDecoderAI2Thor, self).__init__() 10 | 11 | self.height = height 12 | self.channel = channel 13 | self.width = width 14 | 15 | self.out_dim = out_dim 16 | 17 | self.linear_layer = nn.Linear(out_dim, 32 * 2 * 4) 18 | 19 | self.model = nn.Sequential( 20 | nn.LeakyReLU(), 21 | nn.Conv2d(32, 128, 3, stride=1, padding=1), 22 | nn.Upsample(scale_factor=2), 23 | nn.BatchNorm2d(128), 24 | nn.LeakyReLU(), 25 | nn.Conv2d(128, 64, 3, stride=1, padding=1), 26 | nn.Upsample(scale_factor=2), 27 | nn.BatchNorm2d(64), 28 | nn.LeakyReLU(), 29 | nn.Conv2d(64, 32, 3, stride=1, padding=1), 30 | nn.Upsample(scale_factor=2), 31 | nn.BatchNorm2d(32), 32 | nn.LeakyReLU(), 33 | nn.Conv2d(32, 16, 3, stride=1, padding=1), 34 | nn.Upsample(scale_factor=2), 35 | nn.BatchNorm2d(16), 36 | nn.LeakyReLU(), 37 | nn.Conv2d(16, 8, 3, stride=1, padding=1), 38 | nn.Upsample(size=(56, 56 * 2)), 39 | nn.BatchNorm2d(8), 40 | nn.LeakyReLU(), 41 | nn.Conv2d(8, self.channel, 3, stride=1, padding=1), 42 | ) 43 | 44 | if torch.cuda.is_available(): 45 | self.cuda() 46 | 47 | if bootstrap_model is not None: 48 | self.load_state_dict(bootstrap_model.state_dict()) 49 | 50 | def forward(self, vec): 51 | return self.decode(vec) 52 | 53 | def decode(self, vec): 54 | batch_size = vec.size(0) 55 | out = self.linear_layer(vec).view(batch_size, 32, 2, 4) 56 | # print(out.shape) 57 | # raise Exception('done') 58 | return self.model(out) 59 | -------------------------------------------------------------------------------- /src/model/decoder/decoder_wrapper.py: -------------------------------------------------------------------------------- 1 | from model.decoder.conv_decoder import ConvDecoder 2 | from model.decoder.conv_decoder2 import ConvDecoder2 3 | from model.decoder.conv_decoder_ai2thor import ConvDecoderAI2Thor 4 | from model.decoder.feedforward_decoder import FeedForwardDecoder 5 | 6 | 7 | class DecoderModelWrapper: 8 | """Wrapper for decoder models""" 9 | 10 | @staticmethod 11 | def get_decoder(model_name, bootstrap_model=None, **kwargs): 12 | models = [FeedForwardDecoder.NAME, ConvDecoder.NAME, ConvDecoder2.NAME] 13 | 14 | if model_name == FeedForwardDecoder.NAME: 15 | return FeedForwardDecoder(**kwargs, bootstrap_model=bootstrap_model) 16 | 17 | elif model_name == ConvDecoder.NAME: 18 | return ConvDecoder(**kwargs, 
bootstrap_model=bootstrap_model) 19 | 20 | elif model_name == ConvDecoder2.NAME: 21 | return ConvDecoder2(**kwargs, bootstrap_model=bootstrap_model) 22 | 23 | elif model_name == ConvDecoderAI2Thor.NAME: 24 | return ConvDecoderAI2Thor(**kwargs, bootstrap_model=bootstrap_model) 25 | 26 | else: 27 | raise NotImplementedError("Model %s is not implemented. Implemented models are linear, %r" % (model_name, models)) 28 | -------------------------------------------------------------------------------- /src/model/decoder/feedforward_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class FeedForwardDecoder(nn.Module): 6 | NAME = "ff" 7 | 8 | def __init__(self, num_inputs, inp_dim, out_dim, hidden_dim, bootstrap_model=None): 9 | super(FeedForwardDecoder, self).__init__() 10 | 11 | self.num_inputs = num_inputs 12 | self.inp_dim = inp_dim 13 | self.out_dim = out_dim 14 | self.hidden_dim = hidden_dim 15 | 16 | self.model = nn.Sequential( 17 | nn.Linear(self.out_dim, self.hidden_dim), 18 | nn.LeakyReLU(), 19 | nn.Linear(self.hidden_dim, self.num_inputs * self.inp_dim), 20 | ) 21 | 22 | if torch.cuda.is_available(): 23 | self.cuda() 24 | 25 | if bootstrap_model is not None: 26 | self.load_state_dict(bootstrap_model.state_dict()) 27 | 28 | def forward(self, vec): 29 | return self.decode(vec) 30 | 31 | def decode(self, vec): 32 | output = self.model(vec).data.cpu() 33 | outputs = [] 34 | for i in range(0, self.num_inputs): 35 | outputs.append(output[i * self.inp_dim : (i + 1) * self.inp_dim]) 36 | 37 | return outputs 38 | -------------------------------------------------------------------------------- /src/model/encoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/encoder/__init__.py -------------------------------------------------------------------------------- /src/model/encoder/conv2_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Conv2Encoder(nn.Module): 6 | NAME = "conv2" 7 | 8 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 9 | super(Conv2Encoder, self).__init__() 10 | 11 | self.height = height 12 | self.channel = channel 13 | self.width = width 14 | 15 | self.out_dim = out_dim 16 | 17 | self.model = nn.Sequential( 18 | nn.Conv2d(self.channel, 16, (8, 8), 4), 19 | nn.LeakyReLU(), 20 | nn.Conv2d(16, 32, (4, 4), 2), 21 | nn.LeakyReLU(), 22 | nn.Conv2d(32, 32, (4, 4), 1), 23 | nn.Flatten(), 24 | nn.Linear(128, out_dim), 25 | ) 26 | 27 | if torch.cuda.is_available(): 28 | self.cuda() 29 | 30 | if bootstrap_model is not None: 31 | self.load_state_dict(bootstrap_model.state_dict()) 32 | 33 | def forward(self, img): 34 | return self.encode(img) 35 | 36 | def encode(self, img): 37 | return self.model(img) 38 | -------------------------------------------------------------------------------- /src/model/encoder/conv3_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from utils.conv_util import get_conv_out_size 5 | 6 | 7 | class Conv3Encoder(nn.Module): 8 | """ 9 | Recommended convolution for gridworld 10 | """ 11 | 12 | NAME = "conv3" 13 | 14 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 15 | 
super(Conv3Encoder, self).__init__() 16 | 17 | self.height = height 18 | self.channel = channel 19 | self.width = width 20 | 21 | self.out_dim = out_dim 22 | 23 | # Note that the dynamic size below is calculated based on the model. If the model changes, then the size 24 | # will also change 25 | kernel_size1 = (5, 5) 26 | stride1 = (2, 2) 27 | 28 | kernel_size2 = (5, 5) 29 | stride2 = (2, 2) 30 | 31 | kernel_size3 = (5, 5) 32 | stride3 = (1, 1) 33 | 34 | dynamic_size_h1, dynamic_size_w1 = get_conv_out_size(self.height, self.width, kernel_size=kernel_size1, stride=stride1) 35 | 36 | dynamic_size_h2, dynamic_size_w2 = get_conv_out_size( 37 | dynamic_size_h1, dynamic_size_w1, kernel_size=kernel_size2, stride=stride2 38 | ) 39 | 40 | dynamic_size_h3, dynamic_size_w3 = get_conv_out_size( 41 | dynamic_size_h2, dynamic_size_w2, kernel_size=kernel_size3, stride=stride3 42 | ) 43 | 44 | self.n_channels_out = 32 45 | self.dynamic_size = dynamic_size_h3 * dynamic_size_w3 * self.n_channels_out 46 | 47 | self.model = nn.Sequential( 48 | nn.Conv2d(3, 32, kernel_size=kernel_size1, stride=stride1), 49 | nn.BatchNorm2d(32), 50 | nn.LeakyReLU(), 51 | nn.Conv2d(32, 64, kernel_size=kernel_size2, stride=stride2), 52 | nn.BatchNorm2d(64), 53 | nn.LeakyReLU(), 54 | nn.Conv2d(64, self.n_channels_out, kernel_size=kernel_size3, stride=stride3), 55 | nn.BatchNorm2d(self.n_channels_out), 56 | nn.Flatten(), 57 | nn.Linear(self.dynamic_size, out_dim), 58 | ) 59 | 60 | if torch.cuda.is_available(): 61 | self.cuda() 62 | 63 | if bootstrap_model is not None: 64 | self.load_state_dict(bootstrap_model.state_dict()) 65 | 66 | def forward(self, img): 67 | return self.encode(img) 68 | 69 | def encode(self, img): 70 | # print(img.shape) 71 | return self.model(img) 72 | -------------------------------------------------------------------------------- /src/model/encoder/conv4_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from utils.conv_util import get_conv_out_size 5 | 6 | 7 | class Conv4Encoder(nn.Module): 8 | NAME = "conv4" 9 | 10 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 11 | super(Conv4Encoder, self).__init__() 12 | 13 | self.height = height 14 | self.channel = channel 15 | self.width = width 16 | 17 | self.out_dim = out_dim 18 | 19 | # Note that the dynamic size below is calculated based on the model. 
If the model changes, then the size 20 | # will also change 21 | kernel_size1 = (8, 8) 22 | stride1 = (4, 4) 23 | 24 | kernel_size2 = (4, 4) 25 | stride2 = (2, 2) 26 | 27 | dynamic_size_h1, dynamic_size_w1 = get_conv_out_size(self.height, self.width, kernel_size=kernel_size1, stride=stride1) 28 | 29 | dynamic_size_h2, dynamic_size_w2 = get_conv_out_size( 30 | dynamic_size_h1, dynamic_size_w1, kernel_size=kernel_size2, stride=stride2 31 | ) 32 | 33 | self.n_channels_out = 32 34 | self.dynamic_size = dynamic_size_h2 * dynamic_size_w2 * self.n_channels_out 35 | 36 | self.model = nn.Sequential( 37 | nn.Conv2d(3, 16, (8, 8), 4), 38 | nn.ReLU(), 39 | nn.Conv2d(16, self.n_channels_out, (4, 4), 2), 40 | nn.ReLU(), 41 | nn.Flatten(), 42 | nn.Linear(self.dynamic_size, out_dim), 43 | ) 44 | 45 | if torch.cuda.is_available(): 46 | self.cuda() 47 | 48 | if bootstrap_model is not None: 49 | self.load_state_dict(bootstrap_model.state_dict()) 50 | 51 | def forward(self, img): 52 | return self.encode(img) 53 | 54 | def encode(self, img): 55 | # print(img.shape) 56 | return self.model(img) 57 | -------------------------------------------------------------------------------- /src/model/encoder/conv_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from utils.conv_util import get_conv_out_size 5 | 6 | 7 | class ConvEncoder(nn.Module): 8 | NAME = "conv" 9 | 10 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 11 | super(ConvEncoder, self).__init__() 12 | 13 | self.height = height 14 | self.channel = channel 15 | self.width = width 16 | 17 | self.out_dim = out_dim 18 | 19 | # Note that the dynamic size below is calculated based on the model. If the model changes, then the size 20 | # will also change 21 | kernel_size1 = (8, 8) 22 | stride1 = (4, 4) 23 | 24 | kernel_size2 = (4, 4) 25 | stride2 = (2, 2) 26 | 27 | kernel_size3 = (4, 4) 28 | stride3 = (1, 1) 29 | 30 | dynamic_size_h1, dynamic_size_w1 = get_conv_out_size(self.height, self.width, kernel_size=kernel_size1, stride=stride1) 31 | 32 | dynamic_size_h2, dynamic_size_w2 = get_conv_out_size( 33 | dynamic_size_h1, dynamic_size_w1, kernel_size=kernel_size2, stride=stride2 34 | ) 35 | 36 | dynamic_size_h3, dynamic_size_w3 = get_conv_out_size( 37 | dynamic_size_h2, dynamic_size_w2, kernel_size=kernel_size3, stride=stride3 38 | ) 39 | 40 | self.n_channels_out = 32 41 | self.dynamic_size = dynamic_size_h3 * dynamic_size_w3 * self.n_channels_out 42 | 43 | self.model = nn.Sequential( 44 | nn.Conv2d(3, 16, kernel_size=kernel_size1, stride=stride1), 45 | nn.BatchNorm2d(16), 46 | nn.LeakyReLU(), 47 | nn.Conv2d(16, 32, kernel_size=kernel_size2, stride=stride2), 48 | nn.BatchNorm2d(32), 49 | nn.LeakyReLU(), 50 | nn.Conv2d(32, self.n_channels_out, kernel_size=kernel_size3, stride=stride3), 51 | nn.BatchNorm2d(self.n_channels_out), 52 | nn.Flatten(), 53 | nn.Linear(self.dynamic_size, out_dim), 54 | ) 55 | 56 | if torch.cuda.is_available(): 57 | self.cuda() 58 | 59 | if bootstrap_model is not None: 60 | self.load_state_dict(bootstrap_model.state_dict()) 61 | 62 | def forward(self, img): 63 | return self.encode(img) 64 | 65 | def encode(self, img): 66 | # print(img.shape) 67 | return self.model(img) 68 | -------------------------------------------------------------------------------- /src/model/encoder/encoder_wrapper.py: -------------------------------------------------------------------------------- 1 | from model.encoder.conv2_encoder import 
Conv2Encoder 2 | from model.encoder.conv3_encoder import Conv3Encoder 3 | from model.encoder.conv4_encoder import Conv4Encoder 4 | from model.encoder.conv_encoder import ConvEncoder 5 | from model.encoder.feedforward_encoder import FeedForwardEncoder 6 | 7 | 8 | class EncoderModelWrapper: 9 | """Wrapper for encoder models""" 10 | 11 | @staticmethod 12 | def get_encoder(model_name, bootstrap_model=None, **kwargs): 13 | models = [ 14 | FeedForwardEncoder, 15 | ConvEncoder, 16 | Conv2Encoder, 17 | Conv3Encoder, 18 | Conv4Encoder, 19 | ] 20 | model_names = [model.NAME for model in models] 21 | 22 | for model in models: 23 | if model_name == model.NAME: 24 | return model(**kwargs, bootstrap_model=bootstrap_model) 25 | 26 | raise NotImplementedError("Model %s is not implemented. Implemented models are %s" % (model_name, model_names)) 27 | -------------------------------------------------------------------------------- /src/model/encoder/feedforward_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class FeedForwardEncoder(nn.Module): 6 | NAME = "ff" 7 | 8 | def __init__(self, num_inputs, inp_dim, out_dim, hidden_dim, bootstrap_model=None): 9 | super(FeedForwardEncoder, self).__init__() 10 | 11 | self.num_inputs = num_inputs 12 | self.inp_dim = inp_dim 13 | self.out_dim = out_dim 14 | self.hidden_dim = hidden_dim 15 | 16 | self.model = nn.Sequential( 17 | nn.Linear(self.num_inputs * self.inp_dim, self.hidden_dim), 18 | nn.LeakyReLU(), 19 | nn.Linear(self.hidden_dim, self.out_dim), 20 | ) 21 | 22 | if torch.cuda.is_available(): 23 | self.cuda() 24 | 25 | if bootstrap_model is not None: 26 | self.load_state_dict(bootstrap_model.state_dict()) 27 | 28 | def forward(self, **inputs): 29 | return self.encode(**inputs) 30 | 31 | def encode(self, **inputs): 32 | vec = torch.cat(list(inputs.values()), dim=1) # inputs arrives as a kwargs dict of tensors 33 | return self.model(vec) 34 | -------------------------------------------------------------------------------- /src/model/forward_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/forward_model/__init__.py -------------------------------------------------------------------------------- /src/model/forward_model/conv_forward_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ConvForwardModel(nn.Module): 6 | NAME = "ConvForward" 7 | 8 | def __init__(self, exp_setup, bootstrap_model=None): 9 | super(ConvForwardModel, self).__init__() 10 | 11 | self.num_actions = exp_setup.config["num_actions"] 12 | self.latent_action_vec_dim = 256 13 | self.height, self.width, self.channel = exp_setup.config["obs_dim"] 14 | self.encoder_dim = exp_setup.constants["hidden_dim"] 15 | # self.reshape_layer = nn.Linear(self.encoder_dim + self.num_actions, 32 * 2 * 2) 16 | self.reshape_layer = nn.Linear(self.encoder_dim + self.latent_action_vec_dim, 256 * 2 * 2) 17 | 18 | self.model = nn.Sequential( 19 | nn.LeakyReLU(), 20 | nn.Conv2d(256, 128, 3, stride=1, padding=1), 21 | nn.Upsample(scale_factor=2), 22 | nn.BatchNorm2d(128), 23 | nn.LeakyReLU(), 24 | nn.Conv2d(128, 64, 3, stride=1, padding=1), 25 | nn.Upsample(scale_factor=2), 26 | nn.BatchNorm2d(64), 27 | nn.LeakyReLU(), 28 | nn.Conv2d(64, 32, 3, stride=1, padding=1), 29 | nn.Upsample(scale_factor=2), 30 | nn.BatchNorm2d(32), 31 |
nn.LeakyReLU(), 32 | nn.Conv2d(32, 16, 3, stride=1, padding=1), 33 | nn.Upsample(scale_factor=2), 34 | nn.BatchNorm2d(16), 35 | nn.LeakyReLU(), 36 | nn.Conv2d(16, 8, 3, stride=1, padding=1), 37 | nn.Upsample(size=(56, 56)), 38 | nn.BatchNorm2d(8), 39 | nn.LeakyReLU(), 40 | nn.Conv2d(8, self.channel, 3, stride=1, padding=1), 41 | ) 42 | 43 | if torch.cuda.is_available(): 44 | self.cuda() 45 | 46 | if bootstrap_model is not None: 47 | self.load_state_dict(bootstrap_model.state_dict()) 48 | 49 | def forward(self, obs_encoding, latent_action_vec): 50 | batch = obs_encoding.size(0) 51 | vec = torch.cat([obs_encoding, latent_action_vec], dim=1) # batch x dim 52 | vec = self.reshape_layer(vec).view(batch, 256, 2, 2) 53 | return self.model(vec) 54 | -------------------------------------------------------------------------------- /src/model/forward_model/forward_model_wrapper.py: -------------------------------------------------------------------------------- 1 | from model.forward_model.conv_forward_model import ConvForwardModel 2 | 3 | 4 | class ForwardDynamicsWrapper: 5 | """Wrapper for forward dynamics models""" 6 | 7 | @staticmethod 8 | def get_forward_dynamics_model(model_name, bootstrap_model=None, **kwargs): 9 | models = [ConvForwardModel.NAME] 10 | 11 | if model_name == ConvForwardModel.NAME: 12 | return ConvForwardModel(**kwargs, bootstrap_model=bootstrap_model) 13 | 14 | else: 15 | raise NotImplementedError("Model %s is not implemented. Implemented models are %r" % (model_name, models)) 16 | -------------------------------------------------------------------------------- /src/model/inverse_dynamics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/inverse_dynamics/__init__.py -------------------------------------------------------------------------------- /src/model/inverse_dynamics/encoded_mlp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from utils.gumbel import gumbel_sample 4 | 5 | 6 | class EncodedMLP(nn.Module): 7 | NAME = "encoded-mlp" 8 | 9 | def __init__(self, exp_setup, bootstrap_model=None): 10 | super(EncodedMLP, self).__init__() 11 | 12 | self.temperature = 1.0 13 | self.action_dim = exp_setup.config["num_actions"] 14 | self.dim = exp_setup.constants["hidden_dim"] 15 | 16 | self.mlp = nn.Sequential( 17 | nn.Linear(2 * self.dim, self.dim), 18 | nn.LeakyReLU(), 19 | nn.Linear(self.dim, self.action_dim), 20 | ) 21 | 22 | self.mlp_h = nn.Sequential(nn.Linear(2 * self.dim, self.dim), nn.LeakyReLU(), nn.Linear(self.dim, 256)) 23 | 24 | if torch.cuda.is_available(): 25 | self.cuda() 26 | 27 | if bootstrap_model is not None: 28 | self.load_state_dict(bootstrap_model.state_dict()) 29 | 30 | def get_action_dim(self): 31 | return self.action_dim 32 | 33 | def get_latent_action(self, prev_encoding, obs_encoding): 34 | x = torch.cat([prev_encoding, obs_encoding], dim=1) # batch x (2 dim) 35 | logits = self.mlp(x) # batch x action_dim 36 | 37 | # Compute probability using Gumbel softmax 38 | prob, log_prob = gumbel_sample(logits, self.temperature) 39 | 40 | h = self.mlp_h(x) 41 | 42 | return prob, log_prob, h 43 | -------------------------------------------------------------------------------- /src/model/inverse_dynamics/inverse_dynamics_wrapper.py: -------------------------------------------------------------------------------- 1 | from
model.inverse_dynamics.encoded_mlp import EncodedMLP 2 | from model.inverse_dynamics.simple_feed_forward import SimpleFeedForwardIK 3 | from model.inverse_dynamics.tensor_inverse_dynamics import TensorInverseDynamics 4 | 5 | 6 | class InverseDynamicsWrapper: 7 | """Wrapper for inverse dynamics models""" 8 | 9 | @staticmethod 10 | def get_inv_dynamics_model(model_name, bootstrap_model=None, **kwargs): 11 | models = [EncodedMLP.NAME, SimpleFeedForwardIK.NAME, TensorInverseDynamics.NAME] 12 | 13 | if model_name == EncodedMLP.NAME: 14 | return EncodedMLP(**kwargs, bootstrap_model=bootstrap_model) 15 | 16 | elif model_name == SimpleFeedForwardIK.NAME: 17 | return SimpleFeedForwardIK(**kwargs, bootstrap_model=bootstrap_model) 18 | 19 | elif model_name == TensorInverseDynamics.NAME: 20 | return TensorInverseDynamics(**kwargs, bootstrap_model=bootstrap_model) 21 | 22 | else: 23 | raise NotImplementedError("Model %s is not implemented. Implemented models are linear, %r" % (model_name, models)) 24 | -------------------------------------------------------------------------------- /src/model/inverse_dynamics/tensor_inverse_dynamics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from utils.gumbel import gumbel_sample 4 | 5 | 6 | class TensorInverseDynamics(nn.Module): 7 | NAME = "tensor-inv-dyn" 8 | 9 | def __init__(self, exp_setup, bootstrap_model=None): 10 | super(TensorInverseDynamics, self).__init__() 11 | 12 | self.temperature = 1.0 13 | self.action_dim = exp_setup.config["num_actions"] 14 | self.dim = exp_setup.constants["hidden_dim"] 15 | 16 | self.tensor_W = nn.Parameter(torch.randn(self.dim, self.action_dim, self.dim) * 0.01) 17 | 18 | if torch.cuda.is_available(): 19 | self.cuda() 20 | 21 | if bootstrap_model is not None: 22 | self.load_state_dict(bootstrap_model.state_dict()) 23 | 24 | def get_action_dim(self): 25 | return self.action_dim 26 | 27 | def get_latent_action(self, prev_encoding, obs_encoding): 28 | batch_size = prev_encoding.size(0) 29 | 30 | x = torch.matmul(prev_encoding, self.tensor_W.view(self.dim, self.action_dim * self.dim)) 31 | x = x.view(batch_size, self.action_dim, self.dim) 32 | 33 | x = (obs_encoding[:, None, :] * x).sum(2) # batch x num_actions 34 | 35 | # Compute probability using Gumbel softmax 36 | prob, log_prob = gumbel_sample(x, self.temperature) 37 | 38 | return prob, log_prob 39 | -------------------------------------------------------------------------------- /src/model/inverse_dynamics_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/inverse_dynamics_model/__init__.py -------------------------------------------------------------------------------- /src/model/inverse_dynamics_model/action_predictor_wrapper.py: -------------------------------------------------------------------------------- 1 | from model.inverse_dynamics_model.action_predictor import ( 2 | ActionPredictor, 3 | ActionPredictorFlatNN, 4 | ActionPredictorCNN1, 5 | ActionPredictorCNN2, 6 | ActionPredictorCNN3, 7 | ActionPredictorCNN4, 8 | ActionPredictorCNN5, 9 | ActionPredictorCNN6, 10 | ActionPredictorCNN7, 11 | ) 12 | 13 | 14 | class InverseDynamicsWrapper: 15 | FF = range(1) 16 | 17 | def __init__(self): 18 | pass 19 | 20 | @staticmethod 21 | def get_model(config, constants, bootstrap_model=None): 22 | model_type_str = constants["model_type"] 23 | if 
model_type_str == "ff": 24 | return ActionPredictor(config, constants, bootstrap_model) 25 | elif model_type_str == "flat": 26 | return ActionPredictorFlatNN(config, constants, bootstrap_model) 27 | elif model_type_str == "conv1": 28 | return ActionPredictorCNN1(config, constants, bootstrap_model) 29 | elif model_type_str == "conv2": 30 | return ActionPredictorCNN2(config, constants, bootstrap_model) 31 | elif model_type_str == "conv3": 32 | return ActionPredictorCNN3(config, constants, bootstrap_model) 33 | elif model_type_str == "conv4": 34 | return ActionPredictorCNN4(config, constants, bootstrap_model) 35 | elif model_type_str == "conv5": 36 | return ActionPredictorCNN5(config, constants, bootstrap_model) 37 | elif model_type_str == "conv6": 38 | return ActionPredictorCNN6(config, constants, bootstrap_model) 39 | elif model_type_str == "conv7": 40 | return ActionPredictorCNN7(config, constants, bootstrap_model) 41 | else: 42 | raise AssertionError("Unhandled model type %r" % model_type_str) 43 | -------------------------------------------------------------------------------- /src/model/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/misc/__init__.py -------------------------------------------------------------------------------- /src/model/misc/independence_test_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class IndependenceTestModel(nn.Module): 7 | def __init__(self, config, model_input_dim, hidden_dim): 8 | super(IndependenceTestModel, self).__init__() 9 | 10 | self.config = config 11 | 12 | if config["feature_type"] == "feature": 13 | # Model head 14 | self.classifier = nn.Sequential( 15 | nn.Linear(model_input_dim, hidden_dim), 16 | nn.LeakyReLU(), 17 | nn.Linear(hidden_dim, 2), 18 | ) 19 | 20 | else: 21 | raise AssertionError("Unhandled feature type") 22 | 23 | if torch.cuda.is_available(): 24 | self.cuda() 25 | 26 | def gen_logits_(self, model_input, type="logsoftmax"): 27 | """ 28 | :param model_input: Pytorch float tensor of size batch x dim 29 | :return: 30 | """ 31 | 32 | if self.config["feature_type"] == "image": 33 | raise AssertionError() 34 | 35 | logits = self.classifier(model_input) 36 | 37 | if type == "logsoftmax": 38 | result = F.log_softmax(logits, dim=1) 39 | elif type == "softmax": 40 | result = F.softmax(logits, dim=1) 41 | else: 42 | raise AssertionError("Unhandled type ", type) 43 | 44 | return result 45 | 46 | def gen_log_prob(self, model_input): 47 | return self.gen_logits_(model_input, type="logsoftmax") 48 | 49 | def gen_prob(self, model_input): 50 | return self.gen_logits_(model_input, type="softmax") 51 | -------------------------------------------------------------------------------- /src/model/misc/lqr_model.py: -------------------------------------------------------------------------------- 1 | class LQRModel: 2 | def __init__(self, A, B, Q, R, Sigma_W, Sigma_0): 3 | """ 4 | LQR model describes a simple continuous control dynamics where 5 | the state s evolves as 6 | 7 | s_1 ~ N(0, Sigma_0) 8 | s_{t+1} = A s_t + B u_t + epsilon_t, for all t 9 | epsilon_t ~ N(0, Sigma_W) 10 | 11 | where s_t and u_t is the state and action respectively at time step t 12 | 13 | cost at time step t is given by s_t^T Q s_t + u_t^T R u_t 14 | """ 15 | self.A = A 16 | self.B = B 17 | 
self.Q = Q 18 | self.R = R 19 | self.Sigma_W = Sigma_W 20 | self.Sigma_0 = Sigma_0 21 | 22 | def copy(self): 23 | return LQRModel( 24 | A=self.A.copy(), 25 | B=self.B.copy(), 26 | Q=self.Q.copy(), 27 | R=self.R.copy(), 28 | Sigma_W=self.Sigma_W.copy(), 29 | Sigma_0=self.Sigma_0.copy(), 30 | ) 31 | -------------------------------------------------------------------------------- /src/model/misc/robot_car/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/misc/robot_car/__init__.py -------------------------------------------------------------------------------- /src/model/misc/robot_car/autoencoder_embeddings.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle 4 | import numpy as np 5 | import torch 6 | from tqdm import tqdm 7 | 8 | from environments.robot_car.utils.dataset import CarDataset 9 | from model.misc.robot_car.autoencoder_train import CarAutoencoder 10 | 11 | 12 | # Precompute VAE embeddings for all images in the dataset 13 | # Embeddings are then saved into a pickle file 14 | if __name__ == "__main__": 15 | batch_size = 24 16 | data_root = "./car_data" 17 | train_root = "./autoencoder_training" 18 | torch.set_float32_matmul_precision("medium") 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("checkpoint", type=str) 22 | parser.add_argument("output_file", type=str) 23 | args = parser.parse_args() 24 | 25 | # check that checkpoint file exists 26 | if not os.path.isfile(args.checkpoint): 27 | raise FileNotFoundError(args.checkpoint) 28 | 29 | print(f"Loading model from {args.checkpoint}") 30 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 31 | model = CarAutoencoder.load_from_checkpoint(args.checkpoint).to(device) 32 | 33 | print("Loading data...") 34 | dataset = CarDataset(data_root, max_k=1, resize=(256, 256), cache_into_memory=False) 35 | num_samples = len(dataset.actions) 36 | assert len(dataset.pic_filenames) == num_samples 37 | assert sum(dataset.traj_lengths) == num_samples 38 | assert dataset.cumulative_lengths[-1] == num_samples 39 | assert len(dataset.traj_lengths) == len(dataset.cumulative_lengths) 40 | 41 | print("Generating embeddings...") 42 | traj_ends = dataset.cumulative_lengths - 1 43 | embeddings = [] 44 | actions = [] 45 | for i in tqdm(range(num_samples)): 46 | pics = dataset._load_pics_at_index(i) 47 | pics = torch.tensor(pics, dtype=torch.float32, device=device).unsqueeze(0) 48 | pics = pics / 256.0 49 | with torch.no_grad(): 50 | z, _ = model.encode(pics) 51 | embeddings.append(z.squeeze().cpu().numpy()) 52 | 53 | if i in traj_ends: 54 | action = np.array([0.5, 0.5, 0.5, 0.5]) 55 | else: 56 | # get action that comes after this observation 57 | action = dataset.actions[i + 1] 58 | actions.append(action) 59 | 60 | embeddings = np.array(embeddings, dtype=np.float32) 61 | actions = np.array(actions, dtype=np.float32) 62 | 63 | output = { 64 | "embeddings": embeddings, 65 | "actions": actions, 66 | "traj_lengths": dataset.traj_lengths, 67 | "total_samples": num_samples, 68 | } 69 | 70 | print("Saving pickle...") 71 | with open(args.output_file, "wb") as f: 72 | pickle.dump(output, f) 73 | -------------------------------------------------------------------------------- /src/model/misc/robot_car/dist_pred_model.py: -------------------------------------------------------------------------------- 
1 | """ 2 | Given (x[t], x[t+k]) predict k. 3 | 4 | Have ability to find smallest k with probability above some epsilon. 5 | 6 | 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | 13 | def argmax_first(a): 14 | b = torch.stack([torch.arange(a.shape[1])] * a.shape[0]) 15 | max_values, _ = torch.max(a, dim=1) 16 | b[a != max_values[:, None]] = a.shape[1] 17 | first_max, _ = torch.min(b, dim=1) 18 | 19 | if torch.cuda.is_available(): 20 | first_max = first_max.cuda() 21 | 22 | return first_max 23 | 24 | 25 | class DistPred(nn.Module): 26 | def __init__(self, inp_size, maxk): 27 | super(DistPred, self).__init__() 28 | 29 | self.enc = nn.Sequential( 30 | nn.Linear(inp_size * 2, 512), nn.LeakyReLU(), nn.Linear(512, 512), nn.LeakyReLU(), nn.Linear(512, maxk) 31 | ) 32 | 33 | def forward(self, x, xk): 34 | bs = x.shape[0] 35 | x = x.reshape((bs, -1)) 36 | xk = xk.reshape((bs, -1)) 37 | 38 | h = torch.cat([x, xk], dim=1) 39 | 40 | py = self.enc(h) 41 | 42 | return py 43 | 44 | def predict_k(self, x, xk): 45 | sm = nn.Softmax(dim=1) 46 | py = sm(self.forward(x, xk)) 47 | 48 | cdf = torch.gt(torch.cumsum(py, dim=1), 0.5).float() 49 | 50 | first_max = argmax_first(cdf) 51 | 52 | return first_max 53 | 54 | def loss(self, x, xk, k): 55 | py = self.forward(x, xk) 56 | 57 | ce = nn.CrossEntropyLoss() 58 | loss = ce(py, k) 59 | 60 | return loss 61 | 62 | 63 | if __name__ == "__main__": 64 | dp = DistPred(64, 32 * 32 * 3).cuda() 65 | 66 | x = torch.randn(1, 3, 32, 32).repeat(100, 1, 1, 1).cuda() 67 | xk = torch.randn(1, 3, 32, 32).repeat(100, 1, 1, 1).cuda() 68 | 69 | y = torch.zeros(100).long().cuda() 70 | 71 | # y[0:25] += 4 72 | # y[25:50] += 5 73 | # y[50:75] += 6 74 | # y[75:100] += 1 75 | 76 | # y += 3 77 | 78 | for i in range(0, 1000): 79 | dp.train(x, xk, y) 80 | 81 | kpred = dp.predict_k(x[0:1], xk[0:1]) 82 | 83 | print(kpred) 84 | -------------------------------------------------------------------------------- /src/model/misc/robot_car/positional_encoding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | 4 | 5 | def positionalencoding1d(d_model, length): 6 | """ 7 | :param d_model: dimension of the model 8 | :param length: length of positions 9 | :return: length*d_model position matrix 10 | """ 11 | if d_model % 2 != 0: 12 | raise ValueError("Cannot use sin/cos positional encoding with " "odd dim (got dim={:d})".format(d_model)) 13 | pe = torch.zeros(length, d_model) 14 | position = torch.arange(0, length).unsqueeze(1) 15 | div_term = torch.exp((torch.arange(0, d_model, 2, dtype=torch.float) * -(math.log(10000.0) / d_model))) 16 | pe[:, 0::2] = torch.sin(position.float() * div_term) 17 | pe[:, 1::2] = torch.cos(position.float() * div_term) 18 | 19 | return pe 20 | 21 | 22 | def positionalencoding2d(d_model, height, width): 23 | """ 24 | :param d_model: dimension of the model 25 | :param height: height of the positions 26 | :param width: width of the positions 27 | :return: d_model*height*width position matrix 28 | """ 29 | if d_model % 4 != 0: 30 | raise ValueError("Cannot use sin/cos positional encoding with " "odd dimension (got dim={:d})".format(d_model)) 31 | pe = torch.zeros(d_model, height, width) 32 | # Each dimension use half of d_model 33 | d_model = int(d_model / 2) 34 | div_term = torch.exp(torch.arange(0.0, d_model, 2) * -(math.log(10000.0) / d_model)) 35 | pos_w = torch.arange(0.0, width).unsqueeze(1) 36 | pos_h = torch.arange(0.0, height).unsqueeze(1) 37 | pe[0:d_model:2, :, :] = torch.sin(pos_w * 
-------------------------------------------------------------------------------- /src/model/misc/robot_car/positional_encoding.py: --------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | 
4 | 
5 | def positionalencoding1d(d_model, length):
6 |     """
7 |     :param d_model: dimension of the model
8 |     :param length: length of positions
9 |     :return: length*d_model position matrix
10 |     """
11 |     if d_model % 2 != 0:
12 |         raise ValueError("Cannot use sin/cos positional encoding with " "odd dim (got dim={:d})".format(d_model))
13 |     pe = torch.zeros(length, d_model)
14 |     position = torch.arange(0, length).unsqueeze(1)
15 |     div_term = torch.exp((torch.arange(0, d_model, 2, dtype=torch.float) * -(math.log(10000.0) / d_model)))
16 |     pe[:, 0::2] = torch.sin(position.float() * div_term)
17 |     pe[:, 1::2] = torch.cos(position.float() * div_term)
18 | 
19 |     return pe
20 | 
21 | 
22 | def positionalencoding2d(d_model, height, width):
23 |     """
24 |     :param d_model: dimension of the model
25 |     :param height: height of the positions
26 |     :param width: width of the positions
27 |     :return: d_model*height*width position matrix
28 |     """
29 |     if d_model % 4 != 0:
30 |         raise ValueError("Cannot use sin/cos positional encoding with " "a dimension not divisible by 4 (got dim={:d})".format(d_model))
31 |     pe = torch.zeros(d_model, height, width)
32 |     # Each dimension uses half of d_model
33 |     d_model = int(d_model / 2)
34 |     div_term = torch.exp(torch.arange(0.0, d_model, 2) * -(math.log(10000.0) / d_model))
35 |     pos_w = torch.arange(0.0, width).unsqueeze(1)
36 |     pos_h = torch.arange(0.0, height).unsqueeze(1)
37 |     pe[0:d_model:2, :, :] = torch.sin(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
38 |     pe[1:d_model:2, :, :] = torch.cos(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
39 |     pe[d_model::2, :, :] = torch.sin(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
40 |     pe[d_model + 1 :: 2, :, :] = torch.cos(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
41 | 
42 |     return pe
43 | 
-------------------------------------------------------------------------------- /src/model/model_wrapper.py: --------------------------------------------------------------------------------
1 | from model.decoder.decoder_wrapper import DecoderModelWrapper
2 | from model.encoder.encoder_wrapper import EncoderModelWrapper
3 | from model.classifiers.classifier_model_wrapper import ClassifierModelWrapper
4 | 
5 | 
6 | class ModelWrapper:
7 |     def __init__(self):
8 |         pass
9 | 
10 |     @staticmethod
11 |     def get_model(model_type, model_name, config, constants, bootstrap_model=None, **kwargs):
12 |         if model_type == "classifier":
13 |             return ClassifierModelWrapper.get_classifier(
14 |                 model_name=model_name,
15 |                 num_class=kwargs["num_class"],
16 |                 config=config,
17 |                 constants=constants,
18 |                 bootstrap_model=None,
19 |             )
20 | 
21 |         elif model_type == "encoder":
22 |             return EncoderModelWrapper.get_encoder(model_name=model_name, bootstrap_model=bootstrap_model, **kwargs)
23 | 
24 |         elif model_type == "decoder":
25 |             return DecoderModelWrapper.get_decoder(model_name=model_name, bootstrap_model=bootstrap_model, **kwargs)
26 | 
27 |         else:
28 |             raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/policy/__init__.py
-------------------------------------------------------------------------------- /src/model/policy/abstract_nonstationary.py: --------------------------------------------------------------------------------
1 | class AbstractNonStationaryPolicy:
2 |     def __init__(self):
3 |         pass
4 | 
5 |     def action_type(self):
6 |         """
7 |         :return: Type of action returned by the policy
8 |         """
9 |         raise NotImplementedError()
10 | 
11 |     def sample_action(self, observation, timestep):
12 |         """
13 |         :param observation: Observation of the world
14 |         :param timestep: time step at which observation is observed
15 |         :return: an action sampled from the policy for this observation and time step
16 |         """
17 |         raise NotImplementedError()
18 | 
19 |     def get_argmax_action(self, observation, timestep):
20 |         """
21 |         :param observation: Observation of the world
22 |         :param timestep: time step at which observation is observed
23 |         :return: action representation (can be integer, real number, real-valued vector, or object of some class)
24 |         """
25 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/abstract_stationary.py: --------------------------------------------------------------------------------
1 | class AbstractStationaryPolicy:
2 |     def __init__(self):
3 |         pass
4 | 
5 |     def action_type(self):
6 |         """
7 |         :return: Type of action returned by the policy
8 |         """
9 |         raise NotImplementedError()
10 | 
11 |     def sample_action(self, observation):
12 |         """
13 |         :param observation: Observation of the world
14 |         :return: an action sampled from the policy for this observation
15 |         """
16 |         raise NotImplementedError()
17 | 
18 |     def get_argmax_action(self, observation):
19 |         """
20 |         :param observation: Observation of the world
21 |         :return: action representation (can be integer, real number, real-valued vector, or object of some class)
22 |         """
23 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/nonstationary_composed_policy.py: --------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import torch.nn as nn
4 | 
5 | from environments.intrepid_env_meta.action_type import ActionType
6 | 
7 | 
8 | class NonStationaryComposedPolicy(nn.Module, ActionType):
9 |     def __init__(self, encoder_fn, q_values, config):
10 |         super(NonStationaryComposedPolicy, self).__init__()
11 |         super(ActionType, self).__init__()
12 | 
13 |         self.encoder_fn = encoder_fn
14 |         self.q_values = q_values
15 |         self.action_space = config["actions"]
16 | 
17 |     def action_type(self):
18 |         return ActionType.Discrete
19 | 
20 |     def sample_action(self, observations, time_step):
21 |         return self.get_argmax_action(observations, time_step)
22 | 
23 |     def get_argmax_action(self, observations, time_step):
24 |         if self.encoder_fn is None:
25 |             return random.choice(self.action_space)
26 | 
27 |         if isinstance(self.encoder_fn, list) or isinstance(self.encoder_fn, dict):
28 |             latent_state = self.encoder_fn[time_step].encode_observations(observations)
29 |         else:
30 |             latent_state = self.encoder_fn.encode_observations(observations)
31 | 
32 |         if (time_step, latent_state) in self.q_values:
33 |             q_values = self.q_values[(time_step, latent_state)]
34 | 
35 |             return np.random.choice(np.flatnonzero(q_values == q_values.max()))
36 |         else:
37 |             return random.choice(self.action_space)
38 | 
39 |     def save(self, folder_name, model_name=None):
40 |         raise NotImplementedError()
41 | 
42 |     def load(self, folder_name, model_name=None):
43 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/open_loop.py: --------------------------------------------------------------------------------
1 | from model.policy.abstract_nonstationary import AbstractNonStationaryPolicy
2 | 
3 | 
4 | class OpenLoopPolicy(AbstractNonStationaryPolicy):
5 |     def __init__(self, actions=None, path_id=None):
6 |         AbstractNonStationaryPolicy.__init__(self)
7 | 
8 |         # List of actions
9 |         if actions is None:
10 |             self._actions = []
11 |         else:
12 |             self._actions = list(actions)
13 | 
14 |         # ID of this path
15 |         self.path_id = path_id
16 | 
17 |         # Last action that formed this path
18 |         self.action = None if len(self._actions) == 0 else self._actions[-1]
19 | 
20 |         # ID of the parent
21 |         self.parent_path_id = None
22 | 
23 |     def extend(self, action, path_id=None):
24 |         policy = self.clone()
25 |         policy._actions.append(action)
26 | 
27 |         policy.parent_path_id = policy.path_id
28 |         policy.path_id = path_id
29 |         policy.action = action
30 | 
31 |         return policy
32 | 
33 |     def num_timesteps(self):
34 |         return len(self._actions)
35 | 
36 |     def action_type(self):
37 |         """
38 |         :return: Type of action returned by the policy
39 |         """
40 |         raise NotImplementedError()
41 | 
42 |     def sample_action(self, observation, timestep):
43 |         return self._actions[timestep]
44 | 
45 |     def get_argmax_action(self, observation, timestep):
46 |         return self._actions[timestep]
47 | 
48 |     def clone(self):
49 |         policy = OpenLoopPolicy()
50 |         policy._actions = list(self._actions)
51 | 
52 |         policy.path_id = 
self.path_id
53 |         policy.action = self.action
54 |         policy.parent_path_id = self.parent_path_id
55 | 
56 |         return policy
57 | 
58 |     def __str__(self):
59 |         if self.parent_path_id is None or self.action is None or self.path_id is None:
60 |             return "NA"
61 |         else:
62 |             return "[%d -> %r -> %d]" % (self.parent_path_id, self.action, self.path_id)
-------------------------------------------------------------------------------- /src/model/policy/stationary_action_condition_policy.py: --------------------------------------------------------------------------------
1 | from model.policy.abstract_stationary import AbstractStationaryPolicy
2 | 
3 | 
4 | class StationaryActionConditionPolicy(AbstractStationaryPolicy):
5 |     """A policy that takes action by evaluating an input condition"""
6 | 
7 |     def __init__(self, action_condition):
8 |         super(StationaryActionConditionPolicy, self).__init__()
9 |         self.action_condition = action_condition
10 | 
11 |     def action_type(self):
12 |         raise NotImplementedError()
13 | 
14 |     def gen_q_val(self, observations):
15 |         raise NotImplementedError()
16 | 
17 |     def sample_action(self, observations):
18 |         return self.action_condition(observations)
19 | 
20 |     def get_argmax_action(self, observations):
21 |         return self.action_condition(observations)
22 | 
23 |     def save(self, folder_name, model_name=None):
24 |         raise NotImplementedError()
25 | 
26 |     def load(self, folder_name, model_name=None):
27 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/stationary_constant_policy.py: --------------------------------------------------------------------------------
1 | import pickle
2 | 
3 | from environments.intrepid_env_meta.action_type import ActionType
4 | 
5 | 
6 | class StationaryConstantPolicy(ActionType):
7 |     """A policy that always takes the same action deterministically regardless of input"""
8 | 
9 |     def __init__(self, action):
10 |         super(ActionType, self).__init__()
11 |         self.action = action
12 | 
13 |     def action_type(self):
14 |         raise NotImplementedError()
15 | 
16 |     def gen_q_val(self, observations):
17 |         raise NotImplementedError()
18 | 
19 |     def sample_action(self, observations):
20 |         return self.action
21 | 
22 |     def get_argmax_action(self, observations):
23 |         return self.action
24 | 
25 |     def save(self, folder_name, model_name=None):
26 |         model_name = "stationary_constant_policy" if model_name is None else model_name
27 |         with open(folder_name + model_name, "wb") as fobj:
28 |             pickle.dump(self.action, fobj)
29 | 
30 |     def load(self, folder_name, model_name=None):
31 |         model_name = "stationary_constant_policy" if model_name is None else model_name
32 |         with open(folder_name + model_name, "rb") as fobj:
33 |             self.action = pickle.load(fobj)
-------------------------------------------------------------------------------- /src/model/policy/stationary_decoder_dictionary_policy.py: --------------------------------------------------------------------------------
1 | from environments.intrepid_env_meta.action_type import ActionType
2 | from model.policy.stationary_dictionary_policy import StationaryDictionaryPolicy
3 | 
4 | 
5 | class StationaryDecoderLatentPolicy(ActionType):
6 |     def __init__(self, decoder, q_val_dictionary, actions):
7 |         super(ActionType, self).__init__()
8 | 
9 |         self.decoder = decoder
10 |         self.latent_policy = StationaryDictionaryPolicy(q_val_dictionary, actions)
11 | 
12 |     def action_type(self):
13 |         raise NotImplementedError()
14 | 
15 |     def gen_q_val(self, observations):
16 |         raise NotImplementedError()
17 | 
18 |     def sample_action(self, obs):
19 |         obs = obs[0]
20 |         latent_state = self.decoder.encode_observations(obs)
21 |         return 
self.latent_policy.sample_action(latent_state)
22 | 
23 |     def get_argmax_action(self, obs):
24 |         latent_state = self.decoder.encode_observations(obs)
25 |         return self.latent_policy.get_argmax_action(latent_state)
26 | 
27 |     def save(self, folder_name, model_name=None):
28 |         raise NotImplementedError()
29 | 
30 |     def load(self, folder_name, model_name=None):
31 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/stationary_dictionary_policy.py: --------------------------------------------------------------------------------
1 | import random
2 | 
3 | from environments.intrepid_env_meta.action_type import ActionType
4 | 
5 | 
6 | class StationaryDictionaryPolicy(ActionType):
7 |     def __init__(self, q_val_dictionary, actions):
8 |         super(ActionType, self).__init__()
9 | 
10 |         self.q_val_dictionary = q_val_dictionary
11 |         self.actions = actions
12 | 
13 |     def action_type(self):
14 |         raise NotImplementedError()
15 | 
16 |     def gen_q_val(self, observations):
17 |         raise NotImplementedError()
18 | 
19 |     def sample_action(self, state):
20 |         action = self.get_argmax_action(state)
21 |         assert isinstance(action, int), "Action should be of type int. Found %r of type %r" % (action, type(action))
22 |         return action
23 | 
24 |     def get_argmax_action(self, state):
25 |         state = tuple(state)
26 |         if state in self.q_val_dictionary:
27 |             return int(self.q_val_dictionary[state].argmax())
28 |         else:
29 |             return random.choice(self.actions)
30 | 
31 |     def save(self, folder_name, model_name=None):
32 |         raise NotImplementedError()
33 | 
34 |     def load(self, folder_name, model_name=None):
35 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/stationary_stochastic_policy.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | from environments.intrepid_env_meta.action_type import ActionType
6 | 
7 | 
8 | class StationaryStochasticPolicy(nn.Module, ActionType):
9 |     def __init__(self, constants, config):
10 |         super(StationaryStochasticPolicy, self).__init__()
11 |         super(ActionType, self).__init__()
12 | 
13 |         self.layer1 = nn.Linear(config["obs_dim"], 56)
14 |         self.layer2 = nn.Linear(56, 56)
15 |         self.layer3 = nn.Linear(56, config["num_actions"])
16 | 
17 |     def gen_prob(self, observations):
18 |         x = F.relu(self.layer1(observations))
19 |         x = F.relu(self.layer2(x))
20 |         x = F.softmax(self.layer3(x), dim=1)
21 | 
22 |         return x
23 | 
24 |     def action_type(self):
25 |         return ActionType.Discrete
26 | 
27 |     def sample_action(self, observations):
28 |         prob = self.gen_prob(observations)
29 |         dist = torch.distributions.Categorical(prob)
30 |         return torch.multinomial(dist.probs, 1, True)
31 | 
32 |     def get_argmax_action(self, observations):
33 |         prob = self.gen_prob(observations)
34 |         return prob.max(1)[1]
35 | 
36 |     def save(self, folder_name, model_name=None):
37 |         if model_name is None:
38 |             torch.save(self.state_dict(), folder_name + "stationary_policy")
39 |         else:
40 |             torch.save(self.state_dict(), folder_name + model_name)
41 | 
42 |     def load(self, folder_name, model_name=None):
43 |         if model_name is None:
44 |             self.load_state_dict(torch.load(folder_name + "stationary_policy"))
45 |         else:
46 |             self.load_state_dict(torch.load(folder_name + model_name))
-------------------------------------------------------------------------------- /src/model/transition_encoders/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/transition_encoders/__init__.py
-------------------------------------------------------------------------------- /src/model/transition_encoders/encoder_model_wrapper.py: --------------------------------------------------------------------------------
1 | from model.transition_encoders.compositional_encoder_model import (
2 |     CompositionalEncoderModel,
3 | )
4 | from model.transition_encoders.encoder_model import (
5 |     BackwardEncoderModel,
6 |     ForwardEncoderModel,
7 | )
8 | 
9 | 
10 | class EncoderModelWrapper:
11 |     """Wrapper for encoder model"""
12 | 
13 |     @staticmethod
14 |     def get_encoder_model(model_type, config, constants, bootstrap_model=None):
15 |         if model_type == "backwardmodel":
16 |             return BackwardEncoderModel(config, constants, bootstrap_model)
17 |         elif model_type == "forwardmodel":
18 |             return ForwardEncoderModel(config, constants, bootstrap_model)
19 |         elif model_type == "compbackwardmodel":
20 |             return CompositionalEncoderModel(config, constants, bootstrap_model)
21 |         else:
22 |             raise NotImplementedError("Did not implement %r" % model_type)
-------------------------------------------------------------------------------- /src/setup_validator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/setup_validator/__init__.py
-------------------------------------------------------------------------------- /src/setup_validator/config_key_registry.py: --------------------------------------------------------------------------------
1 | # number of actions; -1 to denote not set
2 | NUM_ACTIONS = "num_actions"
3 | 
4 | # action space; -1 to denote not set
5 | ACTIONS = "actions"
6 | 
7 | # Horizon of the problem; -1 to denote not set
8 | HORIZON = "horizon"
9 | 
10 | # Dimension of the observation; -1 to denote not set
11 | OBS_DIM = "obs_dim"
12 | 
13 | # Type of features the agent receives
14 | FEATURE_TYPE = "feature_type"
15 | 
16 | # Discount factor; -1 to denote not set
17 | GAMMA = "gamma"
-------------------------------------------------------------------------------- /src/setup_validator/core_validator.py: --------------------------------------------------------------------------------
1 | REQUIRED_CONFIG_KEYS = [
2 |     "num_actions",
3 |     "actions",
4 |     "horizon",
5 |     "obs_dim",
6 |     "feature_type",
7 |     "gamma",
8 | ]
9 | 
10 | REQUIRED_CONSTANT_KEYS = [
11 |     "learning_rate",
12 |     "num_homing_policy",
13 |     "encoder_training_num_samples",
14 |     "encoder_training_epoch",
15 |     "encoder_training_lr",
16 |     "encoder_training_batch_size",
17 |     "validation_data_percent",
18 |     "psdp_training_num_samples",
19 |     "cb_oracle_epoch",
20 |     "cb_oracle_lr",
21 |     "cb_oracle_batch_size",
22 |     "eval_homing_policy_sample_size",
23 |     "reward_free_planner",
24 |     "reward_sensitive_planner",
25 | ]
26 | 
27 | 
28 | def validate(config, constants):
29 |     # TODO validate based on the algorithm that is being run
30 | 
31 |     for key in REQUIRED_CONFIG_KEYS:
32 |         assert key in config, "Did not find the key %r in config dictionary" % key
33 | 
34 |     for key in REQUIRED_CONSTANT_KEYS:
35 |         assert key in constants, "Did not find the key %r in constants dictionary" % key
36 | 
37 |     return True
38 | 
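For reference, a minimal sketch of how this validator is meant to be invoked (the literal values are placeholders of our own, not recommended settings; validate only checks key presence):

from setup_validator.core_validator import REQUIRED_CONSTANT_KEYS, validate

# Placeholder config/constants that merely satisfy the key checks
config = {
    "num_actions": 4,
    "actions": [0, 1, 2, 3],
    "horizon": 10,
    "obs_dim": 64,
    "feature_type": "image",
    "gamma": 1.0,
}
constants = {key: -1 for key in REQUIRED_CONSTANT_KEYS}

assert validate(config, constants)

--------------------------------------------------------------------------------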
/src/unit_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/unit_test/__init__.py -------------------------------------------------------------------------------- /src/unit_test/dynamic_arguments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import argparse 3 | import pdb 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("-args_file", default="args.txt") 7 | 8 | data = {"f": 0, "g": "cat"} 9 | 10 | for k, v in data.items(): 11 | parser.add_argument("--%s" % k, default=None, type=type(v), help="data") 12 | 13 | dynamic_args = parser.parse_args() 14 | dynamic_args = vars(dynamic_args) 15 | 16 | pdb.set_trace() 17 | 18 | print(dynamic_args) 19 | -------------------------------------------------------------------------------- /src/unit_test/gridworld_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from experiments.experiment_header import get_header 7 | from environments.intrepid_env_meta.make_env import MakeEnvironment 8 | 9 | 10 | def main(): 11 | exp_setup = get_header() 12 | 13 | if exp_setup.config["seed"] == -1: 14 | seeds = list(range(1234, 1234 + 10)) 15 | num_runs = len(seeds) 16 | else: 17 | seeds = [exp_setup.config["seed"]] 18 | num_runs = 1 19 | 20 | for exp_id in range(1, num_runs + 1): 21 | exp_setup.config["seed"] = seeds[exp_id - 1] 22 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 23 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 24 | 25 | # Set the random seed 26 | random.seed(exp_setup.config["seed"]) 27 | np.random.seed(exp_setup.config["seed"]) 28 | torch.manual_seed(exp_setup.config["seed"]) 29 | if torch.cuda.is_available(): 30 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 31 | 32 | # Create a new environment 33 | make_env = MakeEnvironment() 34 | env = make_env.make(exp_setup) 35 | exp_setup.logger.log("Environment Created") 36 | 37 | import imageio 38 | import matplotlib.pyplot as plt 39 | 40 | plt.ion() 41 | 42 | images = [] 43 | 44 | for _ in range(0, 5): 45 | img, info = env.reset() 46 | plt.imshow(img) 47 | images.append(img) 48 | 49 | # print("Image shape is ", img.shape) 50 | for _ in range(0, exp_setup.config["horizon"]): 51 | action = random.choice(exp_setup.config["actions"]) 52 | img, _, _, _ = env.step(action) 53 | images.append(img) 54 | # print("Image shape is ", img.shape) 55 | plt.imshow(img) 56 | plt.pause(0.05) 57 | 58 | imageio.mimsave("./gridworld.gif", images) 59 | 60 | import pdb 61 | 62 | pdb.set_trace() 63 | 64 | 65 | if __name__ == "__main__": 66 | print("SETTING THE START METHOD ") 67 | mp.freeze_support() 68 | mp.set_start_method("spawn") 69 | main() 70 | -------------------------------------------------------------------------------- /src/unit_test/make_env.py: -------------------------------------------------------------------------------- 1 | from environments.intrepid_env_meta.make_env import MakeEnvironment 2 | 3 | 4 | make_env = MakeEnvironment() 5 | -------------------------------------------------------------------------------- /src/unit_test/matterport_exo.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import imageio 3 | 
import numpy as np
4 | import matplotlib.pyplot as plt
5 | import pdb
6 | 
7 | from skimage.transform import resize
8 | 
9 | # Read image
10 | img = imageio.imread("./matterport_sample.png")
11 | obs_dim = img.shape
12 | print("Obs dim shape is ", obs_dim)
13 | 
14 | fnames = glob.glob("data/matterport/icon_figs/*png")
15 | distractors = []
16 | 
17 | for fname in fnames:
18 |     distractor_img = imageio.imread(fname)
19 |     print("Read distractor from %s of size %r" % (fname, distractor_img.shape))
20 | 
21 |     assert len(distractor_img.shape) == 3 and (
22 |         distractor_img.shape[2] == 3 or distractor_img.shape[2] == 4
23 |     ), "Can only read RGB and RGBA images"
24 |     if distractor_img.shape[2] == 4:
25 |         distractor_img = distractor_img[:, :, :3]
26 | 
27 |     # Resize based on the original image so that the distractor is at most
28 |     # 20% of the observation's height and width
29 |     distractor_img = resize(
30 |         distractor_img,
31 |         (
32 |             min(distractor_img.shape[0], int(0.2 * obs_dim[0])),
33 |             min(distractor_img.shape[1], int(0.2 * obs_dim[1])),
34 |             3,
35 |         ),
36 |     )
37 |     distractor_img = (distractor_img * 255).astype(np.uint8)
38 |     distractors.append(distractor_img)
39 | 
40 | print("Read %d many distractors " % len(distractors))
41 | 
42 | distractor_hor = 40
43 | distractor_ver = 30
44 | distractor_id = 0
45 | 
46 | # Add distractor
47 | distractor_img = distractors[distractor_id]
48 | distractor_shape = distractor_img.shape
49 | 
50 | img_slice = img[
51 |     distractor_ver : distractor_ver + distractor_shape[0],
52 |     distractor_hor : distractor_hor + distractor_shape[1],
53 |     :,
54 | ]
55 | 
56 | print("Img shape is ", img.shape)
57 | print("Img slice's shape is ", img_slice.shape)
58 | print("Distractor slice's shape is ", distractor_shape)
59 | 
60 | distractor_img = distractor_img.reshape((-1, 3))
61 | img_slice = img_slice.reshape((-1, 3))
62 | distractor_img_min = distractor_img.min(1)
63 | blue_pixel_ix = np.argwhere(distractor_img_min < 220)  # flattened positions of non-white (foreground) pixels
64 | values = np.squeeze(distractor_img[blue_pixel_ix])
65 | np.put_along_axis(img_slice, blue_pixel_ix, values, axis=0)
66 | 
67 | img_slice = img_slice.reshape(distractor_shape)  # distractor and img_slice have the same shape
68 | 
69 | img[
70 |     distractor_ver : distractor_ver + distractor_shape[0],
71 |     distractor_hor : distractor_hor + distractor_shape[1],
72 |     :,
73 | ] = img_slice
74 | 
75 | imgplot = plt.imshow(img)
76 | plt.show()
77 | 
78 | pdb.set_trace()
79 | 
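The paste logic above generalizes directly; here is a hedged sketch of the same non-white masking wrapped in a reusable helper (our own function, not part of the repo, assuming RGB uint8 arrays with near-white distractor backgrounds):

import numpy as np


def overlay_distractor(img, distractor, top, left, white_thresh=220):
    """Paste the non-white pixels of `distractor` onto `img` at (top, left), in place."""
    h, w, _ = distractor.shape
    region = img[top : top + h, left : left + w, :]  # a view into img
    mask = distractor.min(axis=2) < white_thresh  # foreground = not near-white
    region[mask] = distractor[mask]
    return img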
-------------------------------------------------------------------------------- /src/unit_test/matterport_test.py: --------------------------------------------------------------------------------
1 | import pdb
2 | import json
3 | import numpy as np
4 | 
5 | from environments.matterport.matterport import Matterport
6 | 
7 | with open("data/matterport/config.json") as f:
8 |     config = json.load(f)
9 | 
10 | env = Matterport(config)
11 | img, info = env.reset()
12 | 
13 | 
14 | def print_stuff():
15 |     print(env.sim.getState()[0].scanId)
16 |     print(env.sim.getState()[0].location.viewpointId)
17 |     print(env.sim.getState()[0].viewIndex)
18 |     print(env.sim.getState()[0].heading)
19 |     print(env.sim.getState()[0].elevation)
20 |     print(env.sim.getState()[0].step)
21 |     print(env.sim.getState()[0].navigableLocations)
22 |     print(np.array(env.sim.getState()[0].rgb, copy=False).shape)
23 |     print()
24 | 
25 | 
26 | print_stuff()
27 | print("Taking action")
28 | for i in range(0, config["horizon"]):
29 |     img, reward, done, info = env.step(0)
30 |     print_stuff()
31 | 
32 | pdb.set_trace()
33 | 
-------------------------------------------------------------------------------- /src/unit_test/module_multiprocessing.py: --------------------------------------------------------------------------------
1 | import torch
2 | import multiprocessing as mp
3 | import torch.nn as nn
4 | import numpy as np
5 | 
6 | 
7 | from utils.cuda import cuda_var
8 | 
9 | 
10 | class Worker:
11 |     def __init__(self):
12 |         pass
13 | 
14 |     @staticmethod
15 |     def forward(id, model, vector):
16 |         output = model(vector)
17 |         print("Client: %r Output Sum is %r" % (id, output.sum()))
18 | 
19 | 
20 | class Model(nn.Module):
21 |     def __init__(self):
22 |         super(Model, self).__init__()
23 | 
24 |         self.transform = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU())
25 | 
26 |         if torch.cuda.is_available():
27 |             self.cuda()
28 | 
29 |     def forward(self, x):
30 |         return self.transform(x)
31 | 
32 | 
33 | if __name__ == "__main__":
34 |     mp.freeze_support()
35 |     mp.set_start_method("spawn")
36 | 
37 |     a = np.random.rand(1, 32)
38 |     a_v = cuda_var(torch.from_numpy(a)).float()
39 | 
40 |     model = Model()
41 |     output = model(a_v)
42 |     print("Master Output Sum is %r " % output.sum())
43 | 
44 |     # creating new process
45 |     p1 = mp.Process(target=Worker.forward, args=(0, model, a_v))
46 |     p1.start()
47 | 
48 |     p2 = mp.Process(target=Worker.forward, args=(1, model, a_v))
49 |     p2.start()
50 | 
51 |     # wait until process is finished
52 |     p1.join()
53 |     p2.join()
-------------------------------------------------------------------------------- /src/unit_test/montezuma_human_interactive.py: --------------------------------------------------------------------------------
1 | import gym
2 | import time
3 | import pickle
4 | import matplotlib.pyplot as plt
5 | 
6 | from textwrap import wrap
7 | from skimage.transform import resize
8 | 
9 | plt.ion()
10 | env = gym.make("MontezumaRevengeDeterministic-v4")
11 | 
12 | 
13 | def process_obs_and_show(obs, seq, ret):
14 |     obs = obs[34 : 34 + 160, :160]
15 |     obs = resize(obs, (500, 500, 3))
16 |     seq_str = ", ".join([str(action) for action in seq])
17 |     plt.clf()
18 | 
19 |     plt.title("\n".join(wrap("Trajectory [%s], return: %f" % (seq_str, ret), 90)), fontsize=8)
20 |     plt.imshow(obs)
21 |     plt.show()
22 | 
23 | 
24 | def take_action(action):
25 |     obs = None
26 |     reward = 0
27 |     for _ in range(4):
28 |         obs, reward_, _, _ = env.step(action)
29 |         reward += reward_
30 |     return obs, reward
31 | 
32 | 
33 | def play(seq):
34 |     obs = env.reset()
35 |     ret = 0
36 |     for ix, action in enumerate(seq):
37 |         obs, reward = take_action(action)
38 |         ret += reward
39 |         process_obs_and_show(obs, seq, ret)
40 |     return obs, ret
41 | 
42 | 
43 | seq = []
44 | ret = 0
45 | obs = env.reset()
46 | process_obs_and_show(obs, seq, ret)
47 | 
48 | while True:
49 |     cmd_str = input("Enter an action number between 0 and 17, b to go back, q to quit, or 'load <file>' to replay a saved trajectory\n\n")
50 | 
51 |     cmd_seq = [tk.strip() for tk in cmd_str.split(",")]
52 |     cmd_seq = [tk for tk in cmd_seq if len(tk) > 0]
53 | 
54 |     for cmd in cmd_seq:
55 |         if cmd == "b":
56 |             if len(seq) > 0:
57 |                 # go back
58 |                 seq.pop()
59 |                 obs, ret = play(seq)
60 |             else:
61 |                 print("No observation to backtrack\n\n")
62 | 
63 |         elif cmd == "q":
64 |             with open(
65 |                 "key-montezuma-achieved-return-%d-%d.pkl" % (ret, int(time.time())),
66 |                 "wb",
67 |             ) as f:
68 |                 pickle.dump({"seq": seq, "total_return": ret}, f)
69 |             print("Quitting.")
70 |             exit(0)
71 | 
72 |         elif cmd.startswith("load"):
73 |             with open(cmd.split()[1], "rb") as f:
74 |                 data = 
pickle.load(f)
75 |             obs, ret = play(data["seq"])
76 | 
77 |         else:
78 |             try:
79 |                 action = int(cmd)
80 |                 obs, reward = take_action(action)
81 |                 seq.append(action)
82 |                 ret += reward
83 |                 process_obs_and_show(obs, seq, ret)
84 |             except Exception:
85 |                 print("Enter b, q, 'load <file>', or an action number")
86 |                 continue
-------------------------------------------------------------------------------- /src/unit_test/multiprocessing_different_gpu.py: --------------------------------------------------------------------------------
1 | import torch
2 | import multiprocessing as mp
3 | import torch.nn as nn
4 | import numpy as np
5 | 
6 | from utils.cuda import cuda_var
7 | from copy import deepcopy
8 | 
9 | 
10 | class Worker:
11 |     def __init__(self):
12 |         pass
13 | 
14 |     @staticmethod
15 |     def forward(id, model, gpu_id):
16 |         # Set this process to use a different GPU
17 |         torch.cuda.set_device(gpu_id)
18 |         assert gpu_id == torch.cuda.current_device()
19 | 
20 |         a = np.eye(32)
21 |         vector = cuda_var(torch.from_numpy(a)).float()
22 | 
23 |         output = None
24 |         for i in range(0, 25000):  # A time consuming process
25 |             output = model(vector)
26 |         print(
27 |             "Client %r: Given GPU-ID to use %d, using GPU-ID %r out of %r, Output Sum is %r"
28 |             % (
29 |                 id,
30 |                 gpu_id,
31 |                 torch.cuda.current_device(),
32 |                 torch.cuda.device_count(),
33 |                 output.sum(),
34 |             )
35 |         )
36 | 
37 | 
38 | class Model(nn.Module):
39 |     def __init__(self):
40 |         super(Model, self).__init__()
41 | 
42 |         self.transform = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU())
43 | 
44 |         if torch.cuda.is_available():
45 |             self.cuda()
46 | 
47 |     def forward(self, x):
48 |         return self.transform(x)
49 | 
50 | 
51 | if __name__ == "__main__":
52 |     mp.freeze_support()
53 |     mp.set_start_method("spawn")
54 | 
55 |     a = np.eye(32)
56 |     a_v = cuda_var(torch.from_numpy(a)).float()
57 | 
58 |     model = Model()
59 |     output = model(a_v)
60 |     print("Master Output Sum is %r " % output.sum())
61 | 
62 |     # creating new process
63 |     new_model = deepcopy(model)
64 |     new_model.cuda(0)
65 |     p1 = mp.Process(target=Worker.forward, args=(0, new_model, 0))
66 |     p1.start()
67 | 
68 |     new_model = deepcopy(model)
69 |     new_model.cuda(1)
70 |     p2 = mp.Process(target=Worker.forward, args=(1, new_model, 1))
71 |     p2.start()
72 | 
73 |     new_model = deepcopy(model)
74 |     new_model.cuda(2)
75 |     p3 = mp.Process(target=Worker.forward, args=(2, new_model, 2))
76 |     p3.start()
77 | 
78 |     # wait until process is finished
79 |     p1.join()
80 |     p2.join()
81 |     p3.join()
82 | 
-------------------------------------------------------------------------------- /src/unit_test/shared_replay_memory.py: --------------------------------------------------------------------------------
1 | import random
2 | import multiprocessing as mp
3 | 
4 | 
5 | def square_list(n, results):
6 |     """
7 |     continuously write random values into the shared arrays and print their running sums
8 |     """
9 |     while True:
10 |         i = random.randint(0, 4)
11 |         results[n][i] = random.random()
12 | 
13 |         # print result Array
14 |         if i == 0:
15 |             print("Result(in process p1) 0: " + str(sum(results[0])))
16 |             print("Result(in process p1) 1: " + str(sum(results[1])))
17 | 
18 | 
19 | if __name__ == "__main__":
20 |     # creating two shared float arrays, each with space for 5 values
21 |     results = []
22 |     for i in range(0, 2):
23 |         results.append(mp.Array("f", range(5)))
24 | 
25 |     # creating new process
26 |     p1 = mp.Process(target=square_list, args=(0, results))
27 |     p1.start()
28 | 
29 |     p2 = mp.Process(target=square_list, args=(1, results))
30 |     p2.start()
31 | 
32 |     # wait until process is finished
33 |     p1.join()
34 
| p2.join() 35 | -------------------------------------------------------------------------------- /src/unit_test/test_matterport.py: -------------------------------------------------------------------------------- 1 | import MatterSim 2 | import numpy as np 3 | import json 4 | import os 5 | from environments.intrepid_env_meta.environment_wrapper import GenerateEnvironmentWrapper 6 | 7 | 8 | def first_test(): 9 | env = MatterSim.Simulator() 10 | env.setCameraResolution(640, 480) 11 | env.setPreloadingEnabled(False) 12 | env.setDepthEnabled(False) 13 | env.setBatchSize(1) 14 | env.setCacheSize(2) 15 | 16 | env.setDatasetPath("/mnt/data/matterport/v1/scans") 17 | env.setNavGraphPath("/mnt/data/matterport/v1/connectivity/") 18 | 19 | env.initialize() 20 | house_id = "17DRP5sb8fy" 21 | room_id = "0f37bd0737e349de9d536263a4bdd60d" 22 | 23 | env.newEpisode([house_id], [room_id], [0], [0]) 24 | 25 | def print_stuff(): 26 | print(env.getState()[0].scanId) 27 | print(env.getState()[0].location.viewpointId) 28 | print(env.getState()[0].viewIndex) 29 | print(env.getState()[0].heading) 30 | print(env.getState()[0].elevation) 31 | print(env.getState()[0].step) 32 | print(env.getState()[0].navigableLocations) 33 | print(np.array(env.getState()[0].rgb, copy=False).shape) 34 | print() 35 | 36 | print_stuff() 37 | env.makeAction([1], [0], [0]) 38 | print_stuff() 39 | 40 | env.newEpisode([house_id], [room_id], [0], [0]) 41 | 42 | 43 | def test_env(): 44 | with open("../data/matterport/config.json") as f: 45 | config = json.load(f) 46 | 47 | config["save_trace"] = "True" 48 | config["trace_sample_rate"] = 500 49 | config["save_path"] = os.getenv("PT_OUTPUT_DIR") 50 | config["exp_name"] = "test" 51 | config["env_seed"] = 0 52 | config["policy_type"] = "linear" 53 | 54 | env = GenerateEnvironmentWrapper("matterport", config) 55 | env.reset() 56 | for _ in range(30): 57 | print("Stepping in env with action {}".format(1)) 58 | obs, rew, done, info = env.step(1) 59 | print("Got:", rew, done, info["location"]) 60 | print() 61 | env.reset() 62 | 63 | 64 | if __name__ == "__main__": 65 | test_env() 66 | -------------------------------------------------------------------------------- /src/unit_test/test_slot_factored_mdp.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from environments.rl_acid_env.slot_factored_mdp import SlotFactoredMDP 4 | 5 | config = {"state_dim": 5, "grid_x": 3, "grid_y": 5, "horizon": 10} 6 | 7 | mdp = SlotFactoredMDP(config) 8 | obs, info = mdp.reset() 9 | print("State \n", info["state"]) 10 | 11 | for _ in range(0, config["horizon"]): 12 | # pdb.set_trace() 13 | action = random.randint(0, config["grid_x"] * config["grid_y"] - 1) 14 | obs, reward, done, info = mdp.step(action) 15 | 16 | print("Action ", action) 17 | print("State \n", info["state"]) 18 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/utils/__init__.py -------------------------------------------------------------------------------- /src/utils/average.py: -------------------------------------------------------------------------------- 1 | class AverageUtil: 2 | def __init__(self, init_val=None): 3 | if init_val is None: 4 | self._sum_val = 0.0 5 | self._num_items = 0 6 | else: 7 | self._sum_val = init_val 8 | self._num_items = 1 9 | 10 
|     def get_num_items(self):
11 |         return self._num_items
12 | 
13 |     def acc(self, val):
14 |         self._sum_val += val
15 |         self._num_items += 1
16 | 
17 |     def get_mean(self):
18 |         return self._sum_val / float(max(1, self._num_items))
19 | 
20 |     def __str__(self):
21 |         return "%f (count: %d)" % (self.get_mean(), self._num_items)
-------------------------------------------------------------------------------- /src/utils/beautify_time.py: --------------------------------------------------------------------------------
1 | import time
2 | 
3 | 
4 | def beautify(time_taken_sec):
5 |     """Given time taken in seconds it returns a beautified string"""
6 | 
7 |     time_taken_sec = int(time_taken_sec)
8 | 
9 |     if time_taken_sec < 60:
10 |         return "%d seconds" % time_taken_sec
11 |     elif 60 <= time_taken_sec < 60 * 60:
12 |         return "%d minutes" % int(time_taken_sec / 60.0)
13 |     elif 60 * 60 <= time_taken_sec < 24 * 60 * 60:
14 |         return "%d hours" % int(time_taken_sec / (60.0 * 60.0))
15 |     elif 24 * 60 * 60 <= time_taken_sec < 30 * 24 * 60 * 60:
16 |         return "%d days" % int(time_taken_sec / (24.0 * 60.0 * 60.0))
17 |     elif 30 * 24 * 60 * 60 <= time_taken_sec < 365 * 24 * 60 * 60:
18 |         return "%d months" % int(time_taken_sec / (30 * 24 * 60 * 60))
19 |     elif 365 * 24 * 60 * 60 <= time_taken_sec:
20 |         months = int((time_taken_sec % (365.0 * 24 * 60.0 * 60.0)) / (30.0 * 24.0 * 60.0 * 60.0))
21 |         return "%d years %d months" % (
22 |             int(time_taken_sec / (365.0 * 24.0 * 60.0 * 60.0)),
23 |             months,
24 |         )
25 | 
26 | 
27 | def elapsed_from_str(time_from):
28 |     """Given a start time, return a beautified string of the time elapsed since then"""
29 | 
30 |     return beautify(time.time() - time_from)
-------------------------------------------------------------------------------- /src/utils/conv_util.py: --------------------------------------------------------------------------------
1 | import math
2 | 
3 | 
4 | def get_conv_out_size(h, w, kernel_size, stride, dilation=1, padding=0):
5 |     """Note that in PyTorch the image is channel x height x width"""
6 | 
7 |     if isinstance(kernel_size, tuple):
8 |         kernel_size_h, kernel_size_w = kernel_size
9 |     elif isinstance(kernel_size, int):
10 |         kernel_size_h = kernel_size
11 |         kernel_size_w = kernel_size
12 |     else:
13 |         raise AssertionError("Kernel size must either be tuple with 2 values or int")
14 | 
15 |     if isinstance(padding, tuple):
16 |         padding_h, padding_w = padding
17 |     elif isinstance(padding, int):
18 |         padding_h = padding
19 |         padding_w = padding
20 |     else:
21 |         raise AssertionError("Padding must either be tuple with 2 values or int")
22 | 
23 |     if isinstance(dilation, tuple):
24 |         dilation_h, dilation_w = dilation
25 |     elif isinstance(dilation, int):
26 |         dilation_h = dilation
27 |         dilation_w = dilation
28 |     else:
29 |         raise AssertionError("Dilation must either be tuple with 2 values or int")
30 | 
31 |     if isinstance(stride, tuple):
32 |         stride_h, stride_w = stride
33 |     elif isinstance(stride, int):
34 |         stride_h = stride
35 |         stride_w = stride
36 |     else:
37 |         raise AssertionError("Stride must either be tuple with 2 values or int")
38 | 
39 |     h_out = int(math.floor((h + 2 * padding_h - dilation_h * (kernel_size_h - 1) - 1) / float(stride_h) + 1))
40 | 
41 |     w_out = int(math.floor((w + 2 * padding_w - dilation_w * (kernel_size_w - 1) - 1) / float(stride_w) + 1))
42 | 
43 |     return h_out, w_out
-------------------------------------------------------------------------------- /src/utils/cuda.py: --------------------------------------------------------------------------------
1 | import torch
2 | from 
torch.autograd import Variable
3 | 
4 | 
5 | def cuda_tensor(t):
6 |     if torch.cuda.is_available():
7 |         return t.cuda()
8 |     else:
9 |         return t
10 | 
11 | 
12 | def cuda_var(t, volatile=False, requires_grad=False):
13 |     if volatile:
14 |         return Variable(cuda_tensor(t), volatile=True, requires_grad=requires_grad)
15 |     else:
16 |         return Variable(cuda_tensor(t), requires_grad=requires_grad)
-------------------------------------------------------------------------------- /src/utils/generic_policy.py: --------------------------------------------------------------------------------
1 | import random
2 | 
3 | 
4 | """ Basic functionality for sampling from discrete probability distributions """
5 | 
6 | 
7 | def sample_action_from_prob(prob):
8 |     """Pick an action sampled from the probability distribution"""
9 | 
10 |     num_actions = len(prob)
11 |     if num_actions == 0:
12 |         raise AssertionError("There must be at least one action.")
13 | 
14 |     v = random.random()
15 | 
16 |     for i in range(0, num_actions):
17 |         v = v - prob[i]
18 |         if v <= 0:
19 |             return i
20 | 
21 |     return num_actions - 1
22 | 
23 | 
24 | def sample_uniform_from_prob(num_actions):
25 |     return random.randint(0, num_actions - 1)
26 | 
27 | 
28 | def get_argmax_action(model_out_val):
29 |     """Returns argmax_a model_out_val(a) with random tie breaking."""
30 | 
31 |     num_actions = len(model_out_val)
32 | 
33 |     if num_actions == 0:
34 |         raise AssertionError("There must be at least one action.")
35 | 
36 |     ix_max = [0]
37 |     for i in range(1, num_actions):
38 |         if model_out_val[i] > model_out_val[ix_max[0]]:
39 |             ix_max[:] = [i]
40 |         elif model_out_val[i] == model_out_val[ix_max[0]]:
41 |             ix_max.append(i)
42 | 
43 |     return ix_max[random.randint(0, len(ix_max) - 1)]
-------------------------------------------------------------------------------- /src/utils/gumbel.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | 
4 | from utils.cuda import cuda_var
5 | 
6 | 
7 | def _sample_gumbel(input_size):
8 |     noise = torch.rand(input_size)
9 |     eps = 1e-20
10 |     noise.add_(eps).log_().neg_()
11 |     noise.add_(eps).log_().neg_()
12 |     return cuda_var(noise)
13 | 
14 | 
15 | def gumbel_sample(input, temperature):
16 |     noise = _sample_gumbel(input.size())
17 |     x = (input + noise) / temperature
18 |     prob = F.softmax(x, dim=1)
19 |     log_prob = F.log_softmax(x, dim=1)
20 |     return prob, log_prob
-------------------------------------------------------------------------------- /src/utils/leaky_softmax.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def leaky_softmax(matrix):
5 |     """Given a matrix of size batch x num_factors we output another matrix using leaky softmax based on
6 |     Dynamic Routing Between Capsules, Sabour et al., 2017
7 |     """
8 | 
9 |     vector_norms = torch.norm(matrix, dim=1).view(-1)  # Batch
10 |     sq_vector_norms = vector_norms * vector_norms  # Batch
11 |     ratio = sq_vector_norms / (1.0 + sq_vector_norms)  # Batch
12 | 
13 |     unit_vector = matrix / vector_norms.view(-1, 1)  # Batch x num_factors
14 |     output = unit_vector * ratio.view(-1, 1)
15 | 
16 |     return output
17 | 
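A quick numerical check of the squashing behaviour described in the docstring (our own toy input, assuming the module above is importable): each output row keeps its direction but is squashed to norm ||v||^2 / (1 + ||v||^2) < 1.

import torch

from utils.leaky_softmax import leaky_softmax

m = torch.tensor([[3.0, 4.0], [0.0, 0.1]])  # row norms 5.0 and 0.1
out = leaky_softmax(m)
print(torch.norm(out, dim=1))  # ~ [25/26, 0.01/1.01] = [0.9615, 0.0099]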
-------------------------------------------------------------------------------- /src/utils/multiprocess_logger.py: --------------------------------------------------------------------------------
1 | import os
2 | import atexit
3 | import logging
4 | 
5 | from datetime import datetime
6 | from multiprocessing import Process, Queue
7 | 
8 | 
9 | def logtxt(fname, s):
10 |     os.makedirs(os.path.dirname(fname), exist_ok=True)
11 |     with open(fname, "a") as f:
12 |         f.write(f"{datetime.now()}: {s}\n")
13 | 
14 | 
15 | class MultiprocessingLoggerManager(object):
16 |     def __init__(self, file_path, logging_level):
17 |         self.log_queue = Queue()
18 |         self.p = Process(target=logger_daemon, args=(self.log_queue, file_path, logging_level))
19 |         self.p.start()
20 |         atexit.register(self.cleanup)
21 | 
22 |     def get_logger(self, client_id):
23 |         return MultiprocessingLogger(client_id, self.log_queue)
24 | 
25 |     def cleanup(self):
26 |         self.p.terminate()
27 | 
28 | 
29 | class MultiprocessingLogger(object):
30 |     def __init__(self, client_id, log_queue):
31 |         self.client_id = client_id
32 |         self.log_queue = log_queue
33 | 
34 |     def log(self, message):
35 |         print("Client %r: %r" % (self.client_id, message))
36 |         self.log_queue.put("Client %r: %r" % (self.client_id, message))
37 | 
38 |     def debug(self, message):
39 |         print("Client %r: %r" % (self.client_id, message))
40 |         self.log_queue.put("Client %r: %r" % (self.client_id, message))
41 | 
42 | 
43 | def logger_daemon(log_queue, file_path, logging_level):
44 |     logging.basicConfig(filename=file_path, level=logging_level)
45 |     while True:
46 |         logging.info(log_queue.get())
-------------------------------------------------------------------------------- /src/utils/normalizer.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | class BaseNormalizer:
5 |     def __init__(self, read_only=False):
6 |         self.read_only = read_only
7 | 
8 |     def set_read_only(self):
9 |         self.read_only = True
10 | 
11 |     def unset_read_only(self):
12 |         self.read_only = False
13 | 
14 |     def state_dict(self):
15 |         return None
16 | 
17 |     def load_state_dict(self, _):
18 |         return
19 | 
20 | 
21 | class MeanStdNormalizer(BaseNormalizer):
22 |     def __init__(self, read_only=False, clip=5.0, epsilon=1e-8):
23 |         BaseNormalizer.__init__(self, read_only)
24 |         self.read_only = read_only
25 |         self.rms = None
26 |         self.clip = clip
27 |         self.epsilon = epsilon
28 | 
29 |     def __call__(self, x):
30 |         x = np.asarray(x)
31 |         if self.rms is None:
32 |             self.rms = RunningMeanStd(shape=(1,) + x.shape[1:])
33 |         if not self.read_only:
34 |             self.rms.update(x)
35 |         return np.clip(
36 |             (x - self.rms.mean) / np.sqrt(self.rms.var + self.epsilon),
37 |             -self.clip,
38 |             self.clip,
39 |         )
40 | 
41 |     def state_dict(self):
42 |         return {"mean": self.rms.mean, "var": self.rms.var}
43 | 
44 |     def load_state_dict(self, saved):
45 |         self.rms.mean = saved["mean"]
46 |         self.rms.var = saved["var"]
47 | 
48 | 
49 | class RunningMeanStd(object):
50 |     # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
51 |     def __init__(self, epsilon=1e-4, shape=()):
52 |         self.mean = np.zeros(shape, "float64")
53 |         self.var = np.ones(shape, "float64")
54 |         self.count = epsilon
55 | 
56 |     def update(self, x):
57 |         batch_mean = np.mean(x, axis=0)
58 |         batch_var = np.var(x, axis=0)
59 |         batch_count = x.shape[0]
60 |         self.update_from_moments(batch_mean, batch_var, batch_count)
61 | 
62 |     def update_from_moments(self, batch_mean, batch_var, batch_count):
63 |         self.mean, self.var, self.count = update_mean_var_count_from_moments(
64 |             self.mean, self.var, self.count, batch_mean, batch_var, batch_count
65 |         )
66 | 
67 | 
68 | def update_mean_var_count_from_moments(mean, var, count, batch_mean, batch_var, batch_count):
69 |     delta = batch_mean - mean
70 |     tot_count = count + batch_count
71 | 
72 |     new_mean = mean + delta * batch_count / tot_count
73 |     m_a = var * count
74 |     m_b = batch_var * batch_count
75 |     M2 = m_a + m_b + np.square(delta) * count * batch_count / tot_count
76 |     new_var = M2 / tot_count
77 |     new_count = tot_count
78 | 
79 |     return new_mean, new_var, new_count
80 | 
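The moment-merging above is the parallel variance algorithm referenced in the comment; a quick check (our own toy data) that merging batch moments reproduces the pooled statistics:

import numpy as np

from utils.normalizer import update_mean_var_count_from_moments

x1, x2 = np.random.randn(100), np.random.randn(50) + 1.0
mean, var, _ = update_mean_var_count_from_moments(
    x1.mean(), x1.var(), 100, x2.mean(), x2.var(), 50
)
both = np.concatenate([x1, x2])
assert np.allclose([mean, var], [both.mean(), both.var()])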
-------------------------------------------------------------------------------- /src/utils/simclr_transform.py: --------------------------------------------------------------------------------
1 | # import cv2
2 | # import torch
3 | # import numpy as np
4 | 
5 | from torchvision import transforms
6 | 
7 | 
8 | # class GaussianBlur(object):
9 | #     # Implements Gaussian blur as described in the SimCLR paper
10 | #     def __init__(self, kernel_size, min=0.1, max=2.0):
11 | #         self.min = min
12 | #         self.max = max
13 | #         # kernel size is set to be 10% of the image height/width
14 | #         self.kernel_size = kernel_size
15 | #
16 | #     def __call__(self, sample):
17 | #         sample = np.array(sample)
18 | #
19 | #         # blur the image with a 50% chance
20 | #         prob = np.random.random_sample()
21 | #
22 | #         if prob < 0.5:
23 | #             sigma = (self.max - self.min) * np.random.random_sample() + self.min
24 | #             sample = cv2.GaussianBlur(sample, (self.kernel_size, self.kernel_size), sigma)
25 | #
26 | #         return torch.FloatTensor(sample)
27 | 
28 | 
29 | train_transform = transforms.Compose(
30 |     [
31 |         transforms.RandomResizedCrop(32),
32 |         transforms.RandomHorizontalFlip(p=0.5),
33 |         transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
34 |         transforms.RandomGrayscale(p=0.2),
35 |         transforms.RandomApply(
36 |             [transforms.GaussianBlur(kernel_size=int(0.1 * 32), sigma=(0.1, 2.0))],
37 |             p=0.5,
38 |         ),
39 |         # GaussianBlur(kernel_size=int(0.1 * 32)),
40 |         # transforms.ToTensor(),  # Removed it as it permuted the order; instead, move it to the top
41 |         transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
42 |     ]
43 | )
44 | 
45 | test_transform = transforms.Compose(
46 |     [
47 |         transforms.ToTensor(),
48 |         transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
49 |     ]
50 | )
-------------------------------------------------------------------------------- /src/utils/telemetry.py: --------------------------------------------------------------------------------
1 | import time
2 | 
3 | 
4 | class Telemeter:
5 |     """A class for measuring time taken by different code snippets"""
6 | 
7 |     def __init__(self, name):
8 |         self.name = name
9 | 
10 |         self.time_data = dict()
11 |         self.ctr = dict()
12 | 
13 |         self.active_key = None
14 |         self.timer = None  # Start of current code window
15 | 
16 |     def start(self, key):
17 |         self.active_key = "%s_%s" % (self.name, key)
18 | 
19 |         # Start the timer
20 |         self.timer = time.time()
21 | 
22 |     def stop(self):
23 |         time_taken = time.time() - self.timer
24 |         self.timer = None  # Reset the timer
25 | 
26 |         if self.active_key in self.time_data:
27 |             self.time_data[self.active_key] += time_taken
28 |             self.ctr[self.active_key] += 1
29 |         else:
30 |             self.time_data[self.active_key] = time_taken
31 |             self.ctr[self.active_key] = 1
32 | 
33 |     def merge(self, telemetry):
34 |         assert self.timer is None and telemetry.timer is None, "Cannot merge telemeters with a running timer."
35 |         assert self.name != telemetry.name, "Telemeters with the same name cannot be merged."
36 | 
37 |         for key in telemetry.time_data:
38 |             # Accumulate rather than overwrite, so keys present in both telemeters are merged correctly
39 |             self.time_data[key] = self.time_data.get(key, 0.0) + telemetry.time_data[key]
40 |             self.ctr[key] = self.ctr.get(key, 0) + telemetry.ctr[key]
41 | 
42 |     def save_to_log(self, logger):
43 |         for key, time_taken in sorted(self.time_data.items()):
44 |             count = self.ctr[key]
45 |             avg = round(time_taken / float(max(1, count)), 4)
46 |             logger.log("%r: Avg time taken %r sec with %d count" % (key, avg, count))
47 | 
48 |     def print_report(self):
49 |         for key, time_taken in sorted(self.time_data.items()):
50 |             count = self.ctr[key]
51 |             avg = round(time_taken / float(max(1, count)), 4)
52 |             print("%r: Avg time taken %r sec with %d count" % (key, avg, count))
-------------------------------------------------------------------------------- /src/utils/tensorboard.py: --------------------------------------------------------------------------------
1 | from tensorboardX import SummaryWriter
2 | 
3 | 
4 | class Tensorboard:
5 |     def __init__(self, log_dir):
6 |         save_dir = log_dir + "/tensorboard_logs/"
7 |         self.writer = SummaryWriter(save_dir)
8 |         self.index_dict = dict()
9 | 
10 |     def log_scalar(self, name, value, index=-1):
11 |         if index == -1:
12 |             if name in self.index_dict:
13 |                 self.index_dict[name] += 1
14 |                 index = self.index_dict[name]
15 |             else:
16 |                 self.index_dict[name] = 1
17 |                 index = 1
18 |         self.writer.add_scalar(name, value, index)
19 | 
20 |     def log_histogram(self, name, value, bins, index=-1):
21 |         if index == -1:
22 |             if name in self.index_dict:
23 |                 self.index_dict[name] += 1
24 |                 index = self.index_dict[name]
25 |             else:
26 |                 self.index_dict[name] = 1
27 |                 index = 1
28 |         self.writer.add_histogram(name, value, index, bins)
--------------------------------------------------------------------------------
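A usage sketch for the Tensorboard wrapper above (the log directory is a placeholder): when `index` is omitted, each named series keeps its own auto-incrementing step counter.

from utils.tensorboard import Tensorboard

tb = Tensorboard(log_dir="./logs")  # writes under ./logs/tensorboard_logs/
tb.log_scalar("train/loss", 0.93)   # logged at step 1
tb.log_scalar("train/loss", 0.71)   # logged at step 2 (auto-incremented)
tb.log_scalar("eval/return", 4.2, index=10)  # explicit step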