├── .github └── workflows │ ├── python_formatting.yml │ └── python_lint.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── data ├── MontezumaRevengeDeterministic-v4 │ ├── config.json │ └── constants.json ├── ai2thornav │ ├── config.json │ └── constants.json ├── base_config.json ├── base_constants.json ├── combolock │ ├── config.json │ └── constants.json ├── diabcombolock │ ├── config.json │ └── constants.json ├── gridworld-canonical │ ├── config.json │ └── constants.json ├── gridworld-empty │ ├── config.json │ └── constants.json ├── gridworld-randomized-small │ ├── config.json │ └── constants.json ├── gridworld-randomized │ ├── config.json │ └── constants.json ├── gridworld1 │ ├── config.json │ └── constants.json ├── gridworld2 │ ├── config.json │ └── constants.json ├── matterport │ ├── config.json │ └── constants.json ├── newtonianmotion │ ├── config.json │ └── constants.json ├── objectnav │ ├── config.json │ └── constants.json ├── safetyworld │ ├── config.json │ └── constants.json ├── safetyworld2 │ ├── config.json │ └── constants.json ├── simplelqr │ ├── config.json │ └── constants.json ├── slotfactoredmdp │ ├── config.json │ └── constants.json ├── stochcombolock │ ├── config.json │ └── constants.json ├── temporal_combolock │ ├── config.json │ └── constants.json └── temporal_diabcombolock │ ├── config.json │ └── constants.json ├── local_runs ├── test_factorl.sh ├── test_homer.sh ├── test_id.sh └── test_ppe.sh ├── pyproject.toml ├── requirements.txt └── src ├── analysis_tools ├── __init__.py └── visualize_latent_dynamics.py ├── environments ├── __init__.py ├── ai2thorenv │ ├── __init__.py │ ├── ai2thor_exo_util.py │ ├── navai2thor.py │ └── objectnav.py ├── app_simulator │ ├── __init__.py │ ├── run_interactive.py │ ├── toggle_switches.py │ ├── tree_nav.py │ └── ui.py ├── control_env │ ├── __init__.py │ ├── newtonian_motion.py │ └── simple_lqr.py ├── gym_env │ ├── __init__.py │ └── gym_wrapper.py ├── intrepid_env_meta │ ├── __init__.py │ ├── action_type.py │ ├── environment_keys.py │ ├── environment_wrapper.py │ ├── gym_compatible.py │ ├── intrepid_env_interface.py │ └── make_env.py ├── matterport │ ├── __init__.py │ └── matterport.py ├── minigrid │ ├── __init__.py │ ├── exogenous_noise_util.py │ ├── gridworld1.py │ ├── gridworld2.py │ ├── gridworld_canonical.py │ ├── gridworld_empty.py │ ├── gridworld_randomized.py │ ├── gridworld_randomized_small.py │ └── gridworld_wrapper.py ├── rl_acid_env │ ├── __init__.py │ ├── combolock.py │ ├── diabolical_combolock.py │ ├── grid_world.py │ ├── noise_gen.py │ ├── rl_acid_wrapper.py │ ├── safety_world.py │ ├── safety_world2.py │ ├── slot_factored_mdp.py │ ├── temporal_combolock.py │ ├── temporal_diabcombolock.py │ └── visual_combolock.py └── robot_car │ ├── __init__.py │ ├── client │ ├── __init__.py │ ├── alex_inference.py │ ├── client_base.py │ ├── client_utils.py │ ├── closed_loop_client.py │ ├── inference.py │ ├── random_actions_client.py │ ├── state.py │ ├── state_capture_client.py │ └── tests.py │ ├── server │ ├── __init__.py │ ├── mock_pi_libraries.py │ └── server.py │ └── utils │ ├── __init__.py │ ├── check_corrupted_images.py │ ├── dataset.py │ ├── join_logs.py │ └── rc_car_data_processing.py ├── experiments ├── __init__.py ├── experiment_header.py ├── experiment_save.py ├── experimental_setup.py ├── run_factorl.py ├── run_homer.py ├── run_id.py ├── run_interactive_agent.py ├── run_mbrl_oracle.py ├── run_ppe.py ├── run_psdp.py ├── run_rep_learn_video.py ├── run_richid.py ├── run_robot_car.py 
├── run_sabre.py ├── run_sysid.py └── run_visualize_mbrl_oracle.py ├── learning ├── __init__.py ├── core_learner │ ├── __init__.py │ ├── abstract_rl_discrete_latent_state.py │ ├── abstract_video_rep_learner.py │ ├── acro_rep.py │ ├── factorl.py │ ├── fqi_oracle_decoder.py │ ├── homer.py │ ├── ik_learner.py │ ├── mbrl_oracle_decoder.py │ ├── ppe.py │ ├── ppe_util.py │ ├── prediction_video.py │ ├── richid.py │ ├── sabre.py │ └── temporal_contrastive_video.py ├── datastructures │ ├── __init__.py │ ├── abstract_tabular_mdp.py │ ├── count_conditional_probability.py │ ├── count_probability.py │ ├── elliptic_potential.py │ ├── episode.py │ └── transition.py ├── learning_utils │ ├── __init__.py │ ├── abstract_encoder_sampler.py │ ├── clustering_algorithm.py │ ├── collect_data_with_coverage.py │ ├── contextual_bandit_oracle.py │ ├── debug_train_encoding_function.py │ ├── encoder_sampler_all_random.py │ ├── encoder_sampler_bfs_reuse.py │ ├── encoder_sampler_forward_reuse.py │ ├── encoder_sampler_ik.py │ ├── encoder_sampler_reuse.py │ ├── encoder_sampler_same_policy.py │ ├── encoder_sampler_wrapper.py │ ├── entropy_decay_policy.py │ ├── evaluate_state_decoder.py │ ├── factorl_graph_identification.py │ ├── generic_learner.py │ ├── generic_train_classifier.py │ ├── homer_train_encoding_function.py │ ├── homer_train_encoding_function_utils.py │ ├── ik_train_encoding_function.py │ ├── independence_test.py │ ├── linear_disag_model.py │ ├── policy_evaluate.py │ ├── reconstruct_observation.py │ ├── ricatti_solver.py │ └── rl_discrete_latent_state_util.py ├── linear_mdp │ ├── __init__.py │ └── lsvi_ucb.py ├── model_estimation │ ├── __init__.py │ └── count_based_estimation.py ├── planning │ ├── __init__.py │ ├── cem │ │ ├── __init__.py │ │ └── cem_optimizer.py │ ├── high_level_planner │ │ ├── __init__.py │ │ └── dijkstra_planner.py │ ├── high_low_plan.py │ ├── hj_prox │ │ ├── __init__.py │ │ └── hj_prox_alg.py │ └── room_planner.py ├── policy_roll_in │ ├── __init__.py │ └── roll.py ├── policy_search │ ├── __init__.py │ ├── abstract_policy_search.py │ ├── fqi.py │ ├── greedy_policy_search.py │ ├── path_policy_search.py │ ├── policy_search_wrapper.py │ └── psdp.py ├── state_abstraction │ ├── __init__.py │ ├── abstract_state_decoder.py │ ├── autoencoder.py │ ├── generalized_inverse_kinematics.py │ ├── inverse_kinematics.py │ ├── noise_contrastive_dataset.py │ ├── noise_contrastive_global.py │ └── noise_contrastive_local.py └── tabular_rl │ ├── __init__.py │ ├── det_tabular_mdp_builder.py │ ├── q_learning_bonus.py │ ├── tabular_mdp_builder.py │ └── value_iteration.py ├── model ├── __init__.py ├── bottleneck │ ├── __init__.py │ ├── gaussian_bottleneck.py │ └── vq_bottleneck.py ├── classifiers │ ├── __init__.py │ ├── classifier_model_wrapper.py │ ├── conv2_classifier.py │ ├── conv3_classifier.py │ ├── convm_classifier.py │ ├── feedforward_classifier.py │ └── linear_classifier.py ├── decoder │ ├── __init__.py │ ├── conv_decoder.py │ ├── conv_decoder2.py │ ├── conv_decoder_ai2thor.py │ ├── decoder_wrapper.py │ └── feedforward_decoder.py ├── encoder │ ├── __init__.py │ ├── conv2_encoder.py │ ├── conv3_encoder.py │ ├── conv4_encoder.py │ ├── conv_encoder.py │ ├── encoder_wrapper.py │ └── feedforward_encoder.py ├── forward_model │ ├── __init__.py │ ├── conv_forward_model.py │ └── forward_model_wrapper.py ├── inverse_dynamics │ ├── __init__.py │ ├── encoded_mlp.py │ ├── inverse_dynamics_wrapper.py │ ├── simple_feed_forward.py │ └── tensor_inverse_dynamics.py ├── inverse_dynamics_model │ ├── __init__.py │ ├── 
action_predictor.py │ └── action_predictor_wrapper.py ├── misc │ ├── __init__.py │ ├── independence_test_model.py │ ├── lqr_model.py │ ├── richid_decoder.py │ └── robot_car │ │ ├── __init__.py │ │ ├── autoencoder_embeddings.py │ │ ├── autoencoder_test.py │ │ ├── autoencoder_train.py │ │ ├── dist_pred_model.py │ │ ├── ema_pytorch.py │ │ ├── emprical_mdp.py │ │ ├── latent_forward.py │ │ ├── latent_inverse.py │ │ ├── mixer.py │ │ ├── models.py │ │ ├── pl_vae.py │ │ └── positional_encoding.py ├── model_wrapper.py ├── policy │ ├── __init__.py │ ├── abstract_nonstationary.py │ ├── abstract_stationary.py │ ├── nonstationary_composed_policy.py │ ├── open_loop.py │ ├── stationary_action_condition_policy.py │ ├── stationary_constant_policy.py │ ├── stationary_decoder_dictionary_policy.py │ ├── stationary_deterministic_policy.py │ ├── stationary_dictionary_policy.py │ └── stationary_stochastic_policy.py └── transition_encoders │ ├── __init__.py │ ├── compositional_encoder_model.py │ ├── encoder_model.py │ ├── encoder_model_wrapper.py │ └── factorl_encoder.py ├── setup_validator ├── __init__.py ├── config_key_registry.py └── core_validator.py ├── unit_test ├── __init__.py ├── dynamic_arguments.py ├── gridworld_test.py ├── lp_solver.py ├── make_env.py ├── manual_control.py ├── matterport_exo.py ├── matterport_test.py ├── module_multiprocessing.py ├── montezuma_human_interactive.py ├── multiprocessing_different_gpu.py ├── shared_replay_memory.py ├── test_matterport.py └── test_slot_factored_mdp.py └── utils ├── __init__.py ├── average.py ├── beautify_time.py ├── conv_util.py ├── cuda.py ├── generic_policy.py ├── gumbel.py ├── leaky_softmax.py ├── multiprocess_logger.py ├── normalizer.py ├── shared_replay_memory.py ├── simclr_transform.py ├── telemetry.py └── tensorboard.py /.github/workflows/python_formatting.yml: -------------------------------------------------------------------------------- 1 | name: Python Formatting 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - 'releases/**' 8 | pull_request: 9 | branches: 10 | - '*' 11 | 12 | jobs: 13 | python-formatting: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | python-version: ["3.10"] 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v3 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install black black[jupyter] 30 | 31 | - name: Check formatting 32 | run: | 33 | python -m black --check . 34 | 35 | - name: How to fix errors 36 | if: ${{ failure() }} 37 | shell: bash 38 | run: | 39 | echo "To fix formatting errors, run:" 40 | echo "python3 -m black ." 
41 | exit 1 42 | -------------------------------------------------------------------------------- /.github/workflows/python_lint.yml: -------------------------------------------------------------------------------- 1 | name: Python Lint 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - 'releases/**' 8 | pull_request: 9 | branches: 10 | - '*' 11 | 12 | jobs: 13 | python-lint: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | python-version: ["3.10"] 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v3 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | 26 | - uses: chartboost/ruff-action@v1 27 | with: 28 | # ignore error E501: line too long 29 | args: --ignore E501 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # PyCharm 2 | __pycache__ 3 | .DS_Store 4 | 5 | # Executables 6 | *.o 7 | *.a 8 | bin 9 | *.tgz 10 | *.pc 11 | *.pyc 12 | *~ 13 | 14 | # VS files 15 | *.so 16 | 17 | # IntelliJ 18 | *.idea 19 | 20 | # Java 21 | java/pom.xml 22 | 23 | # Ruff linter 24 | .ruff_cache 25 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | 9 | For help and questions about using this project, please raise an issue and add a [Questions] or [Help] tag in the title. 10 | 11 | ## Microsoft Support Policy 12 | 13 | Support for this project is limited to the resources listed above. 14 | -------------------------------------------------------------------------------- /data/MontezumaRevengeDeterministic-v4/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 18, 3 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], 4 | "horizon": 30, 5 | "obs_dim": [84, 84, 1], 6 | "feature_type": "image", 7 | "openai_state_type": "ram", 8 | "gamma": 1.0, 9 | "num_repeat_action": 4 10 | } 11 | -------------------------------------------------------------------------------- /data/MontezumaRevengeDeterministic-v4/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 48, 4 | "encoder_training_num_samples": 4000, 5 | "encoder_training_epoch": 1000, 6 | "encoder_training_lr": 0.00025, 7 | "encoder_training_batch_size": 256, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 8000, 10 | "cb_oracle_epoch": 20, 11 | "cb_oracle_lr": 0.00025, 12 | "cb_oracle_batch_size": 32, 13 | "cb_validation_pct": 0.2, 14 | "cb_patience": 5, 15 | "eval_homing_policy_sample_size": 50, 16 | "n_feature_maps": 64, 17 | "n_hidden": 512, 18 | "p_dropout": 0.0, 19 | "phi_layer_size": 25, 20 | "entropy_reg_coeff": 0.075, 21 | "bootstrap_encoder_model": false, 22 | "failed_homing_policy_filter": false, 23 | "encoder_sampling_style": "reuse", 24 | "data_aggregation": false, 25 | "reward_free_planner": "gps", 26 | "reward_sensitive_planner": "fqi", 27 | "patience": 40, 28 | "bias_homing_policy": false, 29 | "entropy_policy": "none", 30 | "filter_unreachable_abstract_states": true, 31 | "filter_old_abstract_states": true, 32 | "use_l1_penalty": false, 33 | "expected_optima": 0.0, 34 | "max_try": 1, 35 | "reward_type": "deterministic", 36 | "count_type": "state" 37 | } 38 | -------------------------------------------------------------------------------- /data/ai2thornav/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "horizon": 7, 5 | "scene_name": "FloorPlan201", 6 | "headless": -1, 7 | "obs_dim": [56, 112, 3], 8 | "feature_type": "image", 9 | "gamma": 1.0 10 | } 11 | -------------------------------------------------------------------------------- /data/ai2thornav/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 48, 4 |
"encoder_training_num_samples": 2000, 5 | "encoder_training_epoch": 100, 6 | "encoder_training_lr": 0.00025, 7 | "encoder_training_batch_size": 32, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 8000, 10 | "decoder_type": "conv-ai2thor", 11 | "cb_oracle_epoch": 20, 12 | "cb_oracle_lr": 0.00025, 13 | "cb_oracle_batch_size": 32, 14 | "cb_validation_pct": 0.2, 15 | "cb_patience": 5, 16 | "eval_homing_policy_sample_size": 50, 17 | "n_feature_maps": 64, 18 | "n_hidden": 512, 19 | "p_dropout": 0.0, 20 | "phi_layer_size": 25, 21 | "entropy_reg_coeff": 0.075, 22 | "bootstrap_encoder_model": false, 23 | "failed_homing_policy_filter": false, 24 | "encoder_sampling_style": "reuse", 25 | "data_aggregation": false, 26 | "reward_free_planner": "pps", 27 | "reward_sensitive_planner": "psdp", 28 | "patience": 20, 29 | "bias_homing_policy": false, 30 | "entropy_policy": "none", 31 | "filter_unreachable_abstract_states": true, 32 | "filter_old_abstract_states": true, 33 | "use_l1_penalty": false, 34 | "expected_optima": 0.0, 35 | "max_try": 1 36 | } 37 | -------------------------------------------------------------------------------- /data/base_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 10, 3 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 4 | "horizon": 20, 5 | "obs_dim": -1, 6 | "enable_exo": 1, 7 | "exo_type": "pixel", 8 | "pixel_size": 5, 9 | "num_exo_var": 100, 10 | "exo_reward": 1, 11 | "color_map": -1, 12 | "swap_prob": 0.0, 13 | "spawn_prob": 0.0, 14 | "optimal_reward": 5.0, 15 | "anti_shaping_reward": 0.0, 16 | "anti_shaping_reward2": 1.0, 17 | "feature_type": "feature", 18 | "noise_type": "hadamhardg", 19 | "return_state": true, 20 | "exo_dim": -1, 21 | "gamma": 1.0, 22 | "num_repeat_action": 1, 23 | "scene_name": "FloorPlan201", 24 | "headless": -1, 25 | "det_start": 1, 26 | "ego_centric": 1, 27 | "freeze": 0, 28 | "encoder_path": "none" 29 | } 30 | -------------------------------------------------------------------------------- /data/base_constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "samples": 1000, 3 | "lr": 0.001, 4 | "batch_size": 32, 5 | "bs": 256, 6 | "grad_clip": 20, 7 | "sabre_n": 10, 8 | "sabre_b": 3, 9 | "sabre_m": 100, 10 | "sabre_eval": 100, 11 | "is_autoencoder": 1, 12 | "rep_alg": "none", 13 | "data_type": "random", 14 | "sabre_finetune": -1, 15 | "max_episodes": 500, 16 | "max_epoch": 50, 17 | "rnd_bonus": 0, 18 | "entropy_coeff": 0.001, 19 | "num_ppo_updates": 4, 20 | "eps_clip": 0.1, 21 | "classifier_type": "conv2", 22 | "encoder_type": "conv", 23 | "decoder_type": "conv", 24 | "do_rl": -1, 25 | "temperature": 1.0, 26 | "apply_aug": -1, 27 | "hidden_dim": 56, 28 | "learning_rate": 0.001, 29 | "num_homing_policy": 2, 30 | "elim_param": -1, 31 | "encoder_training_num_samples": 20000, 32 | "encoder_training_epoch": 200, 33 | "num_processes": 1, 34 | "forward_model_type": "forwardmodel", 35 | "backward_model_type": "backwardmodel", 36 | "model_type": "encoder", 37 | "nce_from_dataset": true, 38 | "discretization": true, 39 | "policy_type": "linear", 40 | "encoder_training_lr": 0.001, 41 | "encoder_training_batch_size": 32, 42 | "validation_data_percent": 0.2, 43 | "psdp_training_num_samples": 20000, 44 | "cb_oracle_epoch": 40, 45 | "cb_oracle_lr": 0.001, 46 | "cb_oracle_batch_size": 32, 47 | "cb_validation_pct": 0.2, 48 | "cb_patience": 5, 49 | "eval_homing_policy_sample_size": 100, 50 | "n_hidden": 56, 51 | "entropy_reg_coeff": 
0.075, 52 | "bootstrap_encoder_model": false, 53 | "failed_homing_policy_filter": false, 54 | "encoder_sampling_style": "reuse", 55 | "data_aggregation": false, 56 | "reward_free_planner": "gps", 57 | "reward_sensitive_planner": "fqi", 58 | "patience": 10, 59 | "bias_homing_policy": false, 60 | "entropy_policy": "none", 61 | "filter_unreachable_abstract_states": false, 62 | "filter_old_abstract_states": false, 63 | "use_l1_penalty": false, 64 | "expected_optima": 0.685, 65 | "max_try": 10, 66 | "reward_type": "stochastic", 67 | "count_type": "state-action", 68 | "clustering_threshold": 0.15, 69 | "vq_dim": 256, 70 | "vq_codebook_size" : 512, 71 | "vq_decay" : 0.8, 72 | "vq_commitment_weight" : 0.1, 73 | "vq_orthogonal_reg_weight" : 100, 74 | "vq_orthogonal_reg_max_codes" : 128, 75 | "vq_orthogonal_reg_active_codes_only" : false, 76 | "vq_heads" : 4, 77 | "vq_separate_codebook_per_head": false, 78 | "vq_codebook_dim" : 32, 79 | "vq_sample_codebook_temp": 0, 80 | "vq_kmeans_init": false, 81 | "vq_kmeans_iters": 10, 82 | "use_vq": 1, 83 | "use_gb": 0, 84 | "max_k": 1 85 | } 86 | -------------------------------------------------------------------------------- /data/combolock/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 2, 3 | "actions": [0, 1], 4 | "horizon": 20, 5 | "obs_dim": -1, 6 | "feature_type": "feature", 7 | "gamma": 1.0 8 | } 9 | -------------------------------------------------------------------------------- /data/combolock/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "encoder_training_num_samples": 2000, 5 | "encoder_training_epoch": 100, 6 | "encoder_training_lr": 0.001, 7 | "encoder_training_batch_size": 32, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 500, 10 | "cb_oracle_epoch": 20, 11 | "cb_oracle_lr": 0.001, 12 | "cb_oracle_batch_size": 32, 13 | "eval_homing_policy_sample_size": 500, 14 | "n_hidden": 56, 15 | "entropy_reg_coeff": 0.075, 16 | "bootstrap_encoder_model": true, 17 | "failed_homing_policy_filter": false, 18 | "encoder_sampling_style": "reuse", 19 | "data_aggregation": false, 20 | "policy_search": "gps", 21 | "patience": 100, 22 | "bias_homing_policy": false, 23 | "entropy_policy": "smart", 24 | "filter_unreachable_abstract_states": false, 25 | "filter_old_abstract_states": false, 26 | "use_l1_penalty": false, 27 | "expected_optima": 0.62, 28 | "max_try": 3 29 | } 30 | -------------------------------------------------------------------------------- /data/diabcombolock/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 10, 3 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 4 | "horizon": 20, 5 | "obs_dim": -1, 6 | "swap_prob": 0.0, 7 | "spawn_prob": 0.0, 8 | "optimal_reward": 5.0, 9 | "anti_shaping_reward": 0.0, 10 | "anti_shaping_reward2": 1.0, 11 | "feature_type": "feature", 12 | "return_state": true, 13 | "gamma": 1.0 14 | } 15 | -------------------------------------------------------------------------------- /data/diabcombolock/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "encoder_training_num_samples": 20000, 5 | "encoder_training_epoch": 200, 6 | "num_processes": 1, 7 | "forwardmodel": "forwardmodel", 8 | "backwardmodel": "backwardmodel", 9 | "discretization": true, 10 | 
"policy_type": "linear", 11 | "encoder_training_lr": 0.001, 12 | "encoder_training_batch_size": 32, 13 | "validation_data_percent": 0.2, 14 | "psdp_training_num_samples": 20000, 15 | "cb_oracle_epoch": 40, 16 | "cb_oracle_lr": 0.001, 17 | "cb_oracle_batch_size": 32, 18 | "cb_validation_pct": 0.2, 19 | "cb_patience": 5, 20 | "eval_homing_policy_sample_size": 100, 21 | "n_hidden": 56, 22 | "entropy_reg_coeff": 0.075, 23 | "bootstrap_encoder_model": false, 24 | "failed_homing_policy_filter": false, 25 | "encoder_sampling_style": "reuse", 26 | "data_aggregation": false, 27 | "reward_free_planner": "gps", 28 | "reward_sensitive_planner": "fqi", 29 | "patience": 10, 30 | "bias_homing_policy": false, 31 | "entropy_policy": "none", 32 | "filter_unreachable_abstract_states": false, 33 | "filter_old_abstract_states": false, 34 | "use_l1_penalty": false, 35 | "expected_optima": 0.685, 36 | "max_try": 10, 37 | "reward_type": "stochastic", 38 | "count_type": "state-action", 39 | "clustering_threshold": 0.15 40 | } 41 | -------------------------------------------------------------------------------- /data/gridworld-canonical/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "horizon": 10, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | "width": 7, 8 | "height": 7, 9 | "tile_size": 8, 10 | "enable_exo": false, 11 | "exo_type": "pixel", 12 | "num_exo_var": 0, 13 | "circle_width": 1, 14 | "circle_motion": 0.05, 15 | "feature_type": "image", 16 | "noise_type": "none", 17 | "return_state": true, 18 | "gamma": 1.0 19 | } 20 | -------------------------------------------------------------------------------- /data/gridworld-canonical/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 13, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "encoder_type": "conv", 7 | "decoder_type": "conv2", 8 | "elim_param": 0.02, 9 | "lr": 3e-4, 10 | "sample_size": 40000, 11 | "max_epoch": 100, 12 | "batch_size": 32, 13 | "patience": 20, 14 | "bootstrap_action_predictor": "False", 15 | "validation_data_percent": 0.2, 16 | "hidden_dim": 256, 17 | "grad_clip": 2.5, 18 | "shared_action_predictor": "False", 19 | "reward_free_planner": "pps", 20 | "reward_sensitive_planner": "psdp", 21 | "encoder_training_epoch": 200, 22 | "encoder_training_lr": 0.00025, 23 | "encoder_training_batch_size": 128, 24 | "psdp_training_num_samples": 500, 25 | "cb_oracle_epoch": 20, 26 | "cb_oracle_lr": 0.001, 27 | "cb_oracle_batch_size": 32, 28 | "cb_patience": 5, 29 | "cb_validation_pct": 0.2, 30 | "eval_homing_policy_sample_size": 500, 31 | "n_hidden": 56, 32 | "entropy_reg_coeff": 0.005, 33 | "bootstrap_encoder_model": false, 34 | "clustering_threshold": 0.05, 35 | "nce_from_dataset": true, 36 | "failed_homing_policy_filter": false, 37 | "encoder_sampling_style": "reuse", 38 | "data_aggregation": false, 39 | "policy_search": "pps", 40 | "bias_homing_policy": false, 41 | "entropy_policy": "linear", 42 | "filter_unreachable_abstract_states": false, 43 | "filter_old_abstract_states": false, 44 | "use_l1_penalty": false, 45 | "expected_optima": 0.0, 46 | "max_try": 1 47 | } 48 | -------------------------------------------------------------------------------- /data/gridworld-empty/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 5, 3 | 
"actions": [0, 1, 2, 3, 4], 4 | "horizon": 10, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | "width": 7, 8 | "height": 7, 9 | "tile_size": 8, 10 | "enable_exo": false, 11 | "exo_type": "pixel", 12 | "num_exo_var": 0, 13 | "circle_width": 1, 14 | "circle_motion": 0.05, 15 | "feature_type": "image", 16 | "noise_type": "none", 17 | "return_state": true, 18 | "gamma": 1.0 19 | } 20 | -------------------------------------------------------------------------------- /data/gridworld-empty/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 13, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "encoder_type": "conv", 7 | "decoder_type": "conv2", 8 | "elim_param": 0.02, 9 | "lr": 3e-4, 10 | "sample_size": 40000, 11 | "max_epoch": 100, 12 | "batch_size": 32, 13 | "patience": 20, 14 | "bootstrap_action_predictor": "False", 15 | "validation_data_percent": 0.2, 16 | "hidden_dim": 256, 17 | "grad_clip": 2.5, 18 | "shared_action_predictor": "False", 19 | "reward_free_planner": "pps", 20 | "reward_sensitive_planner": "psdp", 21 | "encoder_training_epoch": 200, 22 | "encoder_training_lr": 0.00025, 23 | "encoder_training_batch_size": 128, 24 | "psdp_training_num_samples": 500, 25 | "cb_oracle_epoch": 20, 26 | "cb_oracle_lr": 0.001, 27 | "cb_oracle_batch_size": 32, 28 | "cb_patience": 5, 29 | "cb_validation_pct": 0.2, 30 | "eval_homing_policy_sample_size": 500, 31 | "n_hidden": 56, 32 | "entropy_reg_coeff": 0.005, 33 | "bootstrap_encoder_model": false, 34 | "clustering_threshold": 0.05, 35 | "nce_from_dataset": true, 36 | "failed_homing_policy_filter": false, 37 | "encoder_sampling_style": "reuse", 38 | "data_aggregation": false, 39 | "policy_search": "pps", 40 | "bias_homing_policy": false, 41 | "entropy_policy": "linear", 42 | "filter_unreachable_abstract_states": false, 43 | "filter_old_abstract_states": false, 44 | "use_l1_penalty": false, 45 | "expected_optima": 0.0, 46 | "max_try": 1 47 | } 48 | -------------------------------------------------------------------------------- /data/gridworld-randomized-small/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 3, 3 | "actions": [0, 1, 2], 4 | "horizon": 10, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | "width": 12, 8 | "height": 12, 9 | "tile_size": 8, 10 | "color_map": 1, 11 | "exo_type": "pixel", 12 | "num_exo_var": 10, 13 | "pixel_size": 5, 14 | "circle_width": 1, 15 | "circle_motion": 0.05, 16 | "feature_type": "image", 17 | "noise_type": "none", 18 | "return_state": true, 19 | "gamma": 1.0, 20 | "ego_centric": 1 21 | } 22 | -------------------------------------------------------------------------------- /data/gridworld-randomized-small/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 13, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "encoder_type": "conv3", 7 | "decoder_type": "conv", 8 | "policy_type": "conv1", 9 | "model_type": "conv1", 10 | "elim_param": 0.02, 11 | "lr": 3e-4, 12 | "sample_size": 40000, 13 | "max_epoch": 100, 14 | "batch_size": 32, 15 | "patience": 20, 16 | "bootstrap_action_predictor": "False", 17 | "validation_data_percent": 0.2, 18 | "hidden_dim": 256, 19 | "grad_clip": 2.5, 20 | "shared_action_predictor": "False", 21 | "reward_free_planner": "pps", 22 | 
"reward_sensitive_planner": "psdp", 23 | "encoder_training_epoch": 100, 24 | "encoder_training_lr": 0.001, 25 | "encoder_training_batch_size": 128, 26 | "psdp_training_num_samples": 500, 27 | "cb_oracle_epoch": 20, 28 | "cb_oracle_lr": 0.001, 29 | "cb_oracle_batch_size": 32, 30 | "cb_patience": 5, 31 | "cb_validation_pct": 0.2, 32 | "eval_homing_policy_sample_size": 500, 33 | "n_hidden": 56, 34 | "entropy_reg_coeff": 0.005, 35 | "bootstrap_encoder_model": false, 36 | "clustering_threshold": 0.05, 37 | "nce_from_dataset": true, 38 | "failed_homing_policy_filter": false, 39 | "encoder_sampling_style": "reuse", 40 | "data_aggregation": false, 41 | "policy_search": "pps", 42 | "bias_homing_policy": false, 43 | "entropy_policy": "linear", 44 | "filter_unreachable_abstract_states": false, 45 | "filter_old_abstract_states": false, 46 | "use_l1_penalty": false, 47 | "expected_optima": 0.0, 48 | "max_try": 1 49 | } 50 | -------------------------------------------------------------------------------- /data/gridworld-randomized/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 3, 3 | "actions": [0, 1, 2], 4 | "horizon": 10, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | "width": 15, 8 | "height": 15, 9 | "tile_size": 8, 10 | "num_exo_var": 0, 11 | "circle_width": 1, 12 | "circle_motion": 0.05, 13 | "feature_type": "image", 14 | "noise_type": "none", 15 | "return_state": true, 16 | "gamma": 1.0 17 | } 18 | -------------------------------------------------------------------------------- /data/gridworld-randomized/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 13, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "encoder_type": "conv", 7 | "decoder_type": "conv", 8 | "policy_type": "conv1", 9 | "model_type": "conv1", 10 | "elim_param": 0.02, 11 | "lr": 3e-4, 12 | "sample_size": 40000, 13 | "max_epoch": 100, 14 | "batch_size": 32, 15 | "patience": 20, 16 | "bootstrap_action_predictor": "False", 17 | "validation_data_percent": 0.2, 18 | "hidden_dim": 256, 19 | "grad_clip": 2.5, 20 | "shared_action_predictor": "False", 21 | "reward_free_planner": "pps", 22 | "reward_sensitive_planner": "psdp", 23 | "encoder_training_epoch": 200, 24 | "encoder_training_lr": 0.00025, 25 | "encoder_training_batch_size": 512, 26 | "psdp_training_num_samples": 500, 27 | "cb_oracle_epoch": 20, 28 | "cb_oracle_lr": 0.001, 29 | "cb_oracle_batch_size": 32, 30 | "cb_patience": 5, 31 | "cb_validation_pct": 0.2, 32 | "eval_homing_policy_sample_size": 500, 33 | "n_hidden": 56, 34 | "entropy_reg_coeff": 0.005, 35 | "bootstrap_encoder_model": false, 36 | "clustering_threshold": 0.05, 37 | "nce_from_dataset": true, 38 | "failed_homing_policy_filter": false, 39 | "encoder_sampling_style": "reuse", 40 | "data_aggregation": false, 41 | "policy_search": "pps", 42 | "bias_homing_policy": false, 43 | "entropy_policy": "linear", 44 | "filter_unreachable_abstract_states": false, 45 | "filter_old_abstract_states": false, 46 | "use_l1_penalty": false, 47 | "expected_optima": 0.0, 48 | "max_try": 1 49 | } 50 | -------------------------------------------------------------------------------- /data/gridworld1/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 5, 3 | "actions": [0, 1, 2, 3, 4], 4 | "horizon": 18, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | 
"width": 7, 8 | "height": 7, 9 | "tile_size": 8, 10 | "num_exo_var": 0, 11 | "feature_type": "image", 12 | "noise_type": "none", 13 | "return_state": true, 14 | "gamma": 1.0 15 | } 16 | -------------------------------------------------------------------------------- /data/gridworld1/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "elim_param": 0.02, 7 | "lr": 3e-4, 8 | "sample_size": 40000, 9 | "max_epoch": 100, 10 | "batch_size": 32, 11 | "patience": 5, 12 | "bootstrap_action_predictor": "False", 13 | "validation_data_percent": 0.2, 14 | "hidden_dim": 512, 15 | "grad_clip": 100, 16 | "shared_action_predictor": "False", 17 | "reward_free_planner": "pps", 18 | "reward_sensitive_planner": "psdp", 19 | "encoder_training_epoch": 50, 20 | "encoder_training_lr": 0.001, 21 | "encoder_training_batch_size": 256, 22 | "psdp_training_num_samples": 500, 23 | "cb_oracle_epoch": 20, 24 | "cb_oracle_lr": 0.001, 25 | "cb_oracle_batch_size": 32, 26 | "cb_patience": 5, 27 | "cb_validation_pct": 0.2, 28 | "eval_homing_policy_sample_size": 500, 29 | "n_hidden": 56, 30 | "entropy_reg_coeff": 0.075, 31 | "bootstrap_encoder_model": false, 32 | "clustering_threshold": 0.05, 33 | "nce_from_dataset": true, 34 | "failed_homing_policy_filter": false, 35 | "encoder_sampling_style": "reuse", 36 | "data_aggregation": false, 37 | "policy_search": "pps", 38 | "bias_homing_policy": false, 39 | "entropy_policy": "none", 40 | "filter_unreachable_abstract_states": false, 41 | "filter_old_abstract_states": false, 42 | "use_l1_penalty": false, 43 | "expected_optima": 0.0, 44 | "max_try": 1 45 | } 46 | -------------------------------------------------------------------------------- /data/gridworld2/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 5, 3 | "actions": [0, 1, 2, 3, 4], 4 | "horizon": 10, 5 | "obs_dim": [56, 56, 3], 6 | "agent_view_size": 7, 7 | "width": 7, 8 | "height": 7, 9 | "tile_size": 8, 10 | "num_exo_var": 0, 11 | "circle_width": 1, 12 | "circle_motion": 0.05, 13 | "feature_type": "image", 14 | "noise_type": "none", 15 | "return_state": true, 16 | "gamma": 1.0 17 | } 18 | -------------------------------------------------------------------------------- /data/gridworld2/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 13, 4 | "encoder_training_num_samples": 5000, 5 | "classifier_type": "conv", 6 | "encoder_type": "conv", 7 | "decoder_type": "conv", 8 | "elim_param": 0.02, 9 | "lr": 3e-4, 10 | "sample_size": 40000, 11 | "max_epoch": 100, 12 | "batch_size": 32, 13 | "patience": 20, 14 | "bootstrap_action_predictor": "False", 15 | "validation_data_percent": 0.2, 16 | "hidden_dim": 256, 17 | "grad_clip": 2.5, 18 | "shared_action_predictor": "False", 19 | "reward_free_planner": "pps", 20 | "reward_sensitive_planner": "psdp", 21 | "encoder_training_epoch": 200, 22 | "encoder_training_lr": 0.00025, 23 | "encoder_training_batch_size": 512, 24 | "psdp_training_num_samples": 500, 25 | "cb_oracle_epoch": 20, 26 | "cb_oracle_lr": 0.001, 27 | "cb_oracle_batch_size": 32, 28 | "cb_patience": 5, 29 | "cb_validation_pct": 0.2, 30 | "eval_homing_policy_sample_size": 500, 31 | "n_hidden": 56, 32 | "entropy_reg_coeff": 0.005, 33 | "bootstrap_encoder_model": false, 34 | 
"clustering_threshold": 0.05, 35 | "nce_from_dataset": true, 36 | "failed_homing_policy_filter": false, 37 | "encoder_sampling_style": "reuse", 38 | "data_aggregation": false, 39 | "policy_search": "pps", 40 | "bias_homing_policy": false, 41 | "entropy_policy": "linear", 42 | "filter_unreachable_abstract_states": false, 43 | "filter_old_abstract_states": false, 44 | "use_l1_penalty": false, 45 | "expected_optima": 0.0, 46 | "max_try": 1 47 | } 48 | -------------------------------------------------------------------------------- /data/matterport/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "horizon": 30, 5 | "obs_dim": [60, 80, 3], 6 | "feature_type": "image", 7 | "gamma": 1.0, 8 | "use_exo": true, 9 | "height": 480, 10 | "width": 640, 11 | "vfov": 60, 12 | "dataset": "/root/mount/Matterport3DSimulator/data/v1/scans", 13 | "connectivity": "/root/mount/Matterport3DSimulator/connectivity/" 14 | } 15 | -------------------------------------------------------------------------------- /data/matterport/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 48, 4 | "encoder_training_num_samples": 4000, 5 | "encoder_training_epoch": 100, 6 | "encoder_training_lr": 0.00025, 7 | "encoder_training_batch_size": 32, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 8000, 10 | "cb_oracle_epoch": 20, 11 | "cb_oracle_lr": 0.00025, 12 | "cb_oracle_batch_size": 32, 13 | "cb_validation_pct": 0.2, 14 | "cb_patience": 5, 15 | "eval_homing_policy_sample_size": 50, 16 | "n_feature_maps": 64, 17 | "n_hidden": 512, 18 | "p_dropout": 0.0, 19 | "phi_layer_size": 25, 20 | "entropy_reg_coeff": 0.075, 21 | "bootstrap_encoder_model": false, 22 | "failed_homing_policy_filter": false, 23 | "encoder_sampling_style": "reuse", 24 | "data_aggregation": false, 25 | "reward_free_planner": "pps", 26 | "reward_sensitive_planner": "psdp", 27 | "patience": 30, 28 | "bias_homing_policy": false, 29 | "entropy_policy": "none", 30 | "filter_unreachable_abstract_states": true, 31 | "filter_old_abstract_states": true, 32 | "use_l1_penalty": false, 33 | "expected_optima": 0.0, 34 | "max_try": 1, 35 | "reward_type": "deterministic", 36 | "count_type": "state" 37 | } 38 | -------------------------------------------------------------------------------- /data/newtonianmotion/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "world_dim": 2, 3 | "state_dim": 4, 4 | "act_dim": 2, 5 | "actions": ["continuous"], 6 | "horizon": 1000, 7 | "noise": 0.1, 8 | "obs_dim": 4, 9 | "feature_type": "feature", 10 | "acc_penalty": 0.5, 11 | "gamma": 1.0 12 | } 13 | -------------------------------------------------------------------------------- /data/newtonianmotion/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "max_iter": 10000, 3 | "learning_rate": 0.001, 4 | "k0": 1, 5 | "batch_size": 32, 6 | "grad_clip": 2.5, 7 | "samples": 1000, 8 | "max_epoch": 100, 9 | "validation_data_percent": 0.2, 10 | "failed_homing_policy_filter": false, 11 | "encoder_sampling_style": "reuse", 12 | "data_aggregation": false, 13 | "reward_free_planner": "gps", 14 | "reward_sensitive_planner": "fqi", 15 | "patience": 100, 16 | "bias_homing_policy": false, 17 | "entropy_policy": "none", 18 | "filter_unreachable_abstract_states": false, 19 | 
"filter_old_abstract_states": false, 20 | "expected_optima": 0.685, 21 | "max_try": 10 22 | } 23 | -------------------------------------------------------------------------------- /data/objectnav/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "horizon": 7, 5 | "scene_name": "FloorPlan201", 6 | "headless": -1, 7 | "obs_dim": [1000, 1000, 3], 8 | "feature_type": "image", 9 | "gamma": 1.0 10 | } 11 | -------------------------------------------------------------------------------- /data/objectnav/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.00025, 3 | "num_homing_policy": 48, 4 | "encoder_training_num_samples": 2000, 5 | "encoder_training_epoch": 100, 6 | "encoder_training_lr": 0.00025, 7 | "encoder_training_batch_size": 32, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 8000, 10 | "decoder_type": "conv-ai2thor", 11 | "cb_oracle_epoch": 20, 12 | "cb_oracle_lr": 0.00025, 13 | "cb_oracle_batch_size": 32, 14 | "cb_validation_pct": 0.2, 15 | "cb_patience": 5, 16 | "eval_homing_policy_sample_size": 50, 17 | "n_feature_maps": 64, 18 | "n_hidden": 512, 19 | "p_dropout": 0.0, 20 | "phi_layer_size": 25, 21 | "entropy_reg_coeff": 0.075, 22 | "bootstrap_encoder_model": false, 23 | "failed_homing_policy_filter": false, 24 | "encoder_sampling_style": "reuse", 25 | "data_aggregation": false, 26 | "reward_free_planner": "pps", 27 | "reward_sensitive_planner": "psdp", 28 | "patience": 20, 29 | "bias_homing_policy": false, 30 | "entropy_policy": "none", 31 | "filter_unreachable_abstract_states": true, 32 | "filter_old_abstract_states": true, 33 | "use_l1_penalty": false, 34 | "expected_optima": 0.0, 35 | "max_try": 1 36 | } 37 | -------------------------------------------------------------------------------- /data/safetyworld/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "stop_action": 3, 5 | "horizon": 20, 6 | "obs_dim": -1, 7 | "swap_prob": 0.0, 8 | "spawn_prob": 0.0, 9 | "optimal_reward": 5.0, 10 | "anti_shaping_reward": 0.0, 11 | "anti_shaping_reward2": 1.0, 12 | "feature_type": "feature", 13 | "return_state": true, 14 | "gamma": 1.0 15 | } 16 | -------------------------------------------------------------------------------- /data/safetyworld/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "batch_size": 32, 5 | "max_episodes": 500, 6 | "sabre_finetune": 1, 7 | "model_type": "safe_ff", 8 | "eps_clip": 0.1, 9 | "num_ppo_updates": 10, 10 | "rnd_bonus_coeff": 0, 11 | "entropy_coeff": 0.01, 12 | "encoder_training_num_samples": 20000, 13 | "encoder_training_epoch": 200, 14 | "num_processes": 1, 15 | "forwardmodel": "forwardmodel", 16 | "backwardmodel": "backwardmodel", 17 | "discretization": true, 18 | "policy_type": "safe_ff", 19 | "encoder_training_lr": 0.001, 20 | "encoder_training_batch_size": 32, 21 | "validation_data_percent": 0.2, 22 | "psdp_training_num_samples": 20000, 23 | "cb_oracle_epoch": 40, 24 | "cb_oracle_lr": 0.001, 25 | "cb_oracle_batch_size": 32, 26 | "cb_validation_pct": 0.2, 27 | "cb_patience": 5, 28 | "eval_homing_policy_sample_size": 100, 29 | "n_hidden": 56, 30 | "entropy_reg_coeff": 0.075, 31 | "bootstrap_encoder_model": false, 32 | 
"failed_homing_policy_filter": false, 33 | "encoder_sampling_style": "reuse", 34 | "data_aggregation": false, 35 | "reward_free_planner": "gps", 36 | "reward_sensitive_planner": "fqi", 37 | "patience": 10, 38 | "bias_homing_policy": false, 39 | "entropy_policy": "none", 40 | "filter_unreachable_abstract_states": false, 41 | "filter_old_abstract_states": false, 42 | "use_l1_penalty": false, 43 | "expected_optima": 0.685, 44 | "max_try": 10, 45 | "reward_type": "stochastic", 46 | "count_type": "state-action", 47 | "clustering_threshold": 0.15 48 | } 49 | -------------------------------------------------------------------------------- /data/safetyworld2/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 4, 3 | "actions": [0, 1, 2, 3], 4 | "stop_action": 3, 5 | "horizon": 20, 6 | "obs_dim": -1, 7 | "swap_prob": 0.0, 8 | "spawn_prob": 0.0, 9 | "optimal_reward": 5.0, 10 | "anti_shaping_reward": 0.0, 11 | "anti_shaping_reward2": 1.0, 12 | "feature_type": "feature", 13 | "return_state": true, 14 | "gamma": 1.0 15 | } 16 | -------------------------------------------------------------------------------- /data/safetyworld2/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "batch_size": 32, 5 | "max_episodes": 500, 6 | "sabre_finetune": 1, 7 | "model_type": "safe_ff", 8 | "eps_clip": 0.1, 9 | "num_ppo_updates": 10, 10 | "rnd_bonus_coeff": 0, 11 | "entropy_coeff": 0.01, 12 | "encoder_training_num_samples": 20000, 13 | "encoder_training_epoch": 200, 14 | "num_processes": 1, 15 | "forwardmodel": "forwardmodel", 16 | "backwardmodel": "backwardmodel", 17 | "discretization": true, 18 | "policy_type": "linear", 19 | "encoder_training_lr": 0.001, 20 | "encoder_training_batch_size": 32, 21 | "validation_data_percent": 0.2, 22 | "psdp_training_num_samples": 20000, 23 | "cb_oracle_epoch": 40, 24 | "cb_oracle_lr": 0.001, 25 | "cb_oracle_batch_size": 32, 26 | "cb_validation_pct": 0.2, 27 | "cb_patience": 5, 28 | "eval_homing_policy_sample_size": 100, 29 | "n_hidden": 56, 30 | "entropy_reg_coeff": 0.075, 31 | "bootstrap_encoder_model": false, 32 | "failed_homing_policy_filter": false, 33 | "encoder_sampling_style": "reuse", 34 | "data_aggregation": false, 35 | "reward_free_planner": "gps", 36 | "reward_sensitive_planner": "fqi", 37 | "patience": 10, 38 | "bias_homing_policy": false, 39 | "entropy_policy": "none", 40 | "filter_unreachable_abstract_states": false, 41 | "filter_old_abstract_states": false, 42 | "use_l1_penalty": false, 43 | "expected_optima": 0.685, 44 | "max_try": 10, 45 | "reward_type": "stochastic", 46 | "count_type": "state-action", 47 | "clustering_threshold": 0.15 48 | } 49 | -------------------------------------------------------------------------------- /data/simplelqr/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "state_dim": 2, 3 | "act_dim": 1, 4 | "actions": ["continuous"], 5 | "horizon": 500, 6 | "obs_dim": 2, 7 | "feature_type": "feature", 8 | "gamma": 1.0 9 | } 10 | -------------------------------------------------------------------------------- /data/simplelqr/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "max_iter": 10000, 3 | "learning_rate": 0.01, 4 | "k0": 50, 5 | "batch_size": 32, 6 | "samples": 4000, 7 | "max_epoch": 50, 8 | "validation_data_percent": 0.2, 9 | 
"failed_homing_policy_filter": false, 10 | "encoder_sampling_style": "reuse", 11 | "data_aggregation": false, 12 | "reward_free_planner": "gps", 13 | "reward_sensitive_planner": "fqi", 14 | "patience": 100, 15 | "bias_homing_policy": false, 16 | "entropy_policy": "none", 17 | "filter_unreachable_abstract_states": false, 18 | "filter_old_abstract_states": false, 19 | "expected_optima": 0.685, 20 | "max_try": 10 21 | } 22 | -------------------------------------------------------------------------------- /data/slotfactoredmdp/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 10, 3 | "state_dim": 10, 4 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 5 | "horizon": 10, 6 | "obs_dim": -1, 7 | "atom_dim": 1, 8 | "feature_type": "feature", 9 | "return_state": true, 10 | "gamma": 1.0 11 | } 12 | -------------------------------------------------------------------------------- /data/slotfactoredmdp/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "max_parents": 1, 5 | "num_factor_vals": 2, 6 | "encoder_training_num_samples": 10000, 7 | "encoder_training_epoch": 200, 8 | "encoder_training_lr": 0.001, 9 | "encoder_training_batch_size": 32, 10 | "validation_data_percent": 0.2, 11 | "psdp_training_num_samples": 20000, 12 | "cb_oracle_epoch": 40, 13 | "cb_oracle_lr": 0.001, 14 | "cb_oracle_batch_size": 32, 15 | "cb_validation_pct": 0.2, 16 | "cb_patience": 5, 17 | "eval_homing_policy_sample_size": 100, 18 | "n_hidden": 56, 19 | "entropy_reg_coeff": 0.075, 20 | "bootstrap_encoder_model": false, 21 | "failed_homing_policy_filter": false, 22 | "encoder_sampling_style": "reuse", 23 | "data_aggregation": false, 24 | "reward_free_planner": "gps", 25 | "reward_sensitive_planner": "fqi", 26 | "patience": 10, 27 | "bias_homing_policy": false, 28 | "entropy_policy": "none", 29 | "filter_unreachable_abstract_states": false, 30 | "filter_old_abstract_states": false, 31 | "use_l1_penalty": false, 32 | "expected_optima": 0.42, 33 | "max_try": 1, 34 | "reward_type": "stochastic", 35 | "count_type": "state-action", 36 | "clustering_threshold": 0.15 37 | } 38 | -------------------------------------------------------------------------------- /data/stochcombolock/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 2, 3 | "actions": [0, 1], 4 | "horizon": 20, 5 | "obs_dim": -1, 6 | "feature_type": "feature", 7 | "gamma": 1.0 8 | } 9 | -------------------------------------------------------------------------------- /data/stochcombolock/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.0001, 3 | "num_homing_policy": 2, 4 | "encoder_training_num_samples": 3000, 5 | "encoder_training_epoch": 200, 6 | "encoder_training_lr": 0.001, 7 | "encoder_training_batch_size": 32, 8 | "validation_data_percent": 0.2, 9 | "psdp_training_num_samples": 2000, 10 | "cb_oracle_epoch": 20, 11 | "cb_oracle_lr": 0.001, 12 | "cb_oracle_batch_size": 32, 13 | "eval_homing_policy_sample_size": 500, 14 | "n_hidden": 56, 15 | "entropy_reg_coeff": 0.075, 16 | "bootstrap_encoder_model": false, 17 | "failed_homing_policy_filter": false, 18 | "encoder_sampling_style": "reuse", 19 | "data_aggregation": false, 20 | "policy_search": "gps", 21 | "patience": 100, 22 | "bias_homing_policy": false, 23 | "entropy_policy": "none", 24 | 
"filter_unreachable_abstract_states": false, 25 | "filter_old_abstract_states": false, 26 | "use_l1_penalty": false, 27 | "expected_optima": 0.675, 28 | "max_try": 3 29 | } 30 | -------------------------------------------------------------------------------- /data/temporal_combolock/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 10, 3 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 4 | "horizon": 10, 5 | "obs_dim": -1, 6 | "feature_type": "feature", 7 | "gamma": 1.0, 8 | "noise_type": "hadamhardg", 9 | "optimal_reward": 1.0, 10 | "anti_shaping_reward": 0.0, 11 | "anti_shaping_reward2": 0.0, 12 | "exo_flip_prob": 0.1, 13 | "exo_dim": 10, 14 | "seed": 1234 15 | } 16 | -------------------------------------------------------------------------------- /data/temporal_combolock/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 2, 4 | "encoder_training_num_samples": 5000, 5 | "elim_param": 0.1, 6 | "grad_clip": 10, 7 | "eps_clip": 0.1, 8 | "batch_size": 100, 9 | "num_ppo_updates": 4, 10 | "entropy_coeff": 0.001, 11 | "rnd_bonus_coeff": 0, 12 | "forward_model_type": "forwardmodel", 13 | "backward_model_type": "backwardmodel", 14 | "discretization": true, 15 | "rnd_obs_norm_init_episode": 100, 16 | "classifier_type": "ff", 17 | "reward_free_planner": "pps", 18 | "reward_sensitive_planner": "psdp", 19 | "encoder_training_epoch": 50, 20 | "encoder_training_lr": 0.001, 21 | "encoder_training_batch_size": 256, 22 | "validation_data_percent": 0.2, 23 | "psdp_training_num_samples": 500, 24 | "cb_oracle_epoch": 20, 25 | "cb_oracle_lr": 0.001, 26 | "cb_oracle_batch_size": 32, 27 | "cb_patience": 5, 28 | "cb_validation_pct": 0.2, 29 | "eval_homing_policy_sample_size": 10, 30 | "n_hidden": 56, 31 | "entropy_reg_coeff": 0.075, 32 | "bootstrap_encoder_model": false, 33 | "clustering_threshold": 0.05, 34 | "nce_from_dataset": true, 35 | "failed_homing_policy_filter": false, 36 | "encoder_sampling_style": "reuse", 37 | "data_aggregation": false, 38 | "policy_search": "pps", 39 | "patience": 20, 40 | "bias_homing_policy": false, 41 | "entropy_policy": "none", 42 | "filter_unreachable_abstract_states": false, 43 | "filter_old_abstract_states": false, 44 | "use_l1_penalty": false, 45 | "expected_optima": 0.0, 46 | "max_try": 1 47 | } 48 | -------------------------------------------------------------------------------- /data/temporal_diabcombolock/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_actions": 10, 3 | "actions": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 4 | "horizon": 10, 5 | "obs_dim": -1, 6 | "feature_type": "feature", 7 | "gamma": 1.0, 8 | "noise_type": "hadamhardg", 9 | "optimal_reward_a": 1.0, 10 | "optimal_reward_b": 0.1, 11 | "anti_shaping_reward": 0.0, 12 | "anti_shaping_reward2": 0.0, 13 | "exo_flip_prob": 0.1, 14 | "exo_dim": 10, 15 | "seed": 1234 16 | } 17 | -------------------------------------------------------------------------------- /data/temporal_diabcombolock/constants.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate": 0.001, 3 | "num_homing_policy": 3, 4 | "encoder_training_num_samples": 5000, 5 | "elim_param": 0.1, 6 | "grad_clip": 10, 7 | "eps_clip": 0.1, 8 | "batch_size": 100, 9 | "num_ppo_updates": 4, 10 | "entropy_coeff": 0.001, 11 | "rnd_bonus_coeff": 0, 12 | "forward_model_type": "forwardmodel", 13 
| "backward_model_type": "backwardmodel", 14 | "discretization": true, 15 | "rnd_obs_norm_init_episode": 100, 16 | "classifier_type": "ff", 17 | "reward_free_planner": "pps", 18 | "reward_sensitive_planner": "psdp", 19 | "encoder_training_epoch": 50, 20 | "encoder_training_lr": 0.001, 21 | "encoder_training_batch_size": 256, 22 | "validation_data_percent": 0.2, 23 | "psdp_training_num_samples": 500, 24 | "cb_oracle_epoch": 20, 25 | "cb_oracle_lr": 0.001, 26 | "cb_oracle_batch_size": 32, 27 | "cb_patience": 5, 28 | "cb_validation_pct": 0.2, 29 | "eval_homing_policy_sample_size": 10, 30 | "n_hidden": 56, 31 | "entropy_reg_coeff": 0.075, 32 | "bootstrap_encoder_model": false, 33 | "clustering_threshold": 0.05, 34 | "nce_from_dataset": true, 35 | "failed_homing_policy_filter": false, 36 | "encoder_sampling_style": "reuse", 37 | "data_aggregation": false, 38 | "policy_search": "pps", 39 | "patience": 20, 40 | "bias_homing_policy": false, 41 | "entropy_policy": "none", 42 | "filter_unreachable_abstract_states": false, 43 | "filter_old_abstract_states": false, 44 | "use_l1_penalty": false, 45 | "expected_optima": 0.0, 46 | "max_try": 1 47 | } 48 | -------------------------------------------------------------------------------- /local_runs/test_factorl.sh: -------------------------------------------------------------------------------- 1 | cd .. 2 | export PYTHONPATH=$$PYTHONPATH:src 3 | python3 src/experiments/run_factorl.py --env slotfactoredmdp --encoder_training_num_samples 5000 --horizon 5 --noise hadamhardg --save_path ./results --seed 1234 --name test-factorl 4 | -------------------------------------------------------------------------------- /local_runs/test_homer.sh: -------------------------------------------------------------------------------- 1 | cd .. 2 | export PYTHONPATH=$$PYTHONPATH:src 3 | python3 src/experiments/run_homer.py --env diabcombolock --encoder_training_num_samples 5000 --horizon 5 --debug -1 --noise hadamhardg --save_path ./results --seed 1234 --name test-homer 4 | -------------------------------------------------------------------------------- /local_runs/test_id.sh: -------------------------------------------------------------------------------- 1 | cd .. 2 | export PYTHONPATH=$$PYTHONPATH:src 3 | python3 src/experiments/run_id.py --env temporal_combolock --encoder_training_num_samples 5000 --horizon 5 --exo_dim -1 --noise hadamhardg --classifier_type ff --save_path ./results --seed 1234 --name test-ppe 4 | -------------------------------------------------------------------------------- /local_runs/test_ppe.sh: -------------------------------------------------------------------------------- 1 | cd .. 2 | export PYTHONPATH=$$PYTHONPATH:src 3 | python3 src/experiments/run_ppe.py --env temporal_combolock --encoder_training_num_samples 5000 --horizon 5 --exo_dim -1 --noise hadamhardg --classifier_type ff --save_path ./results --seed 1234 --name test-ppe 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 127 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ## Requirements file for INTREPID: Interactive Representation Discovery 2 | ## Note that not all algorithms may need all of the packages below. 
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 127 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ## Requirements file for INTREPID: Interactive Representation Discovery 2 | ## Note that not all algorithms need all of the packages below. 3 | ## If you are unable to install a particular package, it may be 4 | ## worth commenting it out below and installing the others. 5 | 6 | ## Requirements without Version Specifiers 7 | scikit-learn 8 | scikit-image 9 | scipy 10 | imageio 11 | tensorboardX 12 | matplotlib 13 | ai2thor 14 | Pillow 15 | vector-quantize-pytorch 16 | tqdm 17 | wandb 18 | lightning 19 | 20 | ## Requirements with Version Specifiers 21 | torch>=1.8.1 22 | torchvision>=0.9.1 23 | gym-minigrid>=1.0.3 24 | gym>=0.17.3 25 | -------------------------------------------------------------------------------- /src/analysis_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/analysis_tools/__init__.py -------------------------------------------------------------------------------- /src/environments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/__init__.py -------------------------------------------------------------------------------- /src/environments/ai2thorenv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/ai2thorenv/__init__.py -------------------------------------------------------------------------------- /src/environments/app_simulator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/app_simulator/__init__.py -------------------------------------------------------------------------------- /src/environments/app_simulator/run_interactive.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def run_interactive(app): 6 | """ 7 | Run app in human interactive mode. 8 | This will open a window that allows the user to click on UI elements. 9 | Actions can also be taken by providing strings in the command prompt. 10 | """ 11 | 12 | def on_click(event): 13 | if event.xdata is None or event.ydata is None: 14 | # clicked outside of plot 15 | return 16 | 17 | obs = app.get_observation() 18 | action = obs.get_action_at_click(event.xdata, event.ydata) 19 | if action: 20 | print(f"\nClicked on: {action}") 21 | obs, reward, done, info = app.step(action) 22 | plt.imshow(obs.get_screenshot()) 23 | plt.show() 24 | 25 | plt.ion() 26 | plt.axis("off") 27 | plt.connect("button_press_event", on_click) 28 | plt.connect("close_event", lambda _: sys.exit(0)) 29 | 30 | obs, info = app.reset() 31 | while True: 32 | plt.imshow(obs.get_screenshot()) 33 | plt.show() 34 | 35 | available_actions = info["valid_actions"] 36 | print(f"\nAvailable actions: {available_actions}") 37 | action = input("Enter action: ") 38 | 39 | if not action or action == "quit": 40 | print("Quitting app") 41 | break 42 | elif action == "reset": 43 | print("Resetting app") 44 | obs, info = app.reset() 45 | elif action not in available_actions: 46 | print(f"Invalid action: {action}") 47 | else: 48 | obs, reward, done, info = app.step(action) 49 |
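A minimal usage sketch for the interactive runner above. The ToggleSwitchesApp import is an assumption: the real class lives somewhere in src/environments/app_simulator/toggle_switches.py and may be named differently; any app object exposing the reset()/step() API and screenshot-capable observations used above would work.

# Hypothetical demo; ToggleSwitchesApp is an assumed name, not a confirmed export.
from environments.app_simulator.run_interactive import run_interactive
from environments.app_simulator.toggle_switches import ToggleSwitchesApp  # assumed name

app = ToggleSwitchesApp()
run_interactive(app)  # opens a matplotlib window; click UI elements or type actions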
-------------------------------------------------------------------------------- /src/environments/control_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/control_env/__init__.py -------------------------------------------------------------------------------- /src/environments/gym_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/gym_env/__init__.py -------------------------------------------------------------------------------- /src/environments/intrepid_env_meta/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/intrepid_env_meta/__init__.py -------------------------------------------------------------------------------- /src/environments/intrepid_env_meta/action_type.py: -------------------------------------------------------------------------------- 1 | class ActionType: 2 | Discrete = "discrete" 3 | Continuous = "continuous" 4 | Structured = "structured" 5 | Variable = "variable" 6 | 7 | @staticmethod 8 | def get_action_type_from_name(act_type_name): 9 | if act_type_name == "discrete": 10 | return ActionType.Discrete 11 | 12 | elif act_type_name == "continuous": 13 | return ActionType.Continuous 14 | 15 | elif act_type_name == "structured": 16 | return ActionType.Structured 17 | 18 | elif act_type_name == "variable": 19 | return ActionType.Variable 20 | 21 | else: 22 | raise AssertionError("No action type found for %r" % act_type_name) 23 | -------------------------------------------------------------------------------- /src/environments/intrepid_env_meta/environment_keys.py: -------------------------------------------------------------------------------- 1 | class EnvKeys: 2 | """ 3 | Meta-information returned by environments can use the following keys to make the code more 4 | generalizable across different environments. 5 | """ 6 | 7 | # Counter from which the time steps in an episode start 8 | # Designed to deal with the 0-based vs 1-based indexing issue 9 | INITIAL_TIME_STEP = 0 10 | 11 | # Overall state 12 | STATE = "state" 13 | 14 | # Endogenous state 15 | ENDO_STATE = "endogenous_state" 16 | 17 | # Time step 18 | TIME_STEP = "timestep" 19 |
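A short sketch of how these keys are read from the info dictionaries that environments return, mirroring their use in src/learning/policy_roll_in/roll.py later in this repository; env stands in for any environment implementing the interface defined in the next file:

from environments.intrepid_env_meta.environment_keys import EnvKeys


def first_step_endo_state(env, action):
    # reset/step return info dicts keyed by the EnvKeys constants
    obs, info = env.reset()
    obs, reward, done, info = env.step(action)
    return info[EnvKeys.ENDO_STATE]  # endogenous part of the latent state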
-------------------------------------------------------------------------------- /src/environments/intrepid_env_meta/gym_compatible.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | 4 | class GymCompatible(gym.Env): 5 | def __init__(self, cerebral_env): 6 | self.cerebral_env = cerebral_env 7 | # Define action and observation space 8 | # They must be gym.spaces objects 9 | # Example when using discrete actions: 10 | self.action_space = None # TODO spaces.Discrete(N_DISCRETE_ACTIONS) 11 | 12 | # Example for using image as input: 13 | self.observation_space = None 14 | # self.observation_space = spaces.Box(low=0, high=255, shape= 15 | # (HEIGHT, WIDTH, N_CHANNELS), dtype=np.uint8) # TODO 16 | 17 | def step(self, action): 18 | # Execute one time step within the environment 19 | return self.cerebral_env.step(action) 20 | 21 | def reset(self): 22 | # Reset the state of the environment to an initial state 23 | obs, info = self.cerebral_env.reset() 24 | return obs 25 | 26 | def render(self, mode="human", close=False): 27 | # Render the environment to the screen 28 | raise NotImplementedError() 29 | -------------------------------------------------------------------------------- /src/environments/intrepid_env_meta/intrepid_env_interface.py: -------------------------------------------------------------------------------- 1 | class IntrepidEnvInterface: 2 | """Any environment using the Intrepid Env Interface must support the following API""" 3 | 4 | def reset(self): 5 | """ 6 | :return: 7 | obs: Agent observation. No assumption made on the structure of observation. 8 | info: Dictionary containing relevant information such as latent state, etc. 9 | """ 10 | 11 | raise NotImplementedError() 12 | 13 | def step(self, action): 14 | """ 15 | :param action: 16 | :return: 17 | obs: Agent observation. No assumption made on the structure of observation. 18 | reward: Reward received by the agent. No Markov assumption is made. 19 | done: True if the episode has terminated and False otherwise. 20 | info: Dictionary containing relevant information such as latent state, etc. 21 | """ 22 | raise NotImplementedError() 23 | 24 | def get_action_type(self): 25 | """ 26 | :return: 27 | action_type: Return the type of action space the agent is using 28 | """ 29 | raise NotImplementedError() 30 | 31 | def save(self, save_path, fname=None): 32 | """ 33 | Save the environment 34 | :param save_path: Save directory 35 | :param fname: Additionally, a file name can be provided. If the save target is a single file, then this 36 | will be used; otherwise it can be ignored. 37 | :return: None 38 | """ 39 | raise NotImplementedError() 40 | 41 | def load(self, load_path, fname=None): 42 | """ 43 | Load the environment 44 | :param load_path: Load directory 45 | :param fname: Additionally, a file name can be provided. If the load target is a single file, then only 46 | the file with the given fname will be used. 47 | :return: Environment 48 | """ 49 | raise NotImplementedError() 50 | 51 | def is_episodic(self): 52 | """ 53 | :return: True if the environment is episodic and False otherwise. 54 | """ 55 | raise NotImplementedError() 56 | 57 | def act_to_str(self, action): 58 | """ 59 | :param action: a given action 60 | :return: the action in string representation 61 | """ 62 | 63 | return "%r" % action 64 |
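To make the contract above concrete, a minimal sketch of an implementation; the one-step dynamics and reward are invented for illustration and this class is not part of the repository:

from environments.intrepid_env_meta.action_type import ActionType
from environments.intrepid_env_meta.environment_keys import EnvKeys
from environments.intrepid_env_meta.intrepid_env_interface import IntrepidEnvInterface


class OneStepToyEnv(IntrepidEnvInterface):
    """Hypothetical horizon-1 environment with two actions; for illustration only."""

    def reset(self):
        self._t = 0
        return [0.0], {EnvKeys.TIME_STEP: self._t}

    def step(self, action):
        self._t += 1
        reward = 1.0 if action == 1 else 0.0  # action 1 is the "correct" action here
        done = self._t >= 1
        return [float(action)], reward, done, {EnvKeys.TIME_STEP: self._t}

    def get_action_type(self):
        return ActionType.Discrete

    def is_episodic(self):
        return True

save and load fall back to the base class and raise NotImplementedError, which is acceptable for a toy environment.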
54 | """ 55 | raise NotImplementedError() 56 | 57 | def act_to_str(self, action): 58 | """ 59 | :param: given an action 60 | :return: action in string representation 61 | """ 62 | 63 | return "%r" % action 64 | -------------------------------------------------------------------------------- /src/environments/matterport/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/matterport/__init__.py -------------------------------------------------------------------------------- /src/environments/minigrid/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/minigrid/__init__.py -------------------------------------------------------------------------------- /src/environments/rl_acid_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/rl_acid_env/__init__.py -------------------------------------------------------------------------------- /src/environments/rl_acid_env/noise_gen.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | 5 | # Generate Hadamhard matrix of atleast a given size using Sylvester's method 6 | def generated_hadamhard_matrix(lower_bound): 7 | dim = 1 8 | h = np.array([[1.0]], dtype=float) 9 | 10 | while dim < lower_bound: 11 | h = np.block([[h, h], [h, -h]]) 12 | dim = 2 * dim 13 | 14 | # Trim the columns of the matrix to match the lower bound 15 | return h[:, :lower_bound] 16 | 17 | 18 | # Size of the smallest Hadamhard matrix which is greater than lower bound, as generated by Sylvester's method. 
19 | def get_sylvester_hadamhard_matrix_dim(lower_bound): 20 | return int(math.pow(2, math.ceil(math.log(lower_bound, 2)))) 21 | -------------------------------------------------------------------------------- /src/environments/robot_car/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/robot_car/__init__.py -------------------------------------------------------------------------------- /src/environments/robot_car/client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/robot_car/client/__init__.py -------------------------------------------------------------------------------- /src/environments/robot_car/client/client_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 as cv 2 | import sys 3 | from time import time, strftime, localtime 4 | 5 | 6 | def get_timestamp_str(): 7 | return strftime("%Y-%m-%d-%H-%M-%S", localtime(time())) 8 | 9 | 10 | # Initialize webcams 11 | # devnames: list of strings containing numbers ["0", "1"] or names (["/dev/video0", "/dev/video1"]) 12 | def init_cameras(devnames): 13 | cameras = [] 14 | for i, devname in enumerate(devnames): 15 | print(f"Opening camera device {devname}") 16 | try: 17 | # Open device as int index 18 | c = cv.VideoCapture(int(devname)) 19 | except ValueError: 20 | # Open device as string like "/dev/video0" 21 | c = cv.VideoCapture(devname) 22 | if not c.isOpened(): 23 | print(f"Error: Could not open camera {devname}") 24 | for c in cameras: 25 | c.release() 26 | sys.exit(-1) 27 | 28 | # Test camera by taking a picture 29 | print(f"Testing camera {devname}") 30 | ret, pic = c.read() 31 | if not ret: 32 | print(f"Error: Could not read from camera {devname}") 33 | for c in cameras: 34 | c.release() 35 | sys.exit(-1) 36 | print(f"Got image of size {pic.shape}") 37 | 38 | # Add camera to list 39 | cameras.append(c) 40 | print(f"Initialized camera at index {i}: {devname}") 41 | return cameras 42 | -------------------------------------------------------------------------------- /src/environments/robot_car/client/state_capture_client.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import os 4 | 5 | from environments.robot_car.client.client_base import CarClient 6 | from environments.robot_car.client.client_utils import get_timestamp_str, init_cameras 7 | from environments.robot_car.client.state import CarState 8 | 9 | 10 | async def do_capture(host, port, output_dir, cameras): 11 | car = CarClient(host, port) 12 | await car.connect() 13 | 14 | car_state = CarState() 15 | await car_state.capture_from_cameras(car, cameras) 16 | car_state.save_to_files(output_dir) 17 | print(f"Saved images to {output_dir}") 18 | 19 | 20 | if __name__ == "__main__": 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument("--host", type=str, default="localhost") 23 | parser.add_argument("--port", type=int, default=21219) 24 | parser.add_argument("--output_dir", type=str, default=os.path.join(os.getcwd(), get_timestamp_str())) 25 | parser.add_argument("--cameras", type=str, nargs="+", default=[]) 26 | args = parser.parse_args() 27 | 28 | assert len(args.cameras) > 0, "Must specify at least one camera" 29 | 30 | # Create 
output directory 31 | os.makedirs(args.output_dir, exist_ok=True) 32 | 33 | cameras = init_cameras(args.cameras) 34 | try: 35 | asyncio.run(do_capture(args.host, args.port, args.output_dir, cameras)) 36 | finally: 37 | for c in cameras: 38 | c.release() 39 | -------------------------------------------------------------------------------- /src/environments/robot_car/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/robot_car/server/__init__.py -------------------------------------------------------------------------------- /src/environments/robot_car/server/mock_pi_libraries.py: -------------------------------------------------------------------------------- 1 | # Mock versions of raspberry pi specific libraries 2 | 3 | 4 | class Picarx: 5 | def forward(self, speed): 6 | print(f"[mock picarx] forward(speed={speed})") 7 | 8 | def backward(self, speed): 9 | print(f"[mock picarx] backward(speed={speed})") 10 | 11 | def set_dir_servo_angle(self, angle): 12 | print(f"[mock picarx] set_dir_servo_angle(angle={angle})") 13 | 14 | def stop(self): 15 | print("[mock picarx] stop()") 16 | 17 | 18 | class PiCamera: 19 | def start_preview(self): 20 | print("[mock picamera] start_preview()") 21 | 22 | def capture(self, output_file, **kwargs): 23 | print(f"[mock picamera] capture(output_file={output_file})") 24 | -------------------------------------------------------------------------------- /src/environments/robot_car/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/environments/robot_car/utils/__init__.py -------------------------------------------------------------------------------- /src/environments/robot_car/utils/check_corrupted_images.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import cv2 as cv 4 | from tqdm import tqdm 5 | 6 | 7 | def load_pic(filename, resize=(256, 256)): 8 | pic = cv.imread(filename) 9 | return pic 10 | 11 | 12 | def run_check(root_dir): 13 | # get all subdirectories 14 | # each should contain a file called actions.txt 15 | subdirs = [ 16 | d 17 | for d in os.listdir(root_dir) 18 | if os.path.isdir(os.path.join(root_dir, d)) and os.path.isfile(os.path.join(root_dir, d, "actions.txt")) 19 | ] 20 | assert len(subdirs) > 0, f"No subdirectories found in {root_dir}" 21 | 22 | for dir in tqdm(subdirs): 23 | # read log file for this subdirectory 24 | with open(os.path.join(root_dir, dir, "actions.txt")) as f: 25 | log = f.readlines() 26 | log = [json.loads(a) for a in log if a.strip() != ""] 27 | 28 | # load a single trajectory 29 | traj_pics = [] 30 | for line in log: 31 | traj_pics.append( 32 | [ 33 | os.path.join(root_dir, dir, line["cam0"]), 34 | os.path.join(root_dir, dir, line["cam1"]), 35 | os.path.join(root_dir, dir, line["cam_car"]), 36 | ] 37 | ) 38 | 39 | for traj in tqdm(traj_pics): 40 | for pic in traj: 41 | if not os.path.isfile(pic): 42 | print(f"File {pic} does not exist") 43 | loaded = load_pic(pic) 44 | if loaded is None: 45 | print(f"File {pic} could not be loaded") 46 | 47 | 48 | if __name__ == "__main__": 49 | import argparse 50 | 51 | parser = argparse.ArgumentParser() 52 | parser.add_argument("root_dir", help="Root directory containing trajectories in subdirectories") 53 | args = 
parser.parse_args() 54 | 55 | run_check(args.root_dir) 56 | -------------------------------------------------------------------------------- /src/experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/experiments/__init__.py -------------------------------------------------------------------------------- /src/experiments/experiment_save.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | 4 | 5 | def terminate(performance, exp_setup, seeds): 6 | setting = dict() 7 | 8 | for k, v in exp_setup.config.items(): 9 | setting["config/%s" % k] = v 10 | 11 | for k, v in exp_setup.constants.items(): 12 | setting["constants/%s" % k] = v 13 | 14 | for k, v in exp_setup.args.__dict__.items(): 15 | setting["args/%s" % k] = v 16 | 17 | results = {"setting": setting, "performance": performance, "seeds": seeds} 18 | 19 | # Save performance 20 | with open("%s/results.pickle" % exp_setup.experiment, "wb") as f: 21 | pickle.dump(results, f) 22 | 23 | if len(performance) > 0: 24 | for key in performance[0]: # Assumes the keys are the same across all runs 25 | if not isinstance(performance[0][key], int) and not isinstance(performance[0][key], float): 26 | continue 27 | 28 | metrics = [performance_[key] for performance_ in performance] 29 | 30 | exp_setup.logger.log( 31 | "%r: Mean %f, Std %f, Median %f, Min %f, Max %f, Num runs %d, All performance %r" 32 | % ( 33 | key, 34 | np.mean(metrics), 35 | np.std(metrics), 36 | np.median(metrics), 37 | np.min(metrics), 38 | np.max(metrics), 39 | len(metrics), 40 | metrics, 41 | ) 42 | ) 43 | 44 | exp_setup.logger.log("Experiment Completed.") 45 | 46 | # Cleanup 47 | exp_setup.logger_manager.cleanup() 48 | -------------------------------------------------------------------------------- /src/experiments/experimental_setup.py: -------------------------------------------------------------------------------- 1 | class ExperimentalSetup: 2 | def __init__( 3 | self, 4 | config, 5 | constants, 6 | experiment, 7 | exp_name, 8 | env_name, 9 | args, 10 | debug, 11 | logger, 12 | logger_manager, 13 | ): 14 | """ 15 | :param config: Dictionary containing values for the environment 16 | :param constants: Dictionary containing hyperparameters for the algorithm 17 | :param experiment: the full experiment folder where all contents should be saved 18 | :param exp_name: name of the main experiment log file 19 | :param env_name: name of the environment 20 | :param args: command line arguments 21 | :param debug: if set to true, then run the code in debug mode 22 | :param logger: Logger for logging data 23 | :param logger_manager: Logger Manager 24 | """ 25 | 26 | self.config = config 27 | self.constants = constants 28 | self.experiment = experiment 29 | self.exp_name = exp_name 30 | self.env_name = env_name 31 | self.base_env_name = env_name.split("/")[-1] 32 | self.args = args 33 | self.logger = logger 34 | self.debug = debug 35 | self.logger_manager = logger_manager 36 | -------------------------------------------------------------------------------- /src/experiments/run_factorl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from experiments.experiment_header import get_header 7 | from experiments.experiment_save import terminate
8 | from learning.core_learner.factorl import FactoRL 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | # Save the environment for reproducibility 42 | # env.save_environment(experiment, trial_name=exp_id) 43 | # print("Saving Environment...") 44 | 45 | learning_alg = FactoRL(exp_setup) 46 | 47 | policy_result = learning_alg.train(env=env, exp_id=exp_id, opt_reward=True) 48 | 49 | performance.append(policy_result) 50 | 51 | terminate(performance, exp_setup, seeds) 52 | 53 | 54 | if __name__ == "__main__": 55 | print("SETTING THE START METHOD ") 56 | mp.freeze_support() 57 | mp.set_start_method("spawn") 58 | main() 59 | -------------------------------------------------------------------------------- /src/experiments/run_homer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from learning.core_learner.homer import Homer 7 | from experiments.experiment_save import terminate 8 | from experiments.experiment_header import get_header 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | cover_validator = env.generate_homing_policy_validation_fn() 42 | 43 | learning_alg = Homer(exp_setup) 44 | policy_result = learning_alg.train( 45 | env=env, 46 | env_name=exp_setup.env_name, 47 | homing_policy_validator=cover_validator, 48 | exp_id=exp_id, 49 | opt_reward=False, 50 | num_processes=exp_setup.constants["num_processes"], 51 | ) 52 | 53 | performance.append(policy_result) 54 | 55 | terminate(performance, 
exp_setup, seeds) 56 | 57 | 58 | if __name__ == "__main__": 59 | print("SETTING THE START METHOD ") 60 | mp.freeze_support() 61 | mp.set_start_method("spawn") 62 | main() 63 | -------------------------------------------------------------------------------- /src/experiments/run_id.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from experiments.experiment_save import terminate 7 | from experiments.experiment_header import get_header 8 | from learning.core_learner.ik_learner import IDLearning 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | # # Save the environment for reproducibility 42 | # env.save_environment(experiment, trial_name=exp_id) 43 | # print("Saving Environment...") 44 | cover_validator = None # env.generate_homing_policy_validation_fn() 45 | 46 | learning_alg = IDLearning(exp_setup) 47 | policy_result = learning_alg.train( 48 | env=env, 49 | env_name=exp_setup.env_name, 50 | homing_policy_validator=cover_validator, 51 | exp_id=exp_id, 52 | opt_reward=True, 53 | num_processes=exp_setup.constants["num_processes"], 54 | ) 55 | 56 | performance.append(policy_result) 57 | 58 | terminate(performance, exp_setup, seeds) 59 | 60 | 61 | if __name__ == "__main__": 62 | print("SETTING THE START METHOD ") 63 | mp.freeze_support() 64 | mp.set_start_method("spawn") 65 | main() 66 | -------------------------------------------------------------------------------- /src/experiments/run_mbrl_oracle.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from experiments.experiment_header import get_header 7 | from environments.intrepid_env_meta.make_env import MakeEnvironment 8 | from experiments.experiment_save import terminate 9 | from learning.core_learner.mbrl_oracle_decoder import MBRLOracleDecoder 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | 
# Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | # # Save the environment for reproducibility 42 | # env.save_environment(experiment, trial_name=exp_id) 43 | # print("Saving Environment...") 44 | 45 | learning_alg = MBRLOracleDecoder(exp_setup) 46 | 47 | policy_result = learning_alg.train(env=env, exp_id=exp_id) 48 | 49 | performance.append(policy_result) 50 | 51 | terminate(performance, exp_setup, seeds) 52 | 53 | 54 | if __name__ == "__main__": 55 | print("SETTING THE START METHOD ") 56 | mp.freeze_support() 57 | mp.set_start_method("spawn") 58 | main() 59 | -------------------------------------------------------------------------------- /src/experiments/run_ppe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from learning.core_learner.ppe import PPE 7 | from experiments.experiment_save import terminate 8 | from experiments.experiment_header import get_header 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | learning_alg = PPE(exp_setup) 42 | policy_result = learning_alg.train(env=env, exp_id=exp_id, opt_reward=True) 43 | 44 | performance.append(policy_result) 45 | 46 | terminate(performance, exp_setup, seeds) 47 | 48 | 49 | if __name__ == "__main__": 50 | print("SETTING THE START METHOD ") 51 | mp.freeze_support() 52 | mp.set_start_method("spawn") 53 | main() 54 | -------------------------------------------------------------------------------- /src/experiments/run_richid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from learning.core_learner.richid import RichId 7 | from experiments.experiment_save import terminate 8 | from experiments.experiment_header import get_header 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = 
[exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | learning_alg = RichId(exp_setup) 42 | policy_result = learning_alg.train(env=env, latent_lqr=env.env.get_latent_lqr().copy()) 43 | 44 | performance.append(policy_result) 45 | 46 | terminate(performance, exp_setup, seeds) 47 | 48 | 49 | if __name__ == "__main__": 50 | print("SETTING THE START METHOD ") 51 | mp.freeze_support() 52 | mp.set_start_method("spawn") 53 | main() 54 | -------------------------------------------------------------------------------- /src/experiments/run_sysid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from learning.core_learner.richid import SysID 7 | from experiments.experiment_save import terminate 8 | from experiments.experiment_header import get_header 9 | from environments.intrepid_env_meta.make_env import MakeEnvironment 10 | 11 | 12 | def main(): 13 | exp_setup = get_header() 14 | 15 | performance = [] 16 | 17 | if exp_setup.config["seed"] == -1: 18 | seeds = list(range(1234, 1234 + 10)) 19 | num_runs = len(seeds) 20 | else: 21 | seeds = [exp_setup.config["seed"]] 22 | num_runs = 1 23 | 24 | for exp_id in range(1, num_runs + 1): 25 | exp_setup.config["seed"] = seeds[exp_id - 1] 26 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 27 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 28 | 29 | # Set the random seed 30 | random.seed(exp_setup.config["seed"]) 31 | np.random.seed(exp_setup.config["seed"]) 32 | torch.manual_seed(exp_setup.config["seed"]) 33 | if torch.cuda.is_available(): 34 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 35 | 36 | # Create a new environment 37 | make_env = MakeEnvironment() 38 | env = make_env.make(exp_setup) 39 | exp_setup.logger.log("Environment Created") 40 | 41 | learning_alg = SysID(exp_setup) 42 | policy_result = learning_alg.train(env=env) 43 | 44 | performance.append(policy_result) 45 | 46 | terminate(performance, exp_setup, seeds) 47 | 48 | 49 | if __name__ == "__main__": 50 | print("SETTING THE START METHOD ") 51 | mp.freeze_support() 52 | mp.set_start_method("spawn") 53 | main() 54 | -------------------------------------------------------------------------------- /src/learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/__init__.py -------------------------------------------------------------------------------- /src/learning/core_learner/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/core_learner/__init__.py -------------------------------------------------------------------------------- /src/learning/core_learner/ik_learner.py: -------------------------------------------------------------------------------- 1 | from learning.core_learner.abstract_rl_discrete_latent_state import ( 2 | AbstractRLDiscreteLatentState, 3 | ) 4 | from learning.learning_utils.encoder_sampler_ik import EncoderSamplerIK 5 | from learning.learning_utils.ik_train_encoding_function import IKTrainEncodingFunction 6 | 7 | 8 | class IDLearning(AbstractRLDiscreteLatentState): 9 | """ 10 | An algorithm similar to Homer that instead relies on inverse kinematics: 11 | max_\theta p(a_h | x_h, \phi(x_{h+1})) 12 | """ 13 | 14 | def __init__(self, exp_setup): 15 | super(IDLearning, self).__init__(exp_setup) 16 | 17 | self.config = exp_setup.config 18 | self.constants = exp_setup.constants 19 | self.logger = exp_setup.logger 20 | self.experiment = exp_setup.experiment 21 | self.actions = self.config["actions"] 22 | 23 | # Train encoding function 24 | self.train_encoder = IKTrainEncodingFunction(self.config, self.constants) 25 | 26 | # Sampler for generating data for training the encoding function 27 | self.encoder_sampler = EncoderSamplerIK() 28 | 29 | def gather_dataset(self, env, step, homing_policies, num_samples, dataset): 30 | """ 31 | Collect a dataset from the given environment. Return a tuple of: 32 | - a dataset, which can be arbitrary and is used by the specific implementation to train the encoder 33 | - the list of episodes generated by the agent in the process 34 | Every implementation needs to provide this. 35 | """ 36 | 37 | dataset = self.encoder_sampler.gather_samples(num_samples, env, self.actions, step, homing_policies) 38 | return dataset, dataset 39 | 40 | def train_discrete_encoder( 41 | self, 42 | dataset, 43 | logger, 44 | tensorboard, 45 | debug, 46 | bootstrap_model, 47 | undiscretized_initialization=True, 48 | ): 49 | """ 50 | Returns: 51 | - encoding_function: a function that maps an observation to one of s values, where s is a natural number 52 | - num_state_budget: the natural number s 53 | """ 54 | 55 | encoding_function, num_state_budget = self.train_encoder.train_model( 56 | dataset=dataset, 57 | logger=self.logger, 58 | tensorboard=tensorboard, 59 | discretized=True, 60 | bootstrap_model=None, 61 | ) 62 | 63 | return encoding_function, num_state_budget 64 | -------------------------------------------------------------------------------- /src/learning/datastructures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/datastructures/__init__.py -------------------------------------------------------------------------------- /src/learning/datastructures/abstract_tabular_mdp.py: -------------------------------------------------------------------------------- 1 | class AbstractTabularMDP: 2 | def __init__(self, actions, horizon, gamma=1.0): 3 | self.actions = actions 4 | self.horizon = horizon 5 | self.gamma = gamma 6 | 7 | def get_states(self, timestep): 8 | raise NotImplementedError() 9 | 10 | def get_transitions(self, state, action): 11 | raise NotImplementedError() 12 | 13 | def get_reward(self, state, action, next_state, step): 14 | raise NotImplementedError() 15 |
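To make the abstract interface above concrete, a minimal hand-built instance; the deterministic two-step chain, and in particular the convention that get_transitions returns (next_state, probability) pairs, are assumptions for illustration and not part of the repository:

from learning.datastructures.abstract_tabular_mdp import AbstractTabularMDP


class ChainMDP(AbstractTabularMDP):
    """Hypothetical two-step deterministic chain MDP; for illustration only."""

    def __init__(self):
        super().__init__(actions=[0, 1], horizon=2)

    def get_states(self, timestep):
        return ["start"] if timestep == 0 else ["good", "bad"]

    def get_transitions(self, state, action):
        # assumed convention: list of (next_state, probability) pairs
        return [("good", 1.0)] if action == 1 else [("bad", 1.0)]

    def get_reward(self, state, action, next_state, step):
        return 1.0 if next_state == "good" else 0.0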
-------------------------------------------------------------------------------- /src/learning/datastructures/count_conditional_probability.py: -------------------------------------------------------------------------------- 1 | from learning.datastructures.count_probability import CountProbability 2 | 3 | 4 | class CountConditionalProbability: 5 | """ 6 | A simple class to estimate conditional probabilities based on counts 7 | """ 8 | 9 | def __init__(self): 10 | self._conditions = {} 11 | 12 | def add(self, entry, condition): 13 | if condition not in self._conditions: 14 | self._conditions[condition] = CountProbability() 15 | 16 | self._conditions[condition].add(entry) 17 | 18 | def get_conditions(self): 19 | return self._conditions 20 | 21 | def get_entry(self, condition): 22 | if condition not in self._conditions: 23 | return None 24 | else: 25 | return self._conditions[condition] 26 | 27 | def total_count(self, condition): 28 | if condition not in self._conditions: 29 | return 0 30 | else: 31 | return self._conditions[condition].total_count() 32 | 33 | def get_prob_entry(self, entry, condition): 34 | if condition not in self._conditions: 35 | return None 36 | else: 37 | return self._conditions[condition].get_prob_entry(entry) 38 | 39 | def __str__(self): 40 | return "{%s}" % ( 41 | "; ".join(["%r -> %s" % (condition, str(prob)) for (condition, prob) in sorted(self._conditions.items())]) 42 | ) 43 | -------------------------------------------------------------------------------- /src/learning/datastructures/count_probability.py: -------------------------------------------------------------------------------- 1 | from utils.generic_policy import sample_action_from_prob 2 | 3 | 4 | class CountProbability: 5 | """ 6 | A simple class to estimate probabilities based on counts 7 | """ 8 | 9 | def __init__(self): 10 | self._total_count = 0 11 | self._values = {} 12 | 13 | def add(self, entry): 14 | self._total_count += 1 15 | 16 | if entry in self._values: 17 | self._values[entry] += 1.0 18 | else: 19 | self._values[entry] = 1.0 20 | 21 | def get_probability(self): 22 | z = float(max(1.0, self._total_count)) 23 | prob = [(key, value / z) for (key, value) in sorted(self._values.items())] 24 | 25 | return prob 26 | 27 | def get_probability_dict(self): 28 | z = float(max(1.0, self._total_count)) 29 | return {key: value / z for (key, value) in self._values.items()} 30 | 31 | def get_prob_entry(self, entry): 32 | if entry in self._values: 33 | return self._values[entry] / float(max(1.0, self._total_count)) 34 | else: 35 | return 0.0 36 | 37 | def sample(self): 38 | key_prob = self.get_probability() 39 | prob = [key_prob_[1] for key_prob_ in key_prob] 40 | ix = sample_action_from_prob(prob) 41 | 42 | return key_prob[ix][0] 43 | 44 | def total_count(self): 45 | return self._total_count 46 | 47 | def get_entries(self): 48 | return self._values.keys() 49 | 50 | def total_variation(self, other_prob): 51 | union_keys = set(self._values.keys()).union(set(other_prob._values.keys())) 52 | 53 | tv = 0.0 54 | for key in union_keys: 55 | tv += abs(self.get_prob_entry(key) - other_prob.get_prob_entry(key)) 56 | 57 | return 0.5 * tv 58 | 59 | def __str__(self): 60 | prob = self.get_probability() 61 | 62 | return "{%s}" % ("; ".join(["%r: %f" % (entry_, prob_) for entry_, prob_ in prob])) 63 | -------------------------------------------------------------------------------- /src/learning/datastructures/elliptic_potential.py: -------------------------------------------------------------------------------- 1 | 
import torch 2 | 3 | 4 | class EllipticPotential: 5 | 6 | def __init__(self, lam=1.0): 7 | """ 8 | A class to keep track of the matrix 9 | Lambda = lambda * I + \sum_{i=1}^t v_i v_i^T 10 | and return Lambda^{-1} and det(Lambda^{-1}) efficiently. 11 | 12 | Supports features in both numpy and torch format. 13 | """ 14 | self.lam = lam 15 | self._inv_matrix = None 16 | self._det_inv_matrix = None 17 | 18 | def reset(self): 19 | self._inv_matrix = None 20 | self._det_inv_matrix = None 21 | 22 | def add_feature(self, feature): 23 | """ 24 | :param feature: A torch tensor or a numpy ndarray of 1-d or 2-d (of type 1xd or dx1) 25 | Sherman-Morrison update: 26 | (A + uv^T)^{-1} = A^{-1} - (A^{-1} u v^T A^{-1}) / (1 + v^T A^{-1} u) 27 | """ 28 | 29 | if feature.ndim == 1: 30 | feature = feature.reshape(-1, 1) # treat a 1-d feature as a dx1 column vector 31 | elif feature.ndim == 2: 32 | feature = feature.reshape(-1, 1) # normalize 1xd or dx1 input to dx1 33 | else: 34 | raise AssertionError("Feature dimension must be either 1-D or 2D of type 1xd or dx1") 35 | 36 | if self._inv_matrix is None: 37 | self._inv_matrix = (1.0 / self.lam) * torch.eye(feature.shape[0]) 38 | self._det_inv_matrix = self.lam ** (-feature.shape[0]) 39 | 40 | rho = 1.0 / (1.0 + feature.T @ self._inv_matrix @ feature) 41 | self._inv_matrix = self._inv_matrix - (self._inv_matrix @ feature @ feature.T @ self._inv_matrix) * rho 42 | self._det_inv_matrix = self._det_inv_matrix * float(rho) # det(Lambda^{-1}) shrinks by the Sherman-Morrison factor 43 | 44 | def get_inverse(self): 45 | return self._inv_matrix 46 | 47 | def get_inv_mat_det(self): 48 | return self._det_inv_matrix 49 | 50 | def get_elliptic_bonus(self, features): 51 | """ 52 | :param features: Given a feature of size either dim or batch x dim 53 | :return: Bonus which is either scalar if input is 1-d or batch if 2-d 54 | """ 55 | 56 | single = features.ndim == 1 57 | if single: 58 | features = features.reshape(1, -1) # promote to 1 x dim so the batched expression below applies 59 | 60 | bonus = torch.sqrt(torch.diagonal(features @ self._inv_matrix @ features.T)) 61 | 62 | if single: 63 | bonus = bonus[0] 64 | 65 | return bonus 66 | -------------------------------------------------------------------------------- /src/learning/learning_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/learning_utils/__init__.py -------------------------------------------------------------------------------- /src/learning/learning_utils/abstract_encoder_sampler.py: -------------------------------------------------------------------------------- 1 | class AbstractEncoderSampler: 2 | def __init__(self): 3 | pass 4 | 5 | @staticmethod 6 | def gather_samples(num_samples, env, actions, step, homing_policies): 7 | """Gather samples given the environment, action space, the step at which the sample has to be 8 | gathered and the homing policies for the given step""" 9 | raise NotImplementedError() 10 | -------------------------------------------------------------------------------- /src/learning/learning_utils/generic_train_classifier.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from learning.learning_utils.generic_learner import GenericLearner 4 | from utils.cuda import cuda_var 5 | 6 | 7 | class GenericTrainClassifier(GenericLearner): 8 | """Class for training a classifier.
Fairly generic with minimal assumption""" 9 | 10 | def __init__(self, exp_setup): 11 | GenericLearner.__init__(self, exp_setup) 12 | 13 | @staticmethod 14 | def calc_prob(model, batch): 15 | obs = cuda_var(torch.cat([torch.from_numpy(np.array(pt[0])).view(1, -1) for pt in batch], dim=0)).float() 16 | 17 | prob, info_dict = model.gen_prob(obs) # Batch x Num Classes 18 | 19 | return prob, info_dict 20 | 21 | def calc_loss(self, model, batch, test=False): 22 | obs = cuda_var(torch.cat([torch.from_numpy(np.array(pt[0])).view(1, -1) for pt in batch], dim=0)).float() 23 | y = cuda_var(torch.LongTensor([pt[1] for pt in batch]).view(-1)) 24 | 25 | log_prob, info_dict = model.gen_log_prob(obs) # Batch x Num Classes 26 | 27 | selected_log_prob = log_prob.gather(1, y.view(-1, 1)) # Batch 28 | loss = -selected_log_prob.mean() 29 | 30 | return loss, info_dict 31 | 32 | def get_class_mean_prob(self, model, dataset): 33 | """ 34 | :param model: A classification model f for mapping input space X to distribution over K classes. Given input 35 | x in X, the model f(j | x) denotes the probability of class j. 36 | :param dataset: A list of tuples where first dimension of tuple is input x in X, and second is class label, 37 | rest dimensions are ignored and can be used for adding meta-information. 38 | :return: A pytorch cpu matrix of size dataset_size x N where (i, j)^{th} value denote 39 | 1/N f(j | x_i) where N is the size of dataset and x_i is the i^{th} input in the dataset. 40 | """ 41 | 42 | dataset_size = len(dataset) 43 | batches = [dataset[i : i + self.batch_size] for i in range(0, dataset_size, self.batch_size)] 44 | all_prob = [] 45 | 46 | for batch in batches: 47 | prob, info_dict = self.calc_prob(model, batch) 48 | prob = prob.detach().data.cpu() # batch x num_class 49 | 50 | all_prob.append(prob) 51 | 52 | all_prob = torch.cat(all_prob, dim=0) # Dataset x num_class 53 | 54 | return all_prob / float(all_prob.size(0)) 55 | -------------------------------------------------------------------------------- /src/learning/learning_utils/ricatti_solver.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | from utils.beautify_time import beautify 5 | 6 | 7 | class RicattiSolver: 8 | def __init__(self, logger, max_it=1000, min_change=0.000001): 9 | self.max_it = max_it 10 | self.logger = logger 11 | self.min_change = min_change 12 | 13 | def solve(self, A, B, Q, R): 14 | time_start = time.time() 15 | self.logger.debug("Performing Ricatti Iterations") 16 | P = np.eye(A.shape[0]) 17 | 18 | for it in range(0, self.max_it): 19 | inv_term = np.linalg.inv(R + B.T @ P @ B) 20 | new_P = A.T @ P @ A + Q - A.T @ P @ B @ inv_term @ B.T @ P @ A 21 | 22 | change = np.linalg.norm(P - new_P) 23 | 24 | if it % 10 == 0: 25 | self.logger.debug("Ricatti Solver: Iteration=%d, Change in P %f" % (it, change)) 26 | 27 | P = new_P 28 | 29 | if change < self.min_change: 30 | break 31 | 32 | K = np.linalg.inv(R + B.T @ P @ B) @ B.T @ P @ A 33 | self.logger.debug("Ricatti Iterations Performed. 
Time taken %s" % beautify(time.time() - time_start)) 34 | 35 | return P, K 36 | -------------------------------------------------------------------------------- /src/learning/linear_mdp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/linear_mdp/__init__.py -------------------------------------------------------------------------------- /src/learning/model_estimation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/model_estimation/__init__.py -------------------------------------------------------------------------------- /src/learning/model_estimation/count_based_estimation.py: -------------------------------------------------------------------------------- 1 | class CountBasedEstimation: 2 | def __init__(self, stationary=False): 3 | self.stationary = stationary 4 | 5 | def estimate_all(self, replay_memory, decoder): 6 | raise NotImplementedError() 7 | 8 | def estimate_step(self, mdp, replay_memory, step, decoders): 9 | if not isinstance(replay_memory, list): 10 | raise AssertionError("Replay memory must be a list") 11 | 12 | transitions = [episode.get_transitions_at_step(step - 1) for episode in replay_memory] 13 | 14 | latent_transitions = [ 15 | ( 16 | decoders[step - 1].encode_observations(x), 17 | a, 18 | r, 19 | decoders[step].encode_observations(next_x), 20 | ) 21 | for (x, a, r, next_x) in transitions 22 | ] 23 | 24 | abs_states = set([lt[3] for lt in latent_transitions]) 25 | 26 | for abs_state in abs_states: 27 | mdp.add_state(abs_state, step) 28 | 29 | for abs_state, action, reward, next_abs_state in latent_transitions: 30 | mdp.add_transition(abs_state, action, next_abs_state) 31 | mdp.add_reward(abs_state, action, next_abs_state, reward) 32 | -------------------------------------------------------------------------------- /src/learning/planning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/planning/__init__.py -------------------------------------------------------------------------------- /src/learning/planning/cem/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/planning/cem/__init__.py -------------------------------------------------------------------------------- /src/learning/planning/cem/cem_optimizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributions as dist 3 | import time 4 | 5 | 6 | class CEM_Optimizer: 7 | def __init__(self, cost_fcn, x_min=None, x_max=None) -> None: 8 | self.cost_fcn = cost_fcn 9 | self.x_min, self.x_max = x_min, x_max 10 | 11 | def cem_iter(self, x_init, num_samples=500, num_iter=10, elite_ratio=0.2, sigma=0.2): 12 | # x_init has size [1, horizon, nu] 13 | _, horizon, nu = x_init.size() 14 | device = x_init.device 15 | 16 | dim = horizon * nu 17 | 18 | # we have fixed mean and cov initialization 19 | mean = torch.zeros(dim).to(device) 20 | cov = torch.eye(dim).to(device) 21 | 22 | cost_fcn = self.cost_fcn 23 | # initialize mean and cov 24 | for i in 
range(num_iter): 25 | start_time = time.time() 26 | x_samples = dist.MultivariateNormal(mean, cov).sample((num_samples,)) 27 | input_samples = x_samples.view((-1, 1, horizon, nu)) 28 | if self.x_min is not None: 29 | input_samples = torch.clamp(input_samples, min=self.x_min) 30 | 31 | if self.x_max is not None: 32 | input_samples = torch.clamp(input_samples, max=self.x_max) 33 | 34 | scores = cost_fcn(input_samples).view(-1) 35 | 36 | # minimize the cost function 37 | _, elite_idx = torch.topk(scores, int(num_samples * elite_ratio), largest=False) 38 | elite_samples = x_samples[elite_idx] 39 | mean = elite_samples.mean(dim=0) 40 | cov = torch.diag(elite_samples.var(dim=0)) 41 | run_time = time.time() - start_time 42 | print( 43 | "cem iter {:d} takes {:.2f} secs. cost min: {:.2f}, cost max: {:.2f}".format( 44 | i, run_time, scores.min().item(), scores.max().item() 45 | ) 46 | ) 47 | 48 | best_x = elite_samples[0].view((1, horizon, nu)) 49 | best_score = cost_fcn(best_x) 50 | return best_x, best_score 51 | -------------------------------------------------------------------------------- /src/learning/planning/high_level_planner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/planning/high_level_planner/__init__.py -------------------------------------------------------------------------------- /src/learning/planning/hj_prox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/planning/hj_prox/__init__.py -------------------------------------------------------------------------------- /src/learning/policy_roll_in/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/policy_roll_in/__init__.py -------------------------------------------------------------------------------- /src/learning/policy_roll_in/roll.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from learning.datastructures.episode import Episode 4 | from environments.intrepid_env_meta.environment_keys import EnvKeys 5 | 6 | 7 | class Roll: 8 | def __init__(self, env, actions): 9 | self._env = env 10 | self.eps = None 11 | self.actions = actions 12 | 13 | def roll_in(self, policy, t): 14 | """ 15 | Roll-in in the environment using the above policy till time step t 16 | :param policy: A policy for taking actions 17 | :param t: Number of actions taken by the policy. 
18 | :return: 19 | """ 20 | 21 | obs, info = self._env.reset() 22 | self.eps = Episode(state=info[EnvKeys.ENDO_STATE], observation=obs, gamma=1.0) 23 | 24 | for h in range(0, t): 25 | action = policy.sample_action(obs, h) 26 | obs, reward, done, info = self._env.step(action) 27 | self.eps.add( 28 | action=action, 29 | reward=reward, 30 | new_obs=obs, 31 | new_state=info[EnvKeys.ENDO_STATE], 32 | ) 33 | 34 | return self 35 | 36 | def take_random(self, k): 37 | for t in range(0, k): 38 | action = random.choice(self.actions) 39 | self.take_action(action) 40 | 41 | return self 42 | 43 | def take_action(self, action): 44 | obs, reward, done, info = self._env.step(action) 45 | 46 | self.eps.add( 47 | action=action, 48 | reward=reward, 49 | new_obs=obs, 50 | new_state=info[EnvKeys.ENDO_STATE], 51 | ) 52 | 53 | return self 54 | 55 | def roll_out(self, policy, t): 56 | raise NotImplementedError() 57 | 58 | def terminate(self): 59 | """Terminate the roll-out""" 60 | 61 | self.eps.terminate() 62 | return self 63 | 64 | def retrieve(self, pattern=None): 65 | """Retrieve the details""" 66 | 67 | if pattern is not None: 68 | raise NotImplementedError() 69 | else: 70 | return self.eps 71 | -------------------------------------------------------------------------------- /src/learning/policy_search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/policy_search/__init__.py -------------------------------------------------------------------------------- /src/learning/state_abstraction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/state_abstraction/__init__.py -------------------------------------------------------------------------------- /src/learning/state_abstraction/abstract_state_decoder.py: -------------------------------------------------------------------------------- 1 | class AbstractStateDecoder: 2 | def __init__(self): 3 | pass 4 | 5 | def calc_loss(self, model, sample, **kwargs): 6 | """ 7 | :model 8 | :sample 9 | :**kwargs 10 | """ 11 | raise NotImplementedError() 12 | -------------------------------------------------------------------------------- /src/learning/state_abstraction/autoencoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from utils.cuda import cuda_var 5 | from learning.learning_utils.entropy_decay_policy import EntropyDecayPolicy 6 | 7 | 8 | class Autoencoder: 9 | """ 10 | State abstraction using autoencoder 11 | """ 12 | 13 | def __init__(self, constants, epoch): 14 | self.entropy_decay_policy = EntropyDecayPolicy(constants, epoch) 15 | self.entropy_coeff = constants["entropy_reg_coeff"] 16 | 17 | def calc_loss(self, model, batch, epoch, discretized, test_set_errors=None, past_entropy=None): 18 | observations = cuda_var( 19 | torch.cat( 20 | [torch.from_numpy(np.array(point.get_next_obs())).view(1, -1) for point in batch], 21 | dim=0, 22 | ) 23 | ).float() 24 | 25 | # Compute loss given by L2 distance between the observation and reconstructed observation. 26 | # The returned observation is flattened. 
27 | reconstructed_obs, meta_dict = model.reconstruct( 28 | observations=observations, discretized=discretized 29 | ) # outputs of size batch x -1 30 | 31 | reconstruction_loss = torch.norm(observations - reconstructed_obs) 32 | 33 | if discretized: 34 | # For discretized models, there is an internal classification step represented by a probability 35 | # distribution that can be controlled using an entropy bonus 36 | # NOT SUPPORTED AT THE MOMENT 37 | decay_coeff = self.entropy_decay_policy.get_entropy_coeff(epoch, test_set_errors, past_entropy) 38 | loss = reconstruction_loss - self.entropy_coeff * decay_coeff * meta_dict["mean_entropy"] 39 | else: 40 | decay_coeff = None 41 | loss = reconstruction_loss 42 | 43 | info_dict = dict() 44 | 45 | info_dict["reconstruction_loss"] = reconstruction_loss 46 | 47 | if discretized: 48 | info_dict["mean_entropy"] = meta_dict["mean_entropy"] 49 | info_dict["entropy_coeff"] = self.entropy_coeff * decay_coeff 50 | else: 51 | info_dict["mean_entropy"] = -1 52 | info_dict["entropy_coeff"] = 0.0 53 | 54 | return loss, info_dict 55 | -------------------------------------------------------------------------------- /src/learning/state_abstraction/generalized_inverse_kinematics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from utils.cuda import cuda_var 5 | from learning.learning_utils.entropy_decay_policy import EntropyDecayPolicy 6 | 7 | 8 | class GeneralizedInverseKinematics: 9 | """ 10 | State abstraction using generalized inverse kinematics 11 | """ 12 | 13 | def __init__(self, constants, epoch): 14 | self.entropy_decay_policy = EntropyDecayPolicy(constants, epoch) 15 | self.entropy_coeff = constants["entropy_reg_coeff"] 16 | 17 | def calc_loss(self, model, batch, epoch, discretized, test_set_errors=None, past_entropy=None): 18 | past_observations = cuda_var( 19 | torch.cat( 20 | [torch.from_numpy(np.array(point.get_curr_obs())).view(1, -1) for point in batch], 21 | dim=0, 22 | ) 23 | ).float() 24 | past_actions = cuda_var( 25 | torch.cat( 26 | [torch.from_numpy(np.array(point.get_action())).view(1, -1) for point in batch], 27 | dim=0, 28 | ) 29 | ).long() 30 | observations = cuda_var( 31 | torch.cat( 32 | [torch.from_numpy(np.array(point.get_next_obs())).view(1, -1) for point in batch], 33 | dim=0, 34 | ) 35 | ).float() 36 | 37 | # Compute loss 38 | log_probs, meta_dict = model.gen_log_prob( 39 | prev_observations=past_observations, 40 | observations=observations, 41 | discretized=discretized, 42 | ) # outputs of size batch x num_actions 43 | classification_loss = -torch.mean(log_probs.gather(1, past_actions.view(-1, 1))) 44 | 45 | if discretized: 46 | # For discretized models, there is an internal classification step represented by a probability 47 | # distribution that can be controlled using an entropy bonus 48 | # NOT SUPPORTED AT THE MOMENT 49 | decay_coeff = self.entropy_decay_policy.get_entropy_coeff(epoch, test_set_errors, past_entropy) 50 | loss = classification_loss - self.entropy_coeff * decay_coeff * meta_dict["mean_entropy"] 51 | else: 52 | decay_coeff = None 53 | loss = classification_loss 54 | 55 | info_dict = dict() 56 | 57 | info_dict["classification_loss"] = classification_loss 58 | 59 | if discretized: 60 | info_dict["mean_entropy"] = meta_dict["mean_entropy"] 61 | info_dict["entropy_coeff"] = self.entropy_coeff * decay_coeff 62 | else: 63 | info_dict["mean_entropy"] = -1 64 | info_dict["entropy_coeff"] = 0.0 65 | 66 | return loss, info_dict 67 |
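Note: the state-abstraction objectives in this directory (autoencoder, generalized inverse kinematics above, and the inverse-kinematics and noise-contrastive variants below) expose a compatible calc_loss interface, so one generic training loop can drive any of them. A minimal sketch of such a loop follows; the make_model and sample_batch helpers, the optimizer choice, and the constants values are illustrative assumptions, not part of this repository.

import torch
from learning.state_abstraction.inverse_kinematics import InverseKinematics

constants = {"entropy_reg_coeff": 0.01}  # assumed value for illustration
num_epochs = 10

model = make_model()        # hypothetical helper: builds a model exposing gen_log_prob
objective = InverseKinematics(constants, num_epochs)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(num_epochs):
    # hypothetical helper: yields transitions exposing get_curr_obs/get_action/get_next_obs
    batch = sample_batch()
    loss, info = objective.calc_loss(model, batch, epoch, discretized=False)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print("epoch %d: classification loss %.4f" % (epoch, info["classification_loss"].item()))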
-------------------------------------------------------------------------------- /src/learning/state_abstraction/inverse_kinematics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from utils.cuda import cuda_var 5 | from learning.learning_utils.entropy_decay_policy import EntropyDecayPolicy 6 | 7 | 8 | class InverseKinematics: 9 | """ 10 | State abstraction using inverse kinematics 11 | """ 12 | 13 | def __init__(self, constants, epoch): 14 | self.entropy_decay_policy = EntropyDecayPolicy(constants, epoch) 15 | self.entropy_coeff = constants["entropy_reg_coeff"] 16 | 17 | def calc_loss(self, model, batch, epoch, discretized, test_set_errors=None, past_entropy=None): 18 | prev_observations = cuda_var( 19 | torch.cat( 20 | [torch.from_numpy(np.array(point.get_curr_obs())).view(1, -1) for point in batch], 21 | dim=0, 22 | ) 23 | ).float() 24 | actions = cuda_var( 25 | torch.cat( 26 | [torch.from_numpy(np.array(point.get_action())).view(1, -1) for point in batch], 27 | dim=0, 28 | ) 29 | ).long() 30 | observations = cuda_var( 31 | torch.cat( 32 | [torch.from_numpy(np.array(point.get_next_obs())).view(1, -1) for point in batch], 33 | dim=0, 34 | ) 35 | ).float() 36 | 37 | # Compute loss 38 | log_probs, meta_dict = model.gen_log_prob( 39 | prev_observations=prev_observations, 40 | observations=observations, 41 | discretized=discretized, 42 | ) # outputs of size batch x num_actions 43 | classification_loss = -torch.mean(log_probs.gather(1, actions.view(-1, 1))) 44 | 45 | if discretized: 46 | # For discretized models, there is an internal classification step represented by a probability 47 | # distribution that can be controlled using an entropy bonus 48 | # NOT SUPPORTED AT THE MOMENT 49 | decay_coeff = self.entropy_decay_policy.get_entropy_coeff(epoch, test_set_errors, past_entropy) 50 | loss = classification_loss - self.entropy_coeff * decay_coeff * meta_dict["mean_entropy"] 51 | else: 52 | decay_coeff = None 53 | loss = classification_loss 54 | 55 | info_dict = dict() 56 | 57 | info_dict["classification_loss"] = classification_loss 58 | 59 | if discretized: 60 | info_dict["mean_entropy"] = meta_dict["mean_entropy"] 61 | info_dict["entropy_coeff"] = self.entropy_coeff * decay_coeff 62 | else: 63 | info_dict["mean_entropy"] = -1 64 | info_dict["entropy_coeff"] = 0.0 65 | 66 | return loss, info_dict 67 | -------------------------------------------------------------------------------- /src/learning/state_abstraction/noise_contrastive_global.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from utils.cuda import cuda_var 5 | from learning.learning_utils.entropy_decay_policy import EntropyDecayPolicy 6 | 7 | 8 | class NoiseContrastiveGlobal: 9 | """ 10 | State abstraction using noise contrastive learning with globally normalized probabilities 11 | """ 12 | 13 | def __init__(self, constants, epoch): 14 | self.entropy_decay_policy = EntropyDecayPolicy(constants, epoch) 15 | self.entropy_coeff = constants["entropy_reg_coeff"] 16 | 17 | @staticmethod 18 | def calc_loss(model, batch, epoch, discretized, test_set_errors=None, past_entropy=None): 19 | prev_observations = cuda_var( 20 | torch.cat( 21 | [torch.from_numpy(np.array(point[0])).view(1, -1) for point in batch], 22 | dim=0, 23 | ) 24 | ).float() 25 | actions = cuda_var( 26 | torch.cat( 27 | [torch.from_numpy(np.array(point[1])).view(1, -1) for point in batch], 28 | dim=0, 29 | ) 30
| ).long() 31 | observations = cuda_var( 32 | torch.cat( 33 | [torch.from_numpy(np.array(point[2])).view(1, -1) for point in batch], 34 | dim=0, 35 | ) 36 | ).float() 37 | 38 | # Generate a matrix M of size batch x batch where M[i, j] denotes p(y = 1 | x_i, a_i, x'_j) 39 | # diagonal elements are real transitions, non-diagonal elements are imposter candidates 40 | scores = model.gen_scores( 41 | prev_observations=prev_observations, 42 | actions=actions, 43 | observations=observations, 44 | ) 45 | 46 | classification_loss = -(torch.diagonal(scores, 0) - torch.logsumexp(scores, 1)).mean() # negative mean log-softmax of the true transitions 47 | 48 | info_dict = dict() 49 | info_dict["classification_loss"] = classification_loss 50 | info_dict["mean_entropy"] = 0.0 51 | info_dict["entropy_coeff"] = 0.0 52 | 53 | return classification_loss, info_dict 54 | -------------------------------------------------------------------------------- /src/learning/state_abstraction/noise_contrastive_local.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | 5 | from utils.cuda import cuda_var 6 | from learning.learning_utils.entropy_decay_policy import EntropyDecayPolicy 7 | 8 | 9 | class NoiseContrastiveLocal: 10 | """ 11 | State abstraction using noise contrastive learning with locally normalized probabilities 12 | """ 13 | 14 | def __init__(self, constants, epoch): 15 | self.entropy_decay_policy = EntropyDecayPolicy(constants, epoch) 16 | self.entropy_coeff = constants["entropy_reg_coeff"] 17 | 18 | @staticmethod 19 | def calc_loss(model, batch): 20 | prev_observations = cuda_var( 21 | torch.cat( 22 | [torch.from_numpy(np.array(point[0])).view(1, -1) for point in batch], 23 | dim=0, 24 | ) 25 | ).float() 26 | actions = cuda_var( 27 | torch.cat( 28 | [torch.from_numpy(np.array(point[1])).view(1, -1) for point in batch], 29 | dim=0, 30 | ) 31 | ).long() 32 | observations = cuda_var( 33 | torch.cat( 34 | [torch.from_numpy(np.array(point[2])).view(1, -1) for point in batch], 35 | dim=0, 36 | ) 37 | ).float() 38 | 39 | # Generate a matrix M of size batch x batch where M[i, j] denotes p(y = 1 | x_i, a_i, x'_j) 40 | # diagonal elements are real transitions, non-diagonal elements are imposter candidates 41 | scores = model.gen_scores( 42 | prev_observations=prev_observations, 43 | actions=actions, 44 | observations=observations, 45 | ) 46 | 47 | batch_size = len(batch) 48 | 49 | # Binary NCE: diagonal entries are positives (+1), off-diagonal entries are negatives (-1); the weights below give each class equal total mass 50 | log_probs = F.logsigmoid((2 * torch.eye(batch_size, device=scores.device) - 1) * scores) 51 | 52 | classification_loss = -torch.sum( 53 | log_probs 54 | * (torch.eye(batch_size, device=scores.device) / batch_size + (1 - torch.eye(batch_size, device=scores.device)) / batch_size / (batch_size - 1)) 55 | ) 56 | 57 | info_dict = dict() 58 | info_dict["classification_loss"] = classification_loss 59 | info_dict["mean_entropy"] = 0.0 60 | info_dict["entropy_coeff"] = 0.0 61 | 62 | return classification_loss, info_dict 63 | -------------------------------------------------------------------------------- /src/learning/tabular_rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/learning/tabular_rl/__init__.py -------------------------------------------------------------------------------- /src/learning/tabular_rl/det_tabular_mdp_builder.py: -------------------------------------------------------------------------------- 1 | from
learning.datastructures.abstract_tabular_mdp import AbstractTabularMDP 2 | 3 | 4 | class DetTabularMDPBuilder(AbstractTabularMDP): 5 | """ 6 | Builder class to construct a deterministic tabular MDP 7 | """ 8 | 9 | def __init__(self, actions, horizon, gamma=1.0): 10 | AbstractTabularMDP.__init__(self, actions, horizon, gamma) 11 | 12 | self.actions = actions 13 | self.horizon = horizon 14 | self.gamma = gamma 15 | 16 | # States reached at different time step 17 | # timestep -> [state1, state2, ...] 18 | self._states = dict() 19 | 20 | # (state, action) -> [(new_state, 1.0)] 21 | self._transitions = dict() 22 | 23 | # (state, action) -> scalar_value 24 | self._rewards = dict() 25 | 26 | self._finalize = False 27 | 28 | def add_state(self, state, timestep): 29 | assert not self._finalize, "This MDP has been finalized so new states cannot be added to it." 30 | 31 | if timestep not in self._states: 32 | self._states[timestep] = [] 33 | 34 | self._states[timestep].append(state) 35 | 36 | def add_transition(self, state, action, new_state): 37 | assert not self._finalize, "This MDP has been finalized so new transitions cannot be added to it." 38 | 39 | if (state, action) in self._transitions: 40 | return 41 | 42 | self._transitions[(state, action)] = [(new_state, 1.0)] 43 | 44 | def add_reward(self, state, action, reward): 45 | assert not self._finalize, "This MDP has been finalized so new rewards cannot be added to it." 46 | 47 | if (state, action) in self._rewards: 48 | return 49 | 50 | self._rewards[(state, action)] = reward 51 | 52 | def finalize(self): 53 | self._finalize = True 54 | 55 | def get_states(self, timestep): 56 | return self._states[timestep] 57 | 58 | def num_states(self, timestep): 59 | return len(self._states[timestep]) 60 | 61 | def get_transitions(self, state, action): 62 | return self._transitions[(state, action)] 63 | 64 | def get_reward(self, state, action, next_state, step): 65 | return self._rewards[(state, action)] 66 | -------------------------------------------------------------------------------- /src/learning/tabular_rl/value_iteration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class ValueIteration: 5 | """ 6 | Performs Bellman Optimal Q-iteration on Tabular MDP 7 | """ 8 | 9 | def __init__(self): 10 | pass 11 | 12 | def do_value_iteration(self, tabular_mdp, reward_func=None, min_reward_val=0.0): 13 | actions = tabular_mdp.actions 14 | num_actions = len(actions) 15 | q_values = dict() 16 | 17 | for h in range(tabular_mdp.horizon, -1, -1): 18 | states = tabular_mdp.get_states(h) 19 | 20 | for state in states: 21 | state_with_timestep = (h, state) 22 | 23 | q_values[state_with_timestep] = np.repeat(min_reward_val, num_actions).astype(np.float32) 24 | 25 | for action in actions: 26 | if h == tabular_mdp.horizon: 27 | q_values[state_with_timestep][action] = 0.0 28 | else: 29 | q_val = 0.0 30 | for new_state, prob_val in tabular_mdp.get_transitions(state, action): 31 | if reward_func is None: 32 | # Use the environment reward function 33 | reward = tabular_mdp.get_reward(state, action, new_state, h) 34 | else: 35 | # Use the given reward function 36 | reward = reward_func(state, action, new_state, h) 37 | 38 | q_val += prob_val * (reward + tabular_mdp.gamma * q_values[(h + 1, new_state)].max()) 39 | 40 | q_values[state_with_timestep][action] = q_val 41 | 42 | return q_values 43 | -------------------------------------------------------------------------------- /src/model/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/__init__.py -------------------------------------------------------------------------------- /src/model/bottleneck/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/bottleneck/__init__.py -------------------------------------------------------------------------------- /src/model/bottleneck/gaussian_bottleneck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class GaussianBottleneck(nn.Module): 6 | 7 | def __init__(self, hidden_dim): 8 | super(GaussianBottleneck, self).__init__() 9 | 10 | self.hidden_dim = hidden_dim 11 | self.pre_enc = nn.Linear(self.hidden_dim, 2 * self.hidden_dim) 12 | self.post_enc = nn.Linear(self.hidden_dim, self.hidden_dim) 13 | 14 | # TODO add command line argument 15 | self.kl_weight = 0.0001 16 | 17 | self.mu_prior = nn.Parameter(torch.zeros(self.hidden_dim)) 18 | self.sigma_prior = nn.Parameter(torch.ones(self.hidden_dim)) 19 | 20 | if torch.cuda.is_available(): 21 | self.cuda() 22 | 23 | def gb_helper(self, h): 24 | 25 | h = self.pre_enc(h) 26 | mu = h[:, :self.hidden_dim] 27 | std = torch.exp(h[:, self.hidden_dim:]) + 1e-6 28 | q_z = torch.distributions.Normal(loc=mu, scale=std) 29 | 30 | if self.training: 31 | # print('std: {}, mu: {}, mu_prior: {}'.format(std.mean(), torch.abs(mu).mean(), 32 | # torch.abs(self.mu_prior).mean())) 33 | # h = mu + torch.randn_like(std) * std 34 | h = q_z.rsample() 35 | # klb_loss = (mu**2 + std**2 - 2*torch.log(std)).sum(dim=1).mean() * self.kl_weight 36 | p_z = torch.distributions.Normal(loc=self.mu_prior, scale=self.sigma_prior) 37 | klb_loss = self.kl_weight * torch.distributions.kl_divergence(q_z, p_z).sum(dim=1).mean() 38 | else: 39 | h = mu 40 | klb_loss = 0.0 41 | 42 | # h = self.post_enc(h) 43 | return h, klb_loss 44 | -------------------------------------------------------------------------------- /src/model/bottleneck/vq_bottleneck.py: -------------------------------------------------------------------------------- 1 | 2 | class VQBottleneckWrapper: 3 | 4 | def __init__(self): 5 | pass 6 | 7 | @staticmethod 8 | def get_bottleneck(model_name, constants, heads=None, codebook_size=None): 9 | 10 | if model_name == "vq": 11 | 12 | from vector_quantize_pytorch import VectorQuantize 13 | 14 | return VectorQuantize( 15 | 16 | dim=constants["vq_dim"], 17 | codebook_size=constants["vq_codebook_size"] if codebook_size is None else codebook_size, 18 | 19 | # the exponential moving average decay, lower means the dictionary will change faster 20 | decay=constants["vq_decay"], 21 | 22 | # 1., # the weight on the commitment loss 23 | commitment_weight=constants["vq_commitment_weight"], 24 | 25 | # in paper, they recommended a value of 10 26 | orthogonal_reg_weight=constants["vq_orthogonal_reg_weight"], 27 | 28 | # this would randomly sample from the codebook for the orthogonal regularization loss, 29 | # for limiting memory usage 30 | orthogonal_reg_max_codes=constants["vq_orthogonal_reg_max_codes"], 31 | 32 | # set this to True if you have a very large codebook, and would only like to enforce the 33 | # loss on the activated codes per batch 34 | 
orthogonal_reg_active_codes_only=constants["vq_orthogonal_reg_active_codes_only"], 35 | 36 | # number of heads to vector quantize, codebook shared across all heads 37 | heads=constants["vq_heads"] if heads is None else heads, 38 | 39 | # whether to have a separate codebook per head. False would mean 1 shared codebook 40 | separate_codebook_per_head=constants["vq_separate_codebook_per_head"], 41 | 42 | codebook_dim=constants["vq_codebook_dim"], 43 | sample_codebook_temp=constants["vq_sample_codebook_temp"], 44 | kmeans_init=constants["vq_kmeans_init"], # set to True 45 | 46 | # number of kmeans iterations to calculate the centroids for the codebook on init 47 | kmeans_iters=constants["vq_kmeans_iters"] 48 | ) 49 | 50 | else: 51 | raise AssertionError("Unhandled model name %r" % model_name) 52 | 53 | @staticmethod 54 | def vq_helper(vq_model, encoding): 55 | encoding = encoding.unsqueeze(0) 56 | encoding, indices, vq_loss = vq_model(encoding) # https://github.com/lucidrains/vector-quantize-pytorch 57 | vq_loss = vq_loss.sum() 58 | encoding = encoding.squeeze(0) 59 | return encoding, indices, vq_loss 60 | -------------------------------------------------------------------------------- /src/model/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/classifiers/__init__.py -------------------------------------------------------------------------------- /src/model/classifiers/classifier_model_wrapper.py: -------------------------------------------------------------------------------- 1 | from model.classifiers.convm_classifier import ConvMClassifier 2 | from model.classifiers.conv3_classifier import Conv3Classifier 3 | from model.classifiers.conv2_classifier import Conv2Classifier 4 | from model.classifiers.feedforward_classifier import FeedForwardClassifier 5 | from model.classifiers.linear_classifier import LinearClassifier 6 | 7 | 8 | class ClassifierModelWrapper: 9 | """Wrapper for classification model""" 10 | 11 | @staticmethod 12 | def get_classifier(model_name, num_class, config, constants, bootstrap_model=None): 13 | if model_name == "linear": 14 | return LinearClassifier(num_class, config, constants, bootstrap_model) 15 | 16 | elif model_name == "ff": 17 | return FeedForwardClassifier(num_class, config, constants, bootstrap_model) 18 | 19 | elif model_name == "conv2": 20 | return Conv2Classifier(num_class, config, constants, bootstrap_model) 21 | 22 | elif model_name == "conv3": 23 | return Conv3Classifier(num_class, config, constants, bootstrap_model) 24 | 25 | elif model_name == "convm": 26 | return ConvMClassifier(num_class, config, constants, bootstrap_model) 27 | 28 | else: 29 | raise NotImplementedError( 30 | "Model %s is not implemented. 
Implemented models are linear, " "ff, conv2, conv3, and convm" % model_name 31 | ) 32 | -------------------------------------------------------------------------------- /src/model/classifiers/feedforward_classifier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class FeedForwardClassifier(nn.Module): 7 | """Model for learning the forward kinematic inseparability""" 8 | 9 | NAME = "ff" 10 | 11 | def __init__(self, num_class, config, constants, bootstrap_model=None): 12 | super(FeedForwardClassifier, self).__init__() 13 | 14 | self.num_class = num_class 15 | self.config = config 16 | self.constants = constants 17 | 18 | if config["feature_type"] == "feature": 19 | self.obs_encoder = nn.Sequential( 20 | nn.Linear(config["obs_dim"], constants["n_hidden"]), 21 | nn.LeakyReLU(), 22 | nn.Linear(constants["n_hidden"], constants["n_hidden"]), 23 | nn.LeakyReLU(), 24 | nn.Linear(constants["n_hidden"], self.num_class), 25 | ) 26 | 27 | elif config["feature_type"] == "image": 28 | raise NotImplementedError() 29 | 30 | else: 31 | raise NotImplementedError() 32 | 33 | if torch.cuda.is_available(): 34 | self.cuda() 35 | 36 | if bootstrap_model is not None: 37 | self.load_state_dict(bootstrap_model.state_dict()) 38 | 39 | def _gen_logits(self, observations, return_log_prob=True): 40 | if self.config["feature_type"] == "image": 41 | raise AssertionError("Cannot handle images right now") 42 | 43 | logits = self.obs_encoder(observations) 44 | 45 | if return_log_prob: 46 | return F.log_softmax(logits, dim=1), dict() 47 | else: 48 | return F.softmax(logits, dim=1), dict() 49 | 50 | def gen_log_prob(self, observations): 51 | return self._gen_logits(observations, return_log_prob=True) 52 | 53 | def gen_prob(self, observations): 54 | return self._gen_logits(observations, return_log_prob=False) 55 | 56 | def save(self, folder_name, model_name=None): 57 | if model_name is None: 58 | torch.save(self.state_dict(), folder_name + FeedForwardClassifier.NAME) 59 | else: 60 | torch.save(self.state_dict(), folder_name + model_name) 61 | 62 | def load(self, folder_name, model_name=None): 63 | if model_name is None: 64 | self.load_state_dict(torch.load(folder_name + FeedForwardClassifier.NAME)) 65 | else: 66 | self.load_state_dict(torch.load(folder_name + model_name)) 67 | -------------------------------------------------------------------------------- /src/model/classifiers/linear_classifier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class LinearClassifier(nn.Module): 7 | """Linear classifier""" 8 | 9 | NAME = "linear" 10 | 11 | def __init__(self, num_class, config, constants, bootstrap_model=None): 12 | super(LinearClassifier, self).__init__() 13 | 14 | self.num_class = num_class 15 | self.config = config 16 | self.constants = constants 17 | 18 | if config["feature_type"] == "feature": 19 | self.obs_encoder = nn.Sequential(nn.Linear(config["obs_dim"], self.num_class)) 20 | 21 | elif config["feature_type"] == "image": 22 | raise NotImplementedError() 23 | 24 | else: 25 | raise NotImplementedError() 26 | 27 | if torch.cuda.is_available(): 28 | self.cuda() 29 | 30 | if bootstrap_model is not None: 31 | self.load_state_dict(bootstrap_model.state_dict()) 32 | 33 | def _gen_logits(self, observations, return_log_prob=True): 34 | if self.config["feature_type"] == "image":
35 | raise AssertionError("Cannot handle images right now") 36 | 37 | logits = self.obs_encoder(observations) 38 | 39 | if return_log_prob: 40 | return F.log_softmax(logits, dim=1), dict() 41 | else: 42 | return F.softmax(logits, dim=1), dict() 43 | 44 | def gen_log_prob(self, observations): 45 | return self._gen_logits(observations, return_log_prob=True) 46 | 47 | def gen_prob(self, observations): 48 | return self._gen_logits(observations, return_log_prob=False) 49 | 50 | def save(self, folder_name, model_name=None): 51 | if model_name is None: 52 | torch.save(self.state_dict(), folder_name + LinearClassifier.NAME) 53 | else: 54 | torch.save(self.state_dict(), folder_name + model_name) 55 | 56 | def load(self, folder_name, model_name=None): 57 | if model_name is None: 58 | self.load_state_dict(torch.load(folder_name + LinearClassifier.NAME)) 59 | else: 60 | self.load_state_dict(torch.load(folder_name + model_name)) 61 | -------------------------------------------------------------------------------- /src/model/decoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/decoder/__init__.py -------------------------------------------------------------------------------- /src/model/decoder/conv_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ConvDecoder(nn.Module): 6 | NAME = "conv" 7 | 8 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 9 | super(ConvDecoder, self).__init__() 10 | 11 | self.height = height 12 | self.channel = channel 13 | self.width = width 14 | 15 | self.out_dim = out_dim 16 | 17 | self.linear_layer = nn.Linear(out_dim, 32 * 2 * 2) 18 | 19 | self.model = nn.Sequential( 20 | nn.LeakyReLU(), 21 | nn.ConvTranspose2d(32, 16, (2, 2), 2, output_padding=1), 22 | nn.LeakyReLU(), 23 | nn.ConvTranspose2d(16, 16, (4, 4), 2), 24 | nn.LeakyReLU(), 25 | nn.ConvTranspose2d(16, 16, (4, 4), 2), 26 | nn.LeakyReLU(), 27 | nn.ConvTranspose2d(16, self.channel, (6, 6), 2), 28 | ) 29 | 30 | if torch.cuda.is_available(): 31 | self.cuda() 32 | 33 | if bootstrap_model is not None: 34 | self.load_state_dict(bootstrap_model.state_dict()) 35 | 36 | def forward(self, vec): 37 | return self.decode(vec) 38 | 39 | def decode(self, vec): 40 | batch_size = vec.size(0) 41 | out = self.linear_layer(vec).view(batch_size, 32, 2, 2) 42 | return self.model(out) 43 | -------------------------------------------------------------------------------- /src/model/decoder/conv_decoder2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ConvDecoder2(nn.Module): 6 | NAME = "conv2" 7 | 8 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 9 | super(ConvDecoder2, self).__init__() 10 | 11 | self.height = height 12 | self.channel = channel 13 | self.width = width 14 | 15 | self.out_dim = out_dim 16 | 17 | self.linear_layer = nn.Linear(out_dim, 32 * 2 * 2) 18 | 19 | self.model = nn.Sequential( 20 | nn.LeakyReLU(), 21 | nn.Conv2d(32, 128, 3, stride=1, padding=1), 22 | nn.Upsample(scale_factor=2), 23 | nn.BatchNorm2d(128), 24 | nn.LeakyReLU(), 25 | nn.Conv2d(128, 64, 3, stride=1, padding=1), 26 | nn.Upsample(scale_factor=2), 27 | nn.BatchNorm2d(64), 28 | nn.LeakyReLU(), 29 | nn.Conv2d(64, 32, 3, stride=1, padding=1), 30 | 
nn.Upsample(scale_factor=2), 31 | nn.BatchNorm2d(32), 32 | nn.LeakyReLU(), 33 | nn.Conv2d(32, 16, 3, stride=1, padding=1), 34 | nn.Upsample(scale_factor=2), 35 | nn.BatchNorm2d(16), 36 | nn.LeakyReLU(), 37 | nn.Conv2d(16, 8, 3, stride=1, padding=1), 38 | nn.Upsample(size=(56, 56)), 39 | nn.BatchNorm2d(8), 40 | nn.LeakyReLU(), 41 | nn.Conv2d(8, self.channel, 3, stride=1, padding=1), 42 | ) 43 | 44 | if torch.cuda.is_available(): 45 | self.cuda() 46 | 47 | if bootstrap_model is not None: 48 | self.load_state_dict(bootstrap_model.state_dict()) 49 | 50 | def forward(self, vec): 51 | return self.decode(vec) 52 | 53 | def decode(self, vec): 54 | batch_size = vec.size(0) 55 | out = self.linear_layer(vec).view(batch_size, 32, 2, 2) 56 | # print(out.shape) 57 | # raise Exception('done') 58 | return self.model(out) 59 | -------------------------------------------------------------------------------- /src/model/decoder/conv_decoder_ai2thor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ConvDecoderAI2Thor(nn.Module): 6 | NAME = "conv-ai2thor" 7 | 8 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 9 | super(ConvDecoderAI2Thor, self).__init__() 10 | 11 | self.height = height 12 | self.channel = channel 13 | self.width = width 14 | 15 | self.out_dim = out_dim 16 | 17 | self.linear_layer = nn.Linear(out_dim, 32 * 2 * 4) 18 | 19 | self.model = nn.Sequential( 20 | nn.LeakyReLU(), 21 | nn.Conv2d(32, 128, 3, stride=1, padding=1), 22 | nn.Upsample(scale_factor=2), 23 | nn.BatchNorm2d(128), 24 | nn.LeakyReLU(), 25 | nn.Conv2d(128, 64, 3, stride=1, padding=1), 26 | nn.Upsample(scale_factor=2), 27 | nn.BatchNorm2d(64), 28 | nn.LeakyReLU(), 29 | nn.Conv2d(64, 32, 3, stride=1, padding=1), 30 | nn.Upsample(scale_factor=2), 31 | nn.BatchNorm2d(32), 32 | nn.LeakyReLU(), 33 | nn.Conv2d(32, 16, 3, stride=1, padding=1), 34 | nn.Upsample(scale_factor=2), 35 | nn.BatchNorm2d(16), 36 | nn.LeakyReLU(), 37 | nn.Conv2d(16, 8, 3, stride=1, padding=1), 38 | nn.Upsample(size=(56, 56 * 2)), 39 | nn.BatchNorm2d(8), 40 | nn.LeakyReLU(), 41 | nn.Conv2d(8, self.channel, 3, stride=1, padding=1), 42 | ) 43 | 44 | if torch.cuda.is_available(): 45 | self.cuda() 46 | 47 | if bootstrap_model is not None: 48 | self.load_state_dict(bootstrap_model.state_dict()) 49 | 50 | def forward(self, vec): 51 | return self.decode(vec) 52 | 53 | def decode(self, vec): 54 | batch_size = vec.size(0) 55 | out = self.linear_layer(vec).view(batch_size, 32, 2, 4) 56 | # print(out.shape) 57 | # raise Exception('done') 58 | return self.model(out) 59 | -------------------------------------------------------------------------------- /src/model/decoder/decoder_wrapper.py: -------------------------------------------------------------------------------- 1 | from model.decoder.conv_decoder import ConvDecoder 2 | from model.decoder.conv_decoder2 import ConvDecoder2 3 | from model.decoder.conv_decoder_ai2thor import ConvDecoderAI2Thor 4 | from model.decoder.feedforward_decoder import FeedForwardDecoder 5 | 6 | 7 | class DecoderModelWrapper: 8 | """Wrapper for decoder models""" 9 | 10 | @staticmethod 11 | def get_decoder(model_name, bootstrap_model=None, **kwargs): 12 | models = [FeedForwardDecoder.NAME, ConvDecoder.NAME, ConvDecoder2.NAME] 13 | 14 | if model_name == FeedForwardDecoder.NAME: 15 | return FeedForwardDecoder(**kwargs, bootstrap_model=bootstrap_model) 16 | 17 | elif model_name == ConvDecoder.NAME: 18 | return ConvDecoder(**kwargs, 
bootstrap_model=bootstrap_model) 19 | 20 | elif model_name == ConvDecoder2.NAME: 21 | return ConvDecoder2(**kwargs, bootstrap_model=bootstrap_model) 22 | 23 | elif model_name == ConvDecoderAI2Thor.NAME: 24 | return ConvDecoderAI2Thor(**kwargs, bootstrap_model=bootstrap_model) 25 | 26 | else: 27 | raise NotImplementedError("Model %s is not implemented. Implemented models are linear, %r" % (model_name, models)) 28 | -------------------------------------------------------------------------------- /src/model/decoder/feedforward_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class FeedForwardDecoder(nn.Module): 6 | NAME = "ff" 7 | 8 | def __init__(self, num_inputs, inp_dim, out_dim, hidden_dim, bootstrap_model=None): 9 | super(FeedForwardDecoder, self).__init__() 10 | 11 | self.num_inputs = num_inputs 12 | self.inp_dim = inp_dim 13 | self.out_dim = out_dim 14 | self.hidden_dim = hidden_dim 15 | 16 | self.model = nn.Sequential( 17 | nn.Linear(self.out_dim, self.hidden_dim), 18 | nn.LeakyReLU(), 19 | nn.Linear(self.hidden_dim, self.num_inputs * self.inp_dim), 20 | ) 21 | 22 | if torch.cuda.is_available(): 23 | self.cuda() 24 | 25 | if bootstrap_model is not None: 26 | self.load_state_dict(bootstrap_model.state_dict()) 27 | 28 | def forward(self, vec): 29 | return self.decode(vec) 30 | 31 | def decode(self, vec): 32 | output = self.model(vec).data.cpu() 33 | outputs = [] 34 | for i in range(0, self.num_inputs): 35 | outputs.append(output[i * self.inp_dim : (i + 1) * self.inp_dim]) 36 | 37 | return outputs 38 | -------------------------------------------------------------------------------- /src/model/encoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/encoder/__init__.py -------------------------------------------------------------------------------- /src/model/encoder/conv2_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Conv2Encoder(nn.Module): 6 | NAME = "conv2" 7 | 8 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 9 | super(Conv2Encoder, self).__init__() 10 | 11 | self.height = height 12 | self.channel = channel 13 | self.width = width 14 | 15 | self.out_dim = out_dim 16 | 17 | self.model = nn.Sequential( 18 | nn.Conv2d(self.channel, 16, (8, 8), 4), 19 | nn.LeakyReLU(), 20 | nn.Conv2d(16, 32, (4, 4), 2), 21 | nn.LeakyReLU(), 22 | nn.Conv2d(32, 32, (4, 4), 1), 23 | nn.Flatten(), 24 | nn.Linear(128, out_dim), 25 | ) 26 | 27 | if torch.cuda.is_available(): 28 | self.cuda() 29 | 30 | if bootstrap_model is not None: 31 | self.load_state_dict(bootstrap_model.state_dict()) 32 | 33 | def forward(self, img): 34 | return self.encode(img) 35 | 36 | def encode(self, img): 37 | return self.model(img) 38 | -------------------------------------------------------------------------------- /src/model/encoder/conv3_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from utils.conv_util import get_conv_out_size 5 | 6 | 7 | class Conv3Encoder(nn.Module): 8 | """ 9 | Recommended convolution for gridworld 10 | """ 11 | 12 | NAME = "conv3" 13 | 14 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 15 | 
super(Conv3Encoder, self).__init__() 16 | 17 | self.height = height 18 | self.channel = channel 19 | self.width = width 20 | 21 | self.out_dim = out_dim 22 | 23 | # Note that the dynamic size below is calculated based on the model. If the model changes, then the size 24 | # will also change 25 | kernel_size1 = (5, 5) 26 | stride1 = (2, 2) 27 | 28 | kernel_size2 = (5, 5) 29 | stride2 = (2, 2) 30 | 31 | kernel_size3 = (5, 5) 32 | stride3 = (1, 1) 33 | 34 | dynamic_size_h1, dynamic_size_w1 = get_conv_out_size(self.height, self.width, kernel_size=kernel_size1, stride=stride1) 35 | 36 | dynamic_size_h2, dynamic_size_w2 = get_conv_out_size( 37 | dynamic_size_h1, dynamic_size_w1, kernel_size=kernel_size2, stride=stride2 38 | ) 39 | 40 | dynamic_size_h3, dynamic_size_w3 = get_conv_out_size( 41 | dynamic_size_h2, dynamic_size_w2, kernel_size=kernel_size3, stride=stride3 42 | ) 43 | 44 | self.n_channels_out = 32 45 | self.dynamic_size = dynamic_size_h3 * dynamic_size_w3 * self.n_channels_out 46 | 47 | self.model = nn.Sequential( 48 | nn.Conv2d(3, 32, kernel_size=kernel_size1, stride=stride1), 49 | nn.BatchNorm2d(32), 50 | nn.LeakyReLU(), 51 | nn.Conv2d(32, 64, kernel_size=kernel_size2, stride=stride2), 52 | nn.BatchNorm2d(64), 53 | nn.LeakyReLU(), 54 | nn.Conv2d(64, self.n_channels_out, kernel_size=kernel_size3, stride=stride3), 55 | nn.BatchNorm2d(self.n_channels_out), 56 | nn.Flatten(), 57 | nn.Linear(self.dynamic_size, out_dim), 58 | ) 59 | 60 | if torch.cuda.is_available(): 61 | self.cuda() 62 | 63 | if bootstrap_model is not None: 64 | self.load_state_dict(bootstrap_model.state_dict()) 65 | 66 | def forward(self, img): 67 | return self.encode(img) 68 | 69 | def encode(self, img): 70 | # print(img.shape) 71 | return self.model(img) 72 | -------------------------------------------------------------------------------- /src/model/encoder/conv4_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from utils.conv_util import get_conv_out_size 5 | 6 | 7 | class Conv4Encoder(nn.Module): 8 | NAME = "conv4" 9 | 10 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 11 | super(Conv4Encoder, self).__init__() 12 | 13 | self.height = height 14 | self.channel = channel 15 | self.width = width 16 | 17 | self.out_dim = out_dim 18 | 19 | # Note that the dynamic size below is calculated based on the model. 
If the model changes, then the size 20 | # will also change 21 | kernel_size1 = (8, 8) 22 | stride1 = (4, 4) 23 | 24 | kernel_size2 = (4, 4) 25 | stride2 = (2, 2) 26 | 27 | dynamic_size_h1, dynamic_size_w1 = get_conv_out_size(self.height, self.width, kernel_size=kernel_size1, stride=stride1) 28 | 29 | dynamic_size_h2, dynamic_size_w2 = get_conv_out_size( 30 | dynamic_size_h1, dynamic_size_w1, kernel_size=kernel_size2, stride=stride2 31 | ) 32 | 33 | self.n_channels_out = 32 34 | self.dynamic_size = dynamic_size_h2 * dynamic_size_w2 * self.n_channels_out 35 | 36 | self.model = nn.Sequential( 37 | nn.Conv2d(3, 16, (8, 8), 4), 38 | nn.ReLU(), 39 | nn.Conv2d(16, self.n_channels_out, (4, 4), 2), 40 | nn.ReLU(), 41 | nn.Flatten(), 42 | nn.Linear(self.dynamic_size, out_dim), 43 | ) 44 | 45 | if torch.cuda.is_available(): 46 | self.cuda() 47 | 48 | if bootstrap_model is not None: 49 | self.load_state_dict(bootstrap_model.state_dict()) 50 | 51 | def forward(self, img): 52 | return self.encode(img) 53 | 54 | def encode(self, img): 55 | # print(img.shape) 56 | return self.model(img) 57 | -------------------------------------------------------------------------------- /src/model/encoder/conv_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from utils.conv_util import get_conv_out_size 5 | 6 | 7 | class ConvEncoder(nn.Module): 8 | NAME = "conv" 9 | 10 | def __init__(self, height, width, channel, out_dim, bootstrap_model=None): 11 | super(ConvEncoder, self).__init__() 12 | 13 | self.height = height 14 | self.channel = channel 15 | self.width = width 16 | 17 | self.out_dim = out_dim 18 | 19 | # Note that the dynamic size below is calculated based on the model. If the model changes, then the size 20 | # will also change 21 | kernel_size1 = (8, 8) 22 | stride1 = (4, 4) 23 | 24 | kernel_size2 = (4, 4) 25 | stride2 = (2, 2) 26 | 27 | kernel_size3 = (4, 4) 28 | stride3 = (1, 1) 29 | 30 | dynamic_size_h1, dynamic_size_w1 = get_conv_out_size(self.height, self.width, kernel_size=kernel_size1, stride=stride1) 31 | 32 | dynamic_size_h2, dynamic_size_w2 = get_conv_out_size( 33 | dynamic_size_h1, dynamic_size_w1, kernel_size=kernel_size2, stride=stride2 34 | ) 35 | 36 | dynamic_size_h3, dynamic_size_w3 = get_conv_out_size( 37 | dynamic_size_h2, dynamic_size_w2, kernel_size=kernel_size3, stride=stride3 38 | ) 39 | 40 | self.n_channels_out = 32 41 | self.dynamic_size = dynamic_size_h3 * dynamic_size_w3 * self.n_channels_out 42 | 43 | self.model = nn.Sequential( 44 | nn.Conv2d(3, 16, kernel_size=kernel_size1, stride=stride1), 45 | nn.BatchNorm2d(16), 46 | nn.LeakyReLU(), 47 | nn.Conv2d(16, 32, kernel_size=kernel_size2, stride=stride2), 48 | nn.BatchNorm2d(32), 49 | nn.LeakyReLU(), 50 | nn.Conv2d(32, self.n_channels_out, kernel_size=kernel_size3, stride=stride3), 51 | nn.BatchNorm2d(self.n_channels_out), 52 | nn.Flatten(), 53 | nn.Linear(self.dynamic_size, out_dim), 54 | ) 55 | 56 | if torch.cuda.is_available(): 57 | self.cuda() 58 | 59 | if bootstrap_model is not None: 60 | self.load_state_dict(bootstrap_model.state_dict()) 61 | 62 | def forward(self, img): 63 | return self.encode(img) 64 | 65 | def encode(self, img): 66 | # print(img.shape) 67 | return self.model(img) 68 | -------------------------------------------------------------------------------- /src/model/encoder/encoder_wrapper.py: -------------------------------------------------------------------------------- 1 | from model.encoder.conv2_encoder import 
Conv2Encoder 2 | from model.encoder.conv3_encoder import Conv3Encoder 3 | from model.encoder.conv4_encoder import Conv4Encoder 4 | from model.encoder.conv_encoder import ConvEncoder 5 | from model.encoder.feedforward_encoder import FeedForwardEncoder 6 | 7 | 8 | class EncoderModelWrapper: 9 | """Wrapper for encoder models""" 10 | 11 | @staticmethod 12 | def get_encoder(model_name, bootstrap_model=None, **kwargs): 13 | models = [ 14 | FeedForwardEncoder, 15 | ConvEncoder, 16 | Conv2Encoder, 17 | Conv3Encoder, 18 | Conv4Encoder, 19 | ] 20 | model_names = [model.NAME for model in models] 21 | 22 | for model in models: 23 | if model_name == model.NAME: 24 | return model(**kwargs, bootstrap_model=bootstrap_model) 25 | 26 | raise NotImplementedError("Model %s is not implemented. Implemented models are %s" % (model_name, model_names)) 27 | -------------------------------------------------------------------------------- /src/model/encoder/feedforward_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class FeedForwardEncoder(nn.Module): 6 | NAME = "ff" 7 | 8 | def __init__(self, num_inputs, inp_dim, out_dim, hidden_dim, bootstrap_model=None): 9 | super(FeedForwardEncoder, self).__init__() 10 | 11 | self.num_inputs = num_inputs 12 | self.inp_dim = inp_dim 13 | self.out_dim = out_dim 14 | self.hidden_dim = hidden_dim 15 | 16 | self.model = nn.Sequential( 17 | nn.Linear(self.num_inputs * self.inp_dim, self.hidden_dim), 18 | nn.LeakyReLU(), 19 | nn.Linear(self.hidden_dim, self.out_dim), 20 | ) 21 | 22 | if torch.cuda.is_available(): 23 | self.cuda() 24 | 25 | if bootstrap_model is not None: 26 | self.load_state_dict(bootstrap_model.state_dict()) 27 | 28 | def forward(self, **inputs): 29 | return self.encode(**inputs) 30 | 31 | def encode(self, **inputs): 32 | vec = torch.cat(list(inputs.values()), dim=1) # inputs arrives as a kwargs dict of tensors 33 | return self.model(vec) 34 | -------------------------------------------------------------------------------- /src/model/forward_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/forward_model/__init__.py -------------------------------------------------------------------------------- /src/model/forward_model/conv_forward_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ConvForwardModel(nn.Module): 6 | NAME = "ConvForward" 7 | 8 | def __init__(self, exp_setup, bootstrap_model=None): 9 | super(ConvForwardModel, self).__init__() 10 | 11 | self.num_actions = exp_setup.config["num_actions"] 12 | self.latent_action_vec_dim = 256 13 | self.height, self.width, self.channel = exp_setup.config["obs_dim"] 14 | self.encoder_dim = exp_setup.constants["hidden_dim"] 15 | # self.reshape_layer = nn.Linear(self.encoder_dim + self.num_actions, 32 * 2 * 2) 16 | self.reshape_layer = nn.Linear(self.encoder_dim + self.latent_action_vec_dim, 256 * 2 * 2) 17 | 18 | self.model = nn.Sequential( 19 | nn.LeakyReLU(), 20 | nn.Conv2d(256, 128, 3, stride=1, padding=1), 21 | nn.Upsample(scale_factor=2), 22 | nn.BatchNorm2d(128), 23 | nn.LeakyReLU(), 24 | nn.Conv2d(128, 64, 3, stride=1, padding=1), 25 | nn.Upsample(scale_factor=2), 26 | nn.BatchNorm2d(64), 27 | nn.LeakyReLU(), 28 | nn.Conv2d(64, 32, 3, stride=1, padding=1), 29 | nn.Upsample(scale_factor=2), 30 | nn.BatchNorm2d(32), 31 |
nn.LeakyReLU(), 32 | nn.Conv2d(32, 16, 3, stride=1, padding=1), 33 | nn.Upsample(scale_factor=2), 34 | nn.BatchNorm2d(16), 35 | nn.LeakyReLU(), 36 | nn.Conv2d(16, 8, 3, stride=1, padding=1), 37 | nn.Upsample(size=(56, 56)), 38 | nn.BatchNorm2d(8), 39 | nn.LeakyReLU(), 40 | nn.Conv2d(8, self.channel, 3, stride=1, padding=1), 41 | ) 42 | 43 | if torch.cuda.is_available(): 44 | self.cuda() 45 | 46 | if bootstrap_model is not None: 47 | self.load_state_dict(bootstrap_model.state_dict()) 48 | 49 | def forward(self, obs_encoding, latent_action_vec): 50 | batch = obs_encoding.size(0) 51 | vec = torch.cat([obs_encoding, latent_action_vec], dim=1) # batch x dim 52 | vec = self.reshape_layer(vec).view(batch, 256, 2, 2) 53 | return self.model(vec) 54 | -------------------------------------------------------------------------------- /src/model/forward_model/forward_model_wrapper.py: -------------------------------------------------------------------------------- 1 | from model.forward_model.conv_forward_model import ConvForwardModel 2 | 3 | 4 | class ForwardDynamicsWrapper: 5 | """Wrapper for forward dynamics models""" 6 | 7 | @staticmethod 8 | def get_forward_dynamics_model(model_name, bootstrap_model=None, **kwargs): 9 | models = [ConvForwardModel.NAME] 10 | 11 | if model_name == ConvForwardModel.NAME: 12 | return ConvForwardModel(**kwargs, bootstrap_model=bootstrap_model) 13 | 14 | else: 15 | raise NotImplementedError("Model %s is not implemented. Implemented models are %r" % (model_name, models)) 16 | -------------------------------------------------------------------------------- /src/model/inverse_dynamics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/inverse_dynamics/__init__.py -------------------------------------------------------------------------------- /src/model/inverse_dynamics/encoded_mlp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from utils.gumbel import gumbel_sample 4 | 5 | 6 | class EncodedMLP(nn.Module): 7 | NAME = "encoded-mlp" 8 | 9 | def __init__(self, exp_setup, bootstrap_model=None): 10 | super(EncodedMLP, self).__init__() 11 | 12 | self.temperature = 1.0 13 | self.action_dim = exp_setup.config["num_actions"] 14 | self.dim = exp_setup.constants["hidden_dim"] 15 | 16 | self.mlp = nn.Sequential( 17 | nn.Linear(2 * self.dim, self.dim), 18 | nn.LeakyReLU(), 19 | nn.Linear(self.dim, self.action_dim), 20 | ) 21 | 22 | self.mlp_h = nn.Sequential(nn.Linear(2 * self.dim, self.dim), nn.LeakyReLU(), nn.Linear(self.dim, 256)) 23 | 24 | if torch.cuda.is_available(): 25 | self.cuda() 26 | 27 | if bootstrap_model is not None: 28 | self.load_state_dict(bootstrap_model.state_dict()) 29 | 30 | def get_action_dim(self): 31 | return self.action_dim 32 | 33 | def get_latent_action(self, prev_encoding, obs_encoding): 34 | x = torch.cat([prev_encoding, obs_encoding], dim=1) # batch x (2 dim) 35 | logits = self.mlp(x) # batch x action_dim 36 | 37 | # Compute probability using Gumbel softmax 38 | prob, log_prob = gumbel_sample(logits, self.temperature) 39 | 40 | h = self.mlp_h(x) 41 | 42 | return prob, log_prob, h 43 | -------------------------------------------------------------------------------- /src/model/inverse_dynamics/inverse_dynamics_wrapper.py: -------------------------------------------------------------------------------- 1 | from
model.inverse_dynamics.encoded_mlp import EncodedMLP 2 | from model.inverse_dynamics.simple_feed_forward import SimpleFeedForwardIK 3 | from model.inverse_dynamics.tensor_inverse_dynamics import TensorInverseDynamics 4 | 5 | 6 | class InverseDynamicsWrapper: 7 | """Wrapper for inverse dynamics models""" 8 | 9 | @staticmethod 10 | def get_inv_dynamics_model(model_name, bootstrap_model=None, **kwargs): 11 | models = [EncodedMLP.NAME, SimpleFeedForwardIK.NAME, TensorInverseDynamics.NAME] 12 | 13 | if model_name == EncodedMLP.NAME: 14 | return EncodedMLP(**kwargs, bootstrap_model=bootstrap_model) 15 | 16 | elif model_name == SimpleFeedForwardIK.NAME: 17 | return SimpleFeedForwardIK(**kwargs, bootstrap_model=bootstrap_model) 18 | 19 | elif model_name == TensorInverseDynamics.NAME: 20 | return TensorInverseDynamics(**kwargs, bootstrap_model=bootstrap_model) 21 | 22 | else: 23 | raise NotImplementedError("Model %s is not implemented. Implemented models are linear, %r" % (model_name, models)) 24 | -------------------------------------------------------------------------------- /src/model/inverse_dynamics/tensor_inverse_dynamics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from utils.gumbel import gumbel_sample 4 | 5 | 6 | class TensorInverseDynamics(nn.Module): 7 | NAME = "tensor-inv-dyn" 8 | 9 | def __init__(self, exp_setup, bootstrap_model=None): 10 | super(TensorInverseDynamics, self).__init__() 11 | 12 | self.temperature = 1.0 13 | self.action_dim = exp_setup.config["num_actions"] 14 | self.dim = exp_setup.constants["hidden_dim"] 15 | 16 | self.tensor_W = nn.Parameter(torch.randn(self.dim, self.action_dim, self.dim) * 0.01) 17 | 18 | if torch.cuda.is_available(): 19 | self.cuda() 20 | 21 | if bootstrap_model is not None: 22 | self.load_state_dict(bootstrap_model.state_dict()) 23 | 24 | def get_action_dim(self): 25 | return self.action_dim 26 | 27 | def get_latent_action(self, prev_encoding, obs_encoding): 28 | batch_size = prev_encoding.size(0) 29 | 30 | x = torch.matmul(prev_encoding, self.tensor_W.view(self.dim, self.action_dim * self.dim)) 31 | x = x.view(batch_size, self.action_dim, self.dim) 32 | 33 | x = (obs_encoding[:, None, :] * x).sum(2) # batch x num_actions 34 | 35 | # Compute probability using Gumbel softmax 36 | prob, log_prob = gumbel_sample(x, self.temperature) 37 | 38 | return prob, log_prob 39 | -------------------------------------------------------------------------------- /src/model/inverse_dynamics_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/inverse_dynamics_model/__init__.py -------------------------------------------------------------------------------- /src/model/inverse_dynamics_model/action_predictor_wrapper.py: -------------------------------------------------------------------------------- 1 | from model.inverse_dynamics_model.action_predictor import ( 2 | ActionPredictor, 3 | ActionPredictorFlatNN, 4 | ActionPredictorCNN1, 5 | ActionPredictorCNN2, 6 | ActionPredictorCNN3, 7 | ActionPredictorCNN4, 8 | ActionPredictorCNN5, 9 | ActionPredictorCNN6, 10 | ActionPredictorCNN7, 11 | ) 12 | 13 | 14 | class InverseDynamicsWrapper: 15 | FF = range(1) 16 | 17 | def __init__(self): 18 | pass 19 | 20 | @staticmethod 21 | def get_model(config, constants, bootstrap_model=None): 22 | model_type_str = constants["model_type"] 23 | if 
model_type_str == "ff": 24 | return ActionPredictor(config, constants, bootstrap_model) 25 | elif model_type_str == "flat": 26 | return ActionPredictorFlatNN(config, constants, bootstrap_model) 27 | elif model_type_str == "conv1": 28 | return ActionPredictorCNN1(config, constants, bootstrap_model) 29 | elif model_type_str == "conv2": 30 | return ActionPredictorCNN2(config, constants, bootstrap_model) 31 | elif model_type_str == "conv3": 32 | return ActionPredictorCNN3(config, constants, bootstrap_model) 33 | elif model_type_str == "conv4": 34 | return ActionPredictorCNN4(config, constants, bootstrap_model) 35 | elif model_type_str == "conv5": 36 | return ActionPredictorCNN5(config, constants, bootstrap_model) 37 | elif model_type_str == "conv6": 38 | return ActionPredictorCNN6(config, constants, bootstrap_model) 39 | elif model_type_str == "conv7": 40 | return ActionPredictorCNN7(config, constants, bootstrap_model) 41 | else: 42 | raise AssertionError("Unhandled model type %r" % model_type_str) 43 | -------------------------------------------------------------------------------- /src/model/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/misc/__init__.py -------------------------------------------------------------------------------- /src/model/misc/independence_test_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class IndependenceTestModel(nn.Module): 7 | def __init__(self, config, model_input_dim, hidden_dim): 8 | super(IndependenceTestModel, self).__init__() 9 | 10 | self.config = config 11 | 12 | if config["feature_type"] == "feature": 13 | # Model head 14 | self.classifier = nn.Sequential( 15 | nn.Linear(model_input_dim, hidden_dim), 16 | nn.LeakyReLU(), 17 | nn.Linear(hidden_dim, 2), 18 | ) 19 | 20 | else: 21 | raise AssertionError("Unhandled feature type") 22 | 23 | if torch.cuda.is_available(): 24 | self.cuda() 25 | 26 | def gen_logits_(self, model_input, type="logsoftmax"): 27 | """ 28 | :param model_input: Pytorch float tensor of size batch x dim 29 | :return: 30 | """ 31 | 32 | if self.config["feature_type"] == "image": 33 | raise AssertionError() 34 | 35 | logits = self.classifier(model_input) 36 | 37 | if type == "logsoftmax": 38 | result = F.log_softmax(logits, dim=1) 39 | elif type == "softmax": 40 | result = F.softmax(logits, dim=1) 41 | else: 42 | raise AssertionError("Unhandled type ", type) 43 | 44 | return result 45 | 46 | def gen_log_prob(self, model_input): 47 | return self.gen_logits_(model_input, type="logsoftmax") 48 | 49 | def gen_prob(self, model_input): 50 | return self.gen_logits_(model_input, type="softmax") 51 | -------------------------------------------------------------------------------- /src/model/misc/lqr_model.py: -------------------------------------------------------------------------------- 1 | class LQRModel: 2 | def __init__(self, A, B, Q, R, Sigma_W, Sigma_0): 3 | """ 4 | LQR model describes a simple continuous control dynamics where 5 | the state s evolves as 6 | 7 | s_1 ~ N(0, Sigma_0) 8 | s_{t+1} = A s_t + B u_t + epsilon_t, for all t 9 | epsilon_t ~ N(0, Sigma_W) 10 | 11 | where s_t and u_t is the state and action respectively at time step t 12 | 13 | cost at time step t is given by s_t^T Q s_t + u_t^T R u_t 14 | """ 15 | self.A = A 16 | self.B = B 17 | 
self.Q = Q 18 | self.R = R 19 | self.Sigma_W = Sigma_W 20 | self.Sigma_0 = Sigma_0 21 | 22 | def copy(self): 23 | return LQRModel( 24 | A=self.A.copy(), 25 | B=self.B.copy(), 26 | Q=self.Q.copy(), 27 | R=self.R.copy(), 28 | Sigma_W=self.Sigma_W.copy(), 29 | Sigma_0=self.Sigma_0.copy(), 30 | ) 31 | -------------------------------------------------------------------------------- /src/model/misc/robot_car/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/misc/robot_car/__init__.py -------------------------------------------------------------------------------- /src/model/misc/robot_car/autoencoder_embeddings.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle 4 | import numpy as np 5 | import torch 6 | from tqdm import tqdm 7 | 8 | from environments.robot_car.utils.dataset import CarDataset 9 | from model.misc.robot_car.autoencoder_train import CarAutoencoder 10 | 11 | 12 | # Precompute VAE embeddings for all images in the dataset 13 | # Embeddings are then saved into a pickle file 14 | if __name__ == "__main__": 15 | batch_size = 24 16 | data_root = "./car_data" 17 | train_root = "./autoencoder_training" 18 | torch.set_float32_matmul_precision("medium") 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("checkpoint", type=str) 22 | parser.add_argument("output_file", type=str) 23 | args = parser.parse_args() 24 | 25 | # check that checkpoint file exists 26 | if not os.path.isfile(args.checkpoint): 27 | raise FileNotFoundError(args.checkpoint) 28 | 29 | print(f"Loading model from {args.checkpoint}") 30 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 31 | model = CarAutoencoder.load_from_checkpoint(args.checkpoint).to(device) 32 | 33 | print("Loading data...") 34 | dataset = CarDataset(data_root, max_k=1, resize=(256, 256), cache_into_memory=False) 35 | num_samples = len(dataset.actions) 36 | assert len(dataset.pic_filenames) == num_samples 37 | assert sum(dataset.traj_lengths) == num_samples 38 | assert dataset.cumulative_lengths[-1] == num_samples 39 | assert len(dataset.traj_lengths) == len(dataset.cumulative_lengths) 40 | 41 | print("Generating embeddings...") 42 | traj_ends = dataset.cumulative_lengths - 1 43 | embeddings = [] 44 | actions = [] 45 | for i in tqdm(range(num_samples)): 46 | pics = dataset._load_pics_at_index(i) 47 | pics = torch.tensor(pics, dtype=torch.float32, device=device).unsqueeze(0) 48 | pics = pics / 256.0 49 | with torch.no_grad(): 50 | z, _ = model.encode(pics) 51 | embeddings.append(z.squeeze().cpu().numpy()) 52 | 53 | if i in traj_ends: 54 | action = np.array([0.5, 0.5, 0.5, 0.5]) 55 | else: 56 | # get action that comes after this observation 57 | action = dataset.actions[i + 1] 58 | actions.append(action) 59 | 60 | embeddings = np.array(embeddings, dtype=np.float32) 61 | actions = np.array(actions, dtype=np.float32) 62 | 63 | output = { 64 | "embeddings": embeddings, 65 | "actions": actions, 66 | "traj_lengths": dataset.traj_lengths, 67 | "total_samples": num_samples, 68 | } 69 | 70 | print("Saving pickle...") 71 | with open(args.output_file, "wb") as f: 72 | pickle.dump(output, f) 73 | -------------------------------------------------------------------------------- /src/model/misc/robot_car/dist_pred_model.py: -------------------------------------------------------------------------------- 
1 | """ 2 | Given (x[t], x[t+k]) predict k. 3 | 4 | Have ability to find smallest k with probability above some epsilon. 5 | 6 | 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | 13 | def argmax_first(a): 14 | b = torch.stack([torch.arange(a.shape[1])] * a.shape[0]) 15 | max_values, _ = torch.max(a, dim=1) 16 | b[a != max_values[:, None]] = a.shape[1] 17 | first_max, _ = torch.min(b, dim=1) 18 | 19 | if torch.cuda.is_available(): 20 | first_max = first_max.cuda() 21 | 22 | return first_max 23 | 24 | 25 | class DistPred(nn.Module): 26 | def __init__(self, inp_size, maxk): 27 | super(DistPred, self).__init__() 28 | 29 | self.enc = nn.Sequential( 30 | nn.Linear(inp_size * 2, 512), nn.LeakyReLU(), nn.Linear(512, 512), nn.LeakyReLU(), nn.Linear(512, maxk) 31 | ) 32 | 33 | def forward(self, x, xk): 34 | bs = x.shape[0] 35 | x = x.reshape((bs, -1)) 36 | xk = xk.reshape((bs, -1)) 37 | 38 | h = torch.cat([x, xk], dim=1) 39 | 40 | py = self.enc(h) 41 | 42 | return py 43 | 44 | def predict_k(self, x, xk): 45 | sm = nn.Softmax(dim=1) 46 | py = sm(self.forward(x, xk)) 47 | 48 | cdf = torch.gt(torch.cumsum(py, dim=1), 0.5).float() 49 | 50 | first_max = argmax_first(cdf) 51 | 52 | return first_max 53 | 54 | def loss(self, x, xk, k): 55 | py = self.forward(x, xk) 56 | 57 | ce = nn.CrossEntropyLoss() 58 | loss = ce(py, k) 59 | 60 | return loss 61 | 62 | 63 | if __name__ == "__main__": 64 | dp = DistPred(64, 32 * 32 * 3).cuda() 65 | 66 | x = torch.randn(1, 3, 32, 32).repeat(100, 1, 1, 1).cuda() 67 | xk = torch.randn(1, 3, 32, 32).repeat(100, 1, 1, 1).cuda() 68 | 69 | y = torch.zeros(100).long().cuda() 70 | 71 | # y[0:25] += 4 72 | # y[25:50] += 5 73 | # y[50:75] += 6 74 | # y[75:100] += 1 75 | 76 | # y += 3 77 | 78 | for i in range(0, 1000): 79 | dp.train(x, xk, y) 80 | 81 | kpred = dp.predict_k(x[0:1], xk[0:1]) 82 | 83 | print(kpred) 84 | -------------------------------------------------------------------------------- /src/model/misc/robot_car/positional_encoding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | 4 | 5 | def positionalencoding1d(d_model, length): 6 | """ 7 | :param d_model: dimension of the model 8 | :param length: length of positions 9 | :return: length*d_model position matrix 10 | """ 11 | if d_model % 2 != 0: 12 | raise ValueError("Cannot use sin/cos positional encoding with " "odd dim (got dim={:d})".format(d_model)) 13 | pe = torch.zeros(length, d_model) 14 | position = torch.arange(0, length).unsqueeze(1) 15 | div_term = torch.exp((torch.arange(0, d_model, 2, dtype=torch.float) * -(math.log(10000.0) / d_model))) 16 | pe[:, 0::2] = torch.sin(position.float() * div_term) 17 | pe[:, 1::2] = torch.cos(position.float() * div_term) 18 | 19 | return pe 20 | 21 | 22 | def positionalencoding2d(d_model, height, width): 23 | """ 24 | :param d_model: dimension of the model 25 | :param height: height of the positions 26 | :param width: width of the positions 27 | :return: d_model*height*width position matrix 28 | """ 29 | if d_model % 4 != 0: 30 | raise ValueError("Cannot use sin/cos positional encoding with " "odd dimension (got dim={:d})".format(d_model)) 31 | pe = torch.zeros(d_model, height, width) 32 | # Each dimension use half of d_model 33 | d_model = int(d_model / 2) 34 | div_term = torch.exp(torch.arange(0.0, d_model, 2) * -(math.log(10000.0) / d_model)) 35 | pos_w = torch.arange(0.0, width).unsqueeze(1) 36 | pos_h = torch.arange(0.0, height).unsqueeze(1) 37 | pe[0:d_model:2, :, :] = torch.sin(pos_w * 
-------------------------------------------------------------------------------- /src/model/misc/robot_car/positional_encoding.py: --------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | 
4 | 
5 | def positionalencoding1d(d_model, length):
6 |     """
7 |     :param d_model: dimension of the model
8 |     :param length: length of positions
9 |     :return: length*d_model position matrix
10 |     """
11 |     if d_model % 2 != 0:
12 |         raise ValueError("Cannot use sin/cos positional encoding with " "odd dim (got dim={:d})".format(d_model))
13 |     pe = torch.zeros(length, d_model)
14 |     position = torch.arange(0, length).unsqueeze(1)
15 |     div_term = torch.exp((torch.arange(0, d_model, 2, dtype=torch.float) * -(math.log(10000.0) / d_model)))
16 |     pe[:, 0::2] = torch.sin(position.float() * div_term)
17 |     pe[:, 1::2] = torch.cos(position.float() * div_term)
18 | 
19 |     return pe
20 | 
21 | 
22 | def positionalencoding2d(d_model, height, width):
23 |     """
24 |     :param d_model: dimension of the model
25 |     :param height: height of the positions
26 |     :param width: width of the positions
27 |     :return: d_model*height*width position matrix
28 |     """
29 |     if d_model % 4 != 0:
30 |         raise ValueError("Cannot use sin/cos positional encoding with " "a dimension not divisible by 4 (got dim={:d})".format(d_model))
31 |     pe = torch.zeros(d_model, height, width)
32 |     # Each dimension uses half of d_model
33 |     d_model = int(d_model / 2)
34 |     div_term = torch.exp(torch.arange(0.0, d_model, 2) * -(math.log(10000.0) / d_model))
35 |     pos_w = torch.arange(0.0, width).unsqueeze(1)
36 |     pos_h = torch.arange(0.0, height).unsqueeze(1)
37 |     pe[0:d_model:2, :, :] = torch.sin(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
38 |     pe[1:d_model:2, :, :] = torch.cos(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
39 |     pe[d_model::2, :, :] = torch.sin(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
40 |     pe[d_model + 1 :: 2, :, :] = torch.cos(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
41 | 
42 |     return pe
43 | 
-------------------------------------------------------------------------------- /src/model/model_wrapper.py: --------------------------------------------------------------------------------
1 | from model.decoder.decoder_wrapper import DecoderModelWrapper
2 | from model.encoder.encoder_wrapper import EncoderModelWrapper
3 | from model.classifiers.classifier_model_wrapper import ClassifierModelWrapper
4 | 
5 | 
6 | class ModelWrapper:
7 |     def __init__(self):
8 |         pass
9 | 
10 |     @staticmethod
11 |     def get_model(model_type, model_name, config, constants, bootstrap_model=None, **kwargs):
12 |         if model_type == "classifier":
13 |             return ClassifierModelWrapper.get_classifier(
14 |                 model_name=model_name,
15 |                 num_class=kwargs["num_class"],
16 |                 config=config,
17 |                 constants=constants,
18 |                 bootstrap_model=None,
19 |             )
20 | 
21 |         elif model_type == "encoder":
22 |             return EncoderModelWrapper.get_encoder(model_name=model_name, bootstrap_model=bootstrap_model, **kwargs)
23 | 
24 |         elif model_type == "decoder":
25 |             return DecoderModelWrapper.get_decoder(model_name=model_name, bootstrap_model=bootstrap_model, **kwargs)
26 | 
27 |         else:
28 |             raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/policy/__init__.py
-------------------------------------------------------------------------------- /src/model/policy/abstract_nonstationary.py: --------------------------------------------------------------------------------
1 | class AbstractNonStationaryPolicy:
2 |     def __init__(self):
3 |         pass
4 | 
5 |     def action_type(self):
6 |         """
7 |         :return: Type of action returned by the policy
8 |         """
9 |         raise NotImplementedError()
10 | 
11 |     def sample_action(self, observation, timestep):
12 |         """
13 |         :param observation: Observation of the world
14 |         :param timestep: time step at which observation is observed
15 |         :return: an action sampled from the policy for this observation and time step
16 |         """
17 |         raise NotImplementedError()
18 | 
19 |     def get_argmax_action(self, observation, timestep):
20 |         """
21 |         :param observation: Observation of the world
22 |         :param timestep: time step at which observation is observed
23 |         :return: action representation (can be integer, real number, real-valued vector, or object of some class)
24 |         """
25 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/abstract_stationary.py: --------------------------------------------------------------------------------
1 | class AbstractStationaryPolicy:
2 |     def __init__(self):
3 |         pass
4 | 
5 |     def action_type(self):
6 |         """
7 |         :return: Type of action returned by the policy
8 |         """
9 |         raise NotImplementedError()
10 | 
11 |     def sample_action(self, observation):
12 |         """
13 |         :param observation: Observation of the world
14 |         :return: an action sampled from the policy for this observation
15 |         """
16 |         raise NotImplementedError()
17 | 
18 |     def get_argmax_action(self, observation):
19 |         """
20 |         :param observation: Observation of the world
21 |         :return: action representation (can be integer, real number, real-valued vector, or object of some class)
22 |         """
23 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/nonstationary_composed_policy.py: --------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import torch.nn as nn
4 | 
5 | from environments.intrepid_env_meta.action_type import ActionType
6 | 
7 | 
8 | class NonStationaryComposedPolicy(nn.Module, ActionType):
9 |     def __init__(self, encoder_fn, q_values, config):
10 |         super(NonStationaryComposedPolicy, self).__init__()
11 |         super(ActionType, self).__init__()
12 | 
13 |         self.encoder_fn = encoder_fn
14 |         self.q_values = q_values
15 |         self.action_space = config["actions"]
16 | 
17 |     def action_type(self):
18 |         return ActionType.Discrete
19 | 
20 |     def sample_action(self, observations, time_step):
21 |         return self.get_argmax_action(observations, time_step)
22 | 
23 |     def get_argmax_action(self, observations, time_step):
24 |         if self.encoder_fn is None:
25 |             return random.choice(self.action_space)
26 | 
27 |         if isinstance(self.encoder_fn, list) or isinstance(self.encoder_fn, dict):
28 |             latent_state = self.encoder_fn[time_step].encode_observations(observations)
29 |         else:
30 |             latent_state = self.encoder_fn.encode_observations(observations)
31 | 
32 |         if (time_step, latent_state) in self.q_values:
33 |             q_values = self.q_values[(time_step, latent_state)]
34 | 
35 |             return np.random.choice(np.flatnonzero(q_values == q_values.max()))
36 |         else:
37 |             return random.choice(self.action_space)
38 | 
39 |     def save(self, folder_name, model_name=None):
40 |         raise NotImplementedError()
41 | 
42 |     def load(self, folder_name, model_name=None):
43 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/open_loop.py: --------------------------------------------------------------------------------
1 | from model.policy.abstract_nonstationary import AbstractNonStationaryPolicy
2 | 
3 | 
4 | class OpenLoopPolicy(AbstractNonStationaryPolicy):
5 |     def __init__(self, actions=None, path_id=None):
6 |         AbstractNonStationaryPolicy.__init__(self)
7 | 
8 |         # List of actions
9 |         if actions is None:
10 |             self._actions = []
11 |         else:
12 |             self._actions = list(actions)
13 | 
14 |         # ID of this path
15 |         self.path_id = path_id
16 | 
17 |         # Last action that formed this path
18 |         self.action = None if len(self._actions) == 0 else self._actions[-1]
19 | 
20 |         # ID of the parent
21 |         self.parent_path_id = None
22 | 
23 |     def extend(self, action, path_id=None):
24 |         policy = self.clone()
25 |         policy._actions.append(action)
26 | 
27 |         policy.parent_path_id = policy.path_id
28 |         policy.path_id = path_id
29 |         policy.action = action
30 | 
31 |         return policy
32 | 
33 |     def num_timesteps(self):
34 |         return len(self._actions)
35 | 
36 |     def action_type(self):
37 |         """
38 |         :return: Type of action returned by the policy
39 |         """
40 |         raise NotImplementedError()
41 | 
42 |     def sample_action(self, observation, timestep):
43 |         return self._actions[timestep]
44 | 
45 |     def get_argmax_action(self, observation, timestep):
46 |         return self._actions[timestep]
47 | 
48 |     def clone(self):
49 |         policy = OpenLoopPolicy()
50 |         policy._actions = list(self._actions)
51 | 
52 |         policy.path_id = 
self.path_id
53 |         policy.action = self.action
54 |         policy.parent_path_id = self.parent_path_id
55 | 
56 |         return policy
57 | 
58 |     def __str__(self):
59 |         if self.parent_path_id is None or self.action is None or self.path_id is None:
60 |             return "NA"
61 |         else:
62 |             return "[%d -> %r -> %d]" % (self.parent_path_id, self.action, self.path_id)
-------------------------------------------------------------------------------- /src/model/policy/stationary_action_condition_policy.py: --------------------------------------------------------------------------------
1 | from model.policy.abstract_stationary import AbstractStationaryPolicy
2 | 
3 | 
4 | class StationaryActionConditionPolicy(AbstractStationaryPolicy):
5 |     """A policy that takes action by evaluating an input condition"""
6 | 
7 |     def __init__(self, action_condition):
8 |         super(StationaryActionConditionPolicy, self).__init__()
9 |         self.action_condition = action_condition
10 | 
11 |     def action_type(self):
12 |         raise NotImplementedError()
13 | 
14 |     def gen_q_val(self, observations):
15 |         raise NotImplementedError()
16 | 
17 |     def sample_action(self, observations):
18 |         return self.action_condition(observations)
19 | 
20 |     def get_argmax_action(self, observations):
21 |         return self.action_condition(observations)
22 | 
23 |     def save(self, folder_name, model_name=None):
24 |         raise NotImplementedError()
25 | 
26 |     def load(self, folder_name, model_name=None):
27 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/stationary_constant_policy.py: --------------------------------------------------------------------------------
1 | import pickle
2 | 
3 | from environments.intrepid_env_meta.action_type import ActionType
4 | 
5 | 
6 | class StationaryConstantPolicy(ActionType):
7 |     """A policy that always takes the same action deterministically regardless of input"""
8 | 
9 |     def __init__(self, action):
10 |         super(ActionType, self).__init__()
11 |         self.action = action
12 | 
13 |     def action_type(self):
14 |         raise NotImplementedError()
15 | 
16 |     def gen_q_val(self, observations):
17 |         raise NotImplementedError()
18 | 
19 |     def sample_action(self, observations):
20 |         return self.action
21 | 
22 |     def get_argmax_action(self, observations):
23 |         return self.action
24 | 
25 |     def save(self, folder_name, model_name=None):
26 |         model_name = "stationary_constant_policy" if model_name is None else model_name
27 |         with open(folder_name + model_name, "wb") as fobj:
28 |             pickle.dump(self.action, fobj)
29 | 
30 |     def load(self, folder_name, model_name=None):
31 |         model_name = "stationary_constant_policy" if model_name is None else model_name
32 |         with open(folder_name + model_name, "rb") as fobj:
33 |             self.action = pickle.load(fobj)
-------------------------------------------------------------------------------- /src/model/policy/stationary_decoder_dictionary_policy.py: --------------------------------------------------------------------------------
1 | from environments.intrepid_env_meta.action_type import ActionType
2 | from model.policy.stationary_dictionary_policy import StationaryDictionaryPolicy
3 | 
4 | 
5 | class StationaryDecoderLatentPolicy(ActionType):
6 |     def __init__(self, decoder, q_val_dictionary, actions):
7 |         super(ActionType, self).__init__()
8 | 
9 |         self.decoder = decoder
10 |         self.latent_policy = StationaryDictionaryPolicy(q_val_dictionary, actions)
11 | 
12 |     def action_type(self):
13 |         raise NotImplementedError()
14 | 
15 |     def gen_q_val(self, observations):
16 |         raise NotImplementedError()
17 | 
18 |     def sample_action(self, obs):
19 |         obs = obs[0]
20 |         latent_state = self.decoder.encode_observations(obs)
21 |         return 
self.latent_policy.sample_action(latent_state)
22 | 
23 |     def get_argmax_action(self, obs):
24 |         latent_state = self.decoder.encode_observations(obs)
25 |         return self.latent_policy.get_argmax_action(latent_state)
26 | 
27 |     def save(self, folder_name, model_name=None):
28 |         raise NotImplementedError()
29 | 
30 |     def load(self, folder_name, model_name=None):
31 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/stationary_dictionary_policy.py: --------------------------------------------------------------------------------
1 | import random
2 | 
3 | from environments.intrepid_env_meta.action_type import ActionType
4 | 
5 | 
6 | class StationaryDictionaryPolicy(ActionType):
7 |     def __init__(self, q_val_dictionary, actions):
8 |         super(ActionType, self).__init__()
9 | 
10 |         self.q_val_dictionary = q_val_dictionary
11 |         self.actions = actions
12 | 
13 |     def action_type(self):
14 |         raise NotImplementedError()
15 | 
16 |     def gen_q_val(self, observations):
17 |         raise NotImplementedError()
18 | 
19 |     def sample_action(self, state):
20 |         action = self.get_argmax_action(state)
21 |         assert isinstance(action, int), "Action should be of type int. Found %r of type %r" % (action, type(action))
22 |         return action
23 | 
24 |     def get_argmax_action(self, state):
25 |         state = tuple(state)
26 |         if state in self.q_val_dictionary:
27 |             return int(self.q_val_dictionary[state].argmax())
28 |         else:
29 |             return random.choice(self.actions)
30 | 
31 |     def save(self, folder_name, model_name=None):
32 |         raise NotImplementedError()
33 | 
34 |     def load(self, folder_name, model_name=None):
35 |         raise NotImplementedError()
-------------------------------------------------------------------------------- /src/model/policy/stationary_stochastic_policy.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | from environments.intrepid_env_meta.action_type import ActionType
6 | 
7 | 
8 | class StationaryStochasticPolicy(nn.Module, ActionType):
9 |     def __init__(self, constants, config):
10 |         super(StationaryStochasticPolicy, self).__init__()
11 |         super(ActionType, self).__init__()
12 | 
13 |         self.layer1 = nn.Linear(config["obs_dim"], 56)
14 |         self.layer2 = nn.Linear(56, 56)
15 |         self.layer3 = nn.Linear(56, config["num_actions"])
16 | 
17 |     def gen_prob(self, observations):
18 |         x = F.relu(self.layer1(observations))
19 |         x = F.relu(self.layer2(x))
20 |         x = F.softmax(self.layer3(x), dim=1)
21 | 
22 |         return x
23 | 
24 |     def action_type(self):
25 |         return ActionType.Discrete
26 | 
27 |     def sample_action(self, observations):
28 |         prob = self.gen_prob(observations)
29 |         dist = torch.distributions.Categorical(prob)
30 |         return torch.multinomial(dist.probs, 1, True)
31 | 
32 |     def get_argmax_action(self, observations):
33 |         prob = self.gen_prob(observations)
34 |         return prob.max(1)[1]
35 | 
36 |     def save(self, folder_name, model_name=None):
37 |         if model_name is None:
38 |             torch.save(self.state_dict(), folder_name + "stationary_policy")
39 |         else:
40 |             torch.save(self.state_dict(), folder_name + model_name)
41 | 
42 |     def load(self, folder_name, model_name=None):
43 |         if model_name is None:
44 |             self.load_state_dict(torch.load(folder_name + "stationary_policy"))
45 |         else:
46 |             self.load_state_dict(torch.load(folder_name + model_name))
-------------------------------------------------------------------------------- /src/model/transition_encoders/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/model/transition_encoders/__init__.py
-------------------------------------------------------------------------------- /src/model/transition_encoders/encoder_model_wrapper.py: --------------------------------------------------------------------------------
1 | from model.transition_encoders.compositional_encoder_model import (
2 |     CompositionalEncoderModel,
3 | )
4 | from model.transition_encoders.encoder_model import (
5 |     BackwardEncoderModel,
6 |     ForwardEncoderModel,
7 | )
8 | 
9 | 
10 | class EncoderModelWrapper:
11 |     """Wrapper for encoder model"""
12 | 
13 |     @staticmethod
14 |     def get_encoder_model(model_type, config, constants, bootstrap_model=None):
15 |         if model_type == "backwardmodel":
16 |             return BackwardEncoderModel(config, constants, bootstrap_model)
17 |         elif model_type == "forwardmodel":
18 |             return ForwardEncoderModel(config, constants, bootstrap_model)
19 |         elif model_type == "compbackwardmodel":
20 |             return CompositionalEncoderModel(config, constants, bootstrap_model)
21 |         else:
22 |             raise NotImplementedError("Did not implement %r" % model_type)
-------------------------------------------------------------------------------- /src/setup_validator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/setup_validator/__init__.py
-------------------------------------------------------------------------------- /src/setup_validator/config_key_registry.py: --------------------------------------------------------------------------------
1 | # number of actions; -1 to denote not set
2 | NUM_ACTIONS = "num_actions"
3 | 
4 | # action space; -1 to denote not set
5 | ACTIONS = "actions"
6 | 
7 | # Horizon of the problem; -1 to denote not set
8 | HORIZON = "horizon"
9 | 
10 | # Dimension of the observation; -1 to denote not set
11 | OBS_DIM = "obs_dim"
12 | 
13 | # Type of features the agent receives
14 | FEATURE_TYPE = "feature_type"
15 | 
16 | # Discount factor; -1 to denote not set
17 | GAMMA = "gamma"
-------------------------------------------------------------------------------- /src/setup_validator/core_validator.py: --------------------------------------------------------------------------------
1 | REQUIRED_CONFIG_KEYS = [
2 |     "num_actions",
3 |     "actions",
4 |     "horizon",
5 |     "obs_dim",
6 |     "feature_type",
7 |     "gamma",
8 | ]
9 | 
10 | REQUIRED_CONSTANT_KEYS = [
11 |     "learning_rate",
12 |     "num_homing_policy",
13 |     "encoder_training_num_samples",
14 |     "encoder_training_epoch",
15 |     "encoder_training_lr",
16 |     "encoder_training_batch_size",
17 |     "validation_data_percent",
18 |     "psdp_training_num_samples",
19 |     "cb_oracle_epoch",
20 |     "cb_oracle_lr",
21 |     "cb_oracle_batch_size",
22 |     "eval_homing_policy_sample_size",
23 |     "reward_free_planner",
24 |     "reward_sensitive_planner",
25 | ]
26 | 
27 | 
28 | def validate(config, constants):
29 |     # TODO validate based on the algorithm that is being run
30 | 
31 |     for key in REQUIRED_CONFIG_KEYS:
32 |         assert key in config, "Did not find the key %r in config dictionary" % key
33 | 
34 |     for key in REQUIRED_CONSTANT_KEYS:
35 |         assert key in constants, "Did not find the key %r in constants dictionary" % key
36 | 
37 |     return True
38 | 
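For reference, a minimal sketch of how this validator is meant to be invoked (the literal values are placeholders of our own, not recommended settings; validate only checks key presence):

from setup_validator.core_validator import REQUIRED_CONSTANT_KEYS, validate

# Placeholder config/constants that merely satisfy the key checks
config = {
    "num_actions": 4,
    "actions": [0, 1, 2, 3],
    "horizon": 10,
    "obs_dim": 64,
    "feature_type": "image",
    "gamma": 1.0,
}
constants = {key: -1 for key in REQUIRED_CONSTANT_KEYS}

assert validate(config, constants)

--------------------------------------------------------------------------------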
/src/unit_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/unit_test/__init__.py -------------------------------------------------------------------------------- /src/unit_test/dynamic_arguments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import argparse 3 | import pdb 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("-args_file", default="args.txt") 7 | 8 | data = {"f": 0, "g": "cat"} 9 | 10 | for k, v in data.items(): 11 | parser.add_argument("--%s" % k, default=None, type=type(v), help="data") 12 | 13 | dynamic_args = parser.parse_args() 14 | dynamic_args = vars(dynamic_args) 15 | 16 | pdb.set_trace() 17 | 18 | print(dynamic_args) 19 | -------------------------------------------------------------------------------- /src/unit_test/gridworld_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.multiprocessing as mp 5 | 6 | from experiments.experiment_header import get_header 7 | from environments.intrepid_env_meta.make_env import MakeEnvironment 8 | 9 | 10 | def main(): 11 | exp_setup = get_header() 12 | 13 | if exp_setup.config["seed"] == -1: 14 | seeds = list(range(1234, 1234 + 10)) 15 | num_runs = len(seeds) 16 | else: 17 | seeds = [exp_setup.config["seed"]] 18 | num_runs = 1 19 | 20 | for exp_id in range(1, num_runs + 1): 21 | exp_setup.config["seed"] = seeds[exp_id - 1] 22 | exp_setup.config["env_seed"] = seeds[exp_id - 1] * 10 23 | exp_setup.logger.log("========= STARTING EXPERIMENT %d (Seed = %d) ======== " % (exp_id, exp_setup.config["seed"])) 24 | 25 | # Set the random seed 26 | random.seed(exp_setup.config["seed"]) 27 | np.random.seed(exp_setup.config["seed"]) 28 | torch.manual_seed(exp_setup.config["seed"]) 29 | if torch.cuda.is_available(): 30 | torch.cuda.manual_seed_all(exp_setup.config["seed"]) 31 | 32 | # Create a new environment 33 | make_env = MakeEnvironment() 34 | env = make_env.make(exp_setup) 35 | exp_setup.logger.log("Environment Created") 36 | 37 | import imageio 38 | import matplotlib.pyplot as plt 39 | 40 | plt.ion() 41 | 42 | images = [] 43 | 44 | for _ in range(0, 5): 45 | img, info = env.reset() 46 | plt.imshow(img) 47 | images.append(img) 48 | 49 | # print("Image shape is ", img.shape) 50 | for _ in range(0, exp_setup.config["horizon"]): 51 | action = random.choice(exp_setup.config["actions"]) 52 | img, _, _, _ = env.step(action) 53 | images.append(img) 54 | # print("Image shape is ", img.shape) 55 | plt.imshow(img) 56 | plt.pause(0.05) 57 | 58 | imageio.mimsave("./gridworld.gif", images) 59 | 60 | import pdb 61 | 62 | pdb.set_trace() 63 | 64 | 65 | if __name__ == "__main__": 66 | print("SETTING THE START METHOD ") 67 | mp.freeze_support() 68 | mp.set_start_method("spawn") 69 | main() 70 | -------------------------------------------------------------------------------- /src/unit_test/make_env.py: -------------------------------------------------------------------------------- 1 | from environments.intrepid_env_meta.make_env import MakeEnvironment 2 | 3 | 4 | make_env = MakeEnvironment() 5 | -------------------------------------------------------------------------------- /src/unit_test/matterport_exo.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import imageio 3 | 
import numpy as np
4 | import matplotlib.pyplot as plt
5 | import pdb
6 | 
7 | from skimage.transform import resize
8 | 
9 | # Read image
10 | img = imageio.imread("./matterport_sample.png")
11 | obs_dim = img.shape
12 | print("Obs dim shape is ", obs_dim)
13 | 
14 | fnames = glob.glob("data/matterport/icon_figs/*png")
15 | distractors = []
16 | 
17 | for fname in fnames:
18 |     distractor_img = imageio.imread(fname)
19 |     print("Read distractor from %s of size %r" % (fname, distractor_img.shape))
20 | 
21 |     assert len(distractor_img.shape) == 3 and (
22 |         distractor_img.shape[2] == 3 or distractor_img.shape[2] == 4
23 |     ), "Can only read RGB and RGBA images"
24 |     if distractor_img.shape[2] == 4:
25 |         distractor_img = distractor_img[:, :, :3]
26 | 
27 |     # Resize based on the original image so that the distractor is at most
28 |     # 20% of the observation's height and width
29 |     distractor_img = resize(
30 |         distractor_img,
31 |         (
32 |             min(distractor_img.shape[0], int(0.2 * obs_dim[0])),
33 |             min(distractor_img.shape[1], int(0.2 * obs_dim[1])),
34 |             3,
35 |         ),
36 |     )
37 |     distractor_img = (distractor_img * 255).astype(np.uint8)
38 |     distractors.append(distractor_img)
39 | 
40 | print("Read %d many distractors " % len(distractors))
41 | 
42 | distractor_hor = 40
43 | distractor_ver = 30
44 | distractor_id = 0
45 | 
46 | # Add distractor
47 | distractor_img = distractors[distractor_id]
48 | distractor_shape = distractor_img.shape
49 | 
50 | img_slice = img[
51 |     distractor_ver : distractor_ver + distractor_shape[0],
52 |     distractor_hor : distractor_hor + distractor_shape[1],
53 |     :,
54 | ]
55 | 
56 | print("Img shape is ", img.shape)
57 | print("Img slice's shape is ", img_slice.shape)
58 | print("Distractor slice's shape is ", distractor_shape)
59 | 
60 | distractor_img = distractor_img.reshape((-1, 3))
61 | img_slice = img_slice.reshape((-1, 3))
62 | distractor_img_min = distractor_img.min(1)
63 | blue_pixel_ix = np.argwhere(distractor_img_min < 220)  # flattened positions of non-white (foreground) pixels
64 | values = np.squeeze(distractor_img[blue_pixel_ix])
65 | np.put_along_axis(img_slice, blue_pixel_ix, values, axis=0)
66 | 
67 | img_slice = img_slice.reshape(distractor_shape)  # distractor and img_slice have the same shape
68 | 
69 | img[
70 |     distractor_ver : distractor_ver + distractor_shape[0],
71 |     distractor_hor : distractor_hor + distractor_shape[1],
72 |     :,
73 | ] = img_slice
74 | 
75 | imgplot = plt.imshow(img)
76 | plt.show()
77 | 
78 | pdb.set_trace()
79 | 
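The paste logic above generalizes directly; here is a hedged sketch of the same non-white masking wrapped in a reusable helper (our own function, not part of the repo, assuming RGB uint8 arrays with near-white distractor backgrounds):

import numpy as np


def overlay_distractor(img, distractor, top, left, white_thresh=220):
    """Paste the non-white pixels of `distractor` onto `img` at (top, left), in place."""
    h, w, _ = distractor.shape
    region = img[top : top + h, left : left + w, :]  # a view into img
    mask = distractor.min(axis=2) < white_thresh  # foreground = not near-white
    region[mask] = distractor[mask]
    return img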
-------------------------------------------------------------------------------- /src/unit_test/matterport_test.py: --------------------------------------------------------------------------------
1 | import pdb
2 | import json
3 | import numpy as np
4 | 
5 | from environments.matterport.matterport import Matterport
6 | 
7 | with open("data/matterport/config.json") as f:
8 |     config = json.load(f)
9 | 
10 | env = Matterport(config)
11 | img, info = env.reset()
12 | 
13 | 
14 | def print_stuff():
15 |     print(env.sim.getState()[0].scanId)
16 |     print(env.sim.getState()[0].location.viewpointId)
17 |     print(env.sim.getState()[0].viewIndex)
18 |     print(env.sim.getState()[0].heading)
19 |     print(env.sim.getState()[0].elevation)
20 |     print(env.sim.getState()[0].step)
21 |     print(env.sim.getState()[0].navigableLocations)
22 |     print(np.array(env.sim.getState()[0].rgb, copy=False).shape)
23 |     print()
24 | 
25 | 
26 | print_stuff()
27 | print("Taking action")
28 | for i in range(0, config["horizon"]):
29 |     img, reward, done, info = env.step(0)
30 |     print_stuff()
31 | 
32 | pdb.set_trace()
33 | 
-------------------------------------------------------------------------------- /src/unit_test/module_multiprocessing.py: --------------------------------------------------------------------------------
1 | import torch
2 | import multiprocessing as mp
3 | import torch.nn as nn
4 | import numpy as np
5 | 
6 | 
7 | from utils.cuda import cuda_var
8 | 
9 | 
10 | class Worker:
11 |     def __init__(self):
12 |         pass
13 | 
14 |     @staticmethod
15 |     def forward(id, model, vector):
16 |         output = model(vector)
17 |         print("Client: %r Output Sum is %r" % (id, output.sum()))
18 | 
19 | 
20 | class Model(nn.Module):
21 |     def __init__(self):
22 |         super(Model, self).__init__()
23 | 
24 |         self.transform = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU())
25 | 
26 |         if torch.cuda.is_available():
27 |             self.cuda()
28 | 
29 |     def forward(self, x):
30 |         return self.transform(x)
31 | 
32 | 
33 | if __name__ == "__main__":
34 |     mp.freeze_support()
35 |     mp.set_start_method("spawn")
36 | 
37 |     a = np.random.rand(1, 32)
38 |     a_v = cuda_var(torch.from_numpy(a)).float()
39 | 
40 |     model = Model()
41 |     output = model(a_v)
42 |     print("Master Output Sum is %r " % output.sum())
43 | 
44 |     # creating new process
45 |     p1 = mp.Process(target=Worker.forward, args=(0, model, a_v))
46 |     p1.start()
47 | 
48 |     p2 = mp.Process(target=Worker.forward, args=(1, model, a_v))
49 |     p2.start()
50 | 
51 |     # wait until process is finished
52 |     p1.join()
53 |     p2.join()
-------------------------------------------------------------------------------- /src/unit_test/montezuma_human_interactive.py: --------------------------------------------------------------------------------
1 | import gym
2 | import time
3 | import pickle
4 | import matplotlib.pyplot as plt
5 | 
6 | from textwrap import wrap
7 | from skimage.transform import resize
8 | 
9 | plt.ion()
10 | env = gym.make("MontezumaRevengeDeterministic-v4")
11 | 
12 | 
13 | def process_obs_and_show(obs, seq, ret):
14 |     obs = obs[34 : 34 + 160, :160]
15 |     obs = resize(obs, (500, 500, 3))
16 |     seq_str = ", ".join([str(action) for action in seq])
17 |     plt.clf()
18 | 
19 |     plt.title("\n".join(wrap("Trajectory [%s], return: %f" % (seq_str, ret), 90)), fontsize=8)
20 |     plt.imshow(obs)
21 |     plt.show()
22 | 
23 | 
24 | def take_action(action):
25 |     obs = None
26 |     reward = 0
27 |     for _ in range(4):
28 |         obs, reward_, _, _ = env.step(action)
29 |         reward += reward_
30 |     return obs, reward
31 | 
32 | 
33 | def play(seq):
34 |     obs = env.reset()
35 |     ret = 0
36 |     for ix, action in enumerate(seq):
37 |         obs, reward = take_action(action)
38 |         ret += reward
39 |         process_obs_and_show(obs, seq, ret)
40 |     return obs, ret
41 | 
42 | 
43 | seq = []
44 | ret = 0
45 | obs = env.reset()
46 | process_obs_and_show(obs, seq, ret)
47 | 
48 | while True:
49 |     cmd_str = input("Enter an action number between 0 and 17, b to go back, q to quit, or 'load <file>' to replay a saved trajectory\n\n")
50 | 
51 |     cmd_seq = [tk.strip() for tk in cmd_str.split(",")]
52 |     cmd_seq = [tk for tk in cmd_seq if len(tk) > 0]
53 | 
54 |     for cmd in cmd_seq:
55 |         if cmd == "b":
56 |             if len(seq) > 0:
57 |                 # go back
58 |                 seq.pop()
59 |                 obs, ret = play(seq)
60 |             else:
61 |                 print("No observation to backtrack\n\n")
62 | 
63 |         elif cmd == "q":
64 |             with open(
65 |                 "key-montezuma-achieved-return-%d-%d.pkl" % (ret, int(time.time())),
66 |                 "wb",
67 |             ) as f:
68 |                 pickle.dump({"seq": seq, "total_return": ret}, f)
69 |             print("Quitting.")
70 |             exit(0)
71 | 
72 |         elif cmd.startswith("load"):
73 |             with open(cmd.split()[1], "rb") as f:
74 |                 data = 
pickle.load(f)
75 |             obs, ret = play(data["seq"])
76 | 
77 |         else:
78 |             try:
79 |                 action = int(cmd)
80 |                 obs, reward = take_action(action)
81 |                 seq.append(action)
82 |                 ret += reward
83 |                 process_obs_and_show(obs, seq, ret)
84 |             except Exception:
85 |                 print("Enter b, q, 'load <file>', or an action number")
86 |                 continue
-------------------------------------------------------------------------------- /src/unit_test/multiprocessing_different_gpu.py: --------------------------------------------------------------------------------
1 | import torch
2 | import multiprocessing as mp
3 | import torch.nn as nn
4 | import numpy as np
5 | 
6 | from utils.cuda import cuda_var
7 | from copy import deepcopy
8 | 
9 | 
10 | class Worker:
11 |     def __init__(self):
12 |         pass
13 | 
14 |     @staticmethod
15 |     def forward(id, model, gpu_id):
16 |         # Set this process to use a different GPU
17 |         torch.cuda.set_device(gpu_id)
18 |         assert gpu_id == torch.cuda.current_device()
19 | 
20 |         a = np.eye(32)
21 |         vector = cuda_var(torch.from_numpy(a)).float()
22 | 
23 |         output = None
24 |         for i in range(0, 25000):  # A time consuming process
25 |             output = model(vector)
26 |         print(
27 |             "Client %r: Given GPU-ID to use %d, using GPU-ID %r out of %r, Output Sum is %r"
28 |             % (
29 |                 id,
30 |                 gpu_id,
31 |                 torch.cuda.current_device(),
32 |                 torch.cuda.device_count(),
33 |                 output.sum(),
34 |             )
35 |         )
36 | 
37 | 
38 | class Model(nn.Module):
39 |     def __init__(self):
40 |         super(Model, self).__init__()
41 | 
42 |         self.transform = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU())
43 | 
44 |         if torch.cuda.is_available():
45 |             self.cuda()
46 | 
47 |     def forward(self, x):
48 |         return self.transform(x)
49 | 
50 | 
51 | if __name__ == "__main__":
52 |     mp.freeze_support()
53 |     mp.set_start_method("spawn")
54 | 
55 |     a = np.eye(32)
56 |     a_v = cuda_var(torch.from_numpy(a)).float()
57 | 
58 |     model = Model()
59 |     output = model(a_v)
60 |     print("Master Output Sum is %r " % output.sum())
61 | 
62 |     # creating new process
63 |     new_model = deepcopy(model)
64 |     new_model.cuda(0)
65 |     p1 = mp.Process(target=Worker.forward, args=(0, new_model, 0))
66 |     p1.start()
67 | 
68 |     new_model = deepcopy(model)
69 |     new_model.cuda(1)
70 |     p2 = mp.Process(target=Worker.forward, args=(1, new_model, 1))
71 |     p2.start()
72 | 
73 |     new_model = deepcopy(model)
74 |     new_model.cuda(2)
75 |     p3 = mp.Process(target=Worker.forward, args=(2, new_model, 2))
76 |     p3.start()
77 | 
78 |     # wait until process is finished
79 |     p1.join()
80 |     p2.join()
81 |     p3.join()
82 | 
-------------------------------------------------------------------------------- /src/unit_test/shared_replay_memory.py: --------------------------------------------------------------------------------
1 | import random
2 | import multiprocessing as mp
3 | 
4 | 
5 | def square_list(n, results):
6 |     """
7 |     continuously write random values into the shared arrays and print their running sums
8 |     """
9 |     while True:
10 |         i = random.randint(0, 4)
11 |         results[n][i] = random.random()
12 | 
13 |         # print result Array
14 |         if i == 0:
15 |             print("Result(in process p1) 0: " + str(sum(results[0])))
16 |             print("Result(in process p1) 1: " + str(sum(results[1])))
17 | 
18 | 
19 | if __name__ == "__main__":
20 |     # creating two shared float arrays, each with space for 5 values
21 |     results = []
22 |     for i in range(0, 2):
23 |         results.append(mp.Array("f", range(5)))
24 | 
25 |     # creating new process
26 |     p1 = mp.Process(target=square_list, args=(0, results))
27 |     p1.start()
28 | 
29 |     p2 = mp.Process(target=square_list, args=(1, results))
30 |     p2.start()
31 | 
32 |     # wait until process is finished
33 |     p1.join()
34 
| p2.join() 35 | -------------------------------------------------------------------------------- /src/unit_test/test_matterport.py: -------------------------------------------------------------------------------- 1 | import MatterSim 2 | import numpy as np 3 | import json 4 | import os 5 | from environments.intrepid_env_meta.environment_wrapper import GenerateEnvironmentWrapper 6 | 7 | 8 | def first_test(): 9 | env = MatterSim.Simulator() 10 | env.setCameraResolution(640, 480) 11 | env.setPreloadingEnabled(False) 12 | env.setDepthEnabled(False) 13 | env.setBatchSize(1) 14 | env.setCacheSize(2) 15 | 16 | env.setDatasetPath("/mnt/data/matterport/v1/scans") 17 | env.setNavGraphPath("/mnt/data/matterport/v1/connectivity/") 18 | 19 | env.initialize() 20 | house_id = "17DRP5sb8fy" 21 | room_id = "0f37bd0737e349de9d536263a4bdd60d" 22 | 23 | env.newEpisode([house_id], [room_id], [0], [0]) 24 | 25 | def print_stuff(): 26 | print(env.getState()[0].scanId) 27 | print(env.getState()[0].location.viewpointId) 28 | print(env.getState()[0].viewIndex) 29 | print(env.getState()[0].heading) 30 | print(env.getState()[0].elevation) 31 | print(env.getState()[0].step) 32 | print(env.getState()[0].navigableLocations) 33 | print(np.array(env.getState()[0].rgb, copy=False).shape) 34 | print() 35 | 36 | print_stuff() 37 | env.makeAction([1], [0], [0]) 38 | print_stuff() 39 | 40 | env.newEpisode([house_id], [room_id], [0], [0]) 41 | 42 | 43 | def test_env(): 44 | with open("../data/matterport/config.json") as f: 45 | config = json.load(f) 46 | 47 | config["save_trace"] = "True" 48 | config["trace_sample_rate"] = 500 49 | config["save_path"] = os.getenv("PT_OUTPUT_DIR") 50 | config["exp_name"] = "test" 51 | config["env_seed"] = 0 52 | config["policy_type"] = "linear" 53 | 54 | env = GenerateEnvironmentWrapper("matterport", config) 55 | env.reset() 56 | for _ in range(30): 57 | print("Stepping in env with action {}".format(1)) 58 | obs, rew, done, info = env.step(1) 59 | print("Got:", rew, done, info["location"]) 60 | print() 61 | env.reset() 62 | 63 | 64 | if __name__ == "__main__": 65 | test_env() 66 | -------------------------------------------------------------------------------- /src/unit_test/test_slot_factored_mdp.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from environments.rl_acid_env.slot_factored_mdp import SlotFactoredMDP 4 | 5 | config = {"state_dim": 5, "grid_x": 3, "grid_y": 5, "horizon": 10} 6 | 7 | mdp = SlotFactoredMDP(config) 8 | obs, info = mdp.reset() 9 | print("State \n", info["state"]) 10 | 11 | for _ in range(0, config["horizon"]): 12 | # pdb.set_trace() 13 | action = random.randint(0, config["grid_x"] * config["grid_y"] - 1) 14 | obs, reward, done, info = mdp.step(action) 15 | 16 | print("Action ", action) 17 | print("State \n", info["state"]) 18 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Intrepid/7b43fb66dd24f520dec3a501d20720d903d6d3f5/src/utils/__init__.py -------------------------------------------------------------------------------- /src/utils/average.py: -------------------------------------------------------------------------------- 1 | class AverageUtil: 2 | def __init__(self, init_val=None): 3 | if init_val is None: 4 | self._sum_val = 0.0 5 | self._num_items = 0 6 | else: 7 | self._sum_val = init_val 8 | self._num_items = 1 9 | 10 
|     def get_num_items(self):
11 |         return self._num_items
12 | 
13 |     def acc(self, val):
14 |         self._sum_val += val
15 |         self._num_items += 1
16 | 
17 |     def get_mean(self):
18 |         return self._sum_val / float(max(1, self._num_items))
19 | 
20 |     def __str__(self):
21 |         return "%f (count: %d)" % (self.get_mean(), self._num_items)
-------------------------------------------------------------------------------- /src/utils/beautify_time.py: --------------------------------------------------------------------------------
1 | import time
2 | 
3 | 
4 | def beautify(time_taken_sec):
5 |     """Given time taken in seconds it returns a beautified string"""
6 | 
7 |     time_taken_sec = int(time_taken_sec)
8 | 
9 |     if time_taken_sec < 60:
10 |         return "%d seconds" % time_taken_sec
11 |     elif 60 <= time_taken_sec < 60 * 60:
12 |         return "%d minutes" % int(time_taken_sec / 60.0)
13 |     elif 60 * 60 <= time_taken_sec < 24 * 60 * 60:
14 |         return "%d hours" % int(time_taken_sec / (60.0 * 60.0))
15 |     elif 24 * 60 * 60 <= time_taken_sec < 30 * 24 * 60 * 60:
16 |         return "%d days" % int(time_taken_sec / (24.0 * 60.0 * 60.0))
17 |     elif 30 * 24 * 60 * 60 <= time_taken_sec < 365 * 24 * 60 * 60:
18 |         return "%d months" % int(time_taken_sec / (30 * 24 * 60 * 60))
19 |     elif 365 * 24 * 60 * 60 <= time_taken_sec:
20 |         months = int((time_taken_sec % (365.0 * 24 * 60.0 * 60.0)) / (30.0 * 24.0 * 60.0 * 60.0))
21 |         return "%d years %d months" % (
22 |             int(time_taken_sec / (365.0 * 24.0 * 60.0 * 60.0)),
23 |             months,
24 |         )
25 | 
26 | 
27 | def elapsed_from_str(time_from):
28 |     """Given a start time, return a beautified string of the time elapsed since then"""
29 | 
30 |     return beautify(time.time() - time_from)
-------------------------------------------------------------------------------- /src/utils/conv_util.py: --------------------------------------------------------------------------------
1 | import math
2 | 
3 | 
4 | def get_conv_out_size(h, w, kernel_size, stride, dilation=1, padding=0):
5 |     """Note that in PyTorch the image is channel x height x width"""
6 | 
7 |     if isinstance(kernel_size, tuple):
8 |         kernel_size_h, kernel_size_w = kernel_size
9 |     elif isinstance(kernel_size, int):
10 |         kernel_size_h = kernel_size
11 |         kernel_size_w = kernel_size
12 |     else:
13 |         raise AssertionError("Kernel size must either be tuple with 2 values or int")
14 | 
15 |     if isinstance(padding, tuple):
16 |         padding_h, padding_w = padding
17 |     elif isinstance(padding, int):
18 |         padding_h = padding
19 |         padding_w = padding
20 |     else:
21 |         raise AssertionError("Padding must either be tuple with 2 values or int")
22 | 
23 |     if isinstance(dilation, tuple):
24 |         dilation_h, dilation_w = dilation
25 |     elif isinstance(dilation, int):
26 |         dilation_h = dilation
27 |         dilation_w = dilation
28 |     else:
29 |         raise AssertionError("Dilation must either be tuple with 2 values or int")
30 | 
31 |     if isinstance(stride, tuple):
32 |         stride_h, stride_w = stride
33 |     elif isinstance(stride, int):
34 |         stride_h = stride
35 |         stride_w = stride
36 |     else:
37 |         raise AssertionError("Stride must either be tuple with 2 values or int")
38 | 
39 |     h_out = int(math.floor((h + 2 * padding_h - dilation_h * (kernel_size_h - 1) - 1) / float(stride_h) + 1))
40 | 
41 |     w_out = int(math.floor((w + 2 * padding_w - dilation_w * (kernel_size_w - 1) - 1) / float(stride_w) + 1))
42 | 
43 |     return h_out, w_out
-------------------------------------------------------------------------------- /src/utils/cuda.py: --------------------------------------------------------------------------------
1 | import torch
2 | from 
torch.autograd import Variable
3 | 
4 | 
5 | def cuda_tensor(t):
6 |     if torch.cuda.is_available():
7 |         return t.cuda()
8 |     else:
9 |         return t
10 | 
11 | 
12 | def cuda_var(t, volatile=False, requires_grad=False):
13 |     if volatile:
14 |         return Variable(cuda_tensor(t), volatile=True, requires_grad=requires_grad)
15 |     else:
16 |         return Variable(cuda_tensor(t), requires_grad=requires_grad)
-------------------------------------------------------------------------------- /src/utils/generic_policy.py: --------------------------------------------------------------------------------
1 | import random
2 | 
3 | 
4 | """ Basic functionality for sampling from discrete probability distributions """
5 | 
6 | 
7 | def sample_action_from_prob(prob):
8 |     """Pick an action sampled from the probability distribution"""
9 | 
10 |     num_actions = len(prob)
11 |     if num_actions == 0:
12 |         raise AssertionError("There must be at least one action.")
13 | 
14 |     v = random.random()
15 | 
16 |     for i in range(0, num_actions):
17 |         v = v - prob[i]
18 |         if v <= 0:
19 |             return i
20 | 
21 |     return num_actions - 1
22 | 
23 | 
24 | def sample_uniform_from_prob(num_actions):
25 |     return random.randint(0, num_actions - 1)
26 | 
27 | 
28 | def get_argmax_action(model_out_val):
29 |     """Returns argmax_a model_out_val(a) with random tie breaking."""
30 | 
31 |     num_actions = len(model_out_val)
32 | 
33 |     if num_actions == 0:
34 |         raise AssertionError("There must be at least one action.")
35 | 
36 |     ix_max = [0]
37 |     for i in range(1, num_actions):
38 |         if model_out_val[i] > model_out_val[ix_max[0]]:
39 |             ix_max[:] = [i]
40 |         elif model_out_val[i] == model_out_val[ix_max[0]]:
41 |             ix_max.append(i)
42 | 
43 |     return ix_max[random.randint(0, len(ix_max) - 1)]
-------------------------------------------------------------------------------- /src/utils/gumbel.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | 
4 | from utils.cuda import cuda_var
5 | 
6 | 
7 | def _sample_gumbel(input_size):
8 |     noise = torch.rand(input_size)
9 |     eps = 1e-20
10 |     noise.add_(eps).log_().neg_()
11 |     noise.add_(eps).log_().neg_()
12 |     return cuda_var(noise)
13 | 
14 | 
15 | def gumbel_sample(input, temperature):
16 |     noise = _sample_gumbel(input.size())
17 |     x = (input + noise) / temperature
18 |     prob = F.softmax(x, dim=1)
19 |     log_prob = F.log_softmax(x, dim=1)
20 |     return prob, log_prob
-------------------------------------------------------------------------------- /src/utils/leaky_softmax.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def leaky_softmax(matrix):
5 |     """Given a matrix of size batch x num_factors we output another matrix using leaky softmax based on
6 |     Dynamic Routing Between Capsules, Sabour et al., 2017
7 |     """
8 | 
9 |     vector_norms = torch.norm(matrix, dim=1).view(-1)  # Batch
10 |     sq_vector_norms = vector_norms * vector_norms  # Batch
11 |     ratio = sq_vector_norms / (1.0 + sq_vector_norms)  # Batch
12 | 
13 |     unit_vector = matrix / vector_norms.view(-1, 1)  # Batch x num_factors
14 |     output = unit_vector * ratio.view(-1, 1)
15 | 
16 |     return output
17 | 
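A quick numerical check of the squashing behaviour described in the docstring (our own toy input, assuming the module above is importable): each output row keeps its direction but is squashed to norm ||v||^2 / (1 + ||v||^2) < 1.

import torch

from utils.leaky_softmax import leaky_softmax

m = torch.tensor([[3.0, 4.0], [0.0, 0.1]])  # row norms 5.0 and 0.1
out = leaky_softmax(m)
print(torch.norm(out, dim=1))  # ~ [25/26, 0.01/1.01] = [0.9615, 0.0099]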
-------------------------------------------------------------------------------- /src/utils/multiprocess_logger.py: --------------------------------------------------------------------------------
1 | import os
2 | import atexit
3 | import logging
4 | 
5 | from datetime import datetime
6 | from multiprocessing import Process, Queue
7 | 
8 | 
9 | def logtxt(fname, s):
10 |     os.makedirs(os.path.dirname(fname), exist_ok=True)
11 |     with open(fname, "a") as f:
12 |         f.write(f"{datetime.now()}: {s}\n")
13 | 
14 | 
15 | class MultiprocessingLoggerManager(object):
16 |     def __init__(self, file_path, logging_level):
17 |         self.log_queue = Queue()
18 |         self.p = Process(target=logger_daemon, args=(self.log_queue, file_path, logging_level))
19 |         self.p.start()
20 |         atexit.register(self.cleanup)
21 | 
22 |     def get_logger(self, client_id):
23 |         return MultiprocessingLogger(client_id, self.log_queue)
24 | 
25 |     def cleanup(self):
26 |         self.p.terminate()
27 | 
28 | 
29 | class MultiprocessingLogger(object):
30 |     def __init__(self, client_id, log_queue):
31 |         self.client_id = client_id
32 |         self.log_queue = log_queue
33 | 
34 |     def log(self, message):
35 |         print("Client %r: %r" % (self.client_id, message))
36 |         self.log_queue.put("Client %r: %r" % (self.client_id, message))
37 | 
38 |     def debug(self, message):
39 |         print("Client %r: %r" % (self.client_id, message))
40 |         self.log_queue.put("Client %r: %r" % (self.client_id, message))
41 | 
42 | 
43 | def logger_daemon(log_queue, file_path, logging_level):
44 |     logging.basicConfig(filename=file_path, level=logging_level)
45 |     while True:
46 |         logging.info(log_queue.get())
-------------------------------------------------------------------------------- /src/utils/normalizer.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | class BaseNormalizer:
5 |     def __init__(self, read_only=False):
6 |         self.read_only = read_only
7 | 
8 |     def set_read_only(self):
9 |         self.read_only = True
10 | 
11 |     def unset_read_only(self):
12 |         self.read_only = False
13 | 
14 |     def state_dict(self):
15 |         return None
16 | 
17 |     def load_state_dict(self, _):
18 |         return
19 | 
20 | 
21 | class MeanStdNormalizer(BaseNormalizer):
22 |     def __init__(self, read_only=False, clip=5.0, epsilon=1e-8):
23 |         BaseNormalizer.__init__(self, read_only)
24 |         self.read_only = read_only
25 |         self.rms = None
26 |         self.clip = clip
27 |         self.epsilon = epsilon
28 | 
29 |     def __call__(self, x):
30 |         x = np.asarray(x)
31 |         if self.rms is None:
32 |             self.rms = RunningMeanStd(shape=(1,) + x.shape[1:])
33 |         if not self.read_only:
34 |             self.rms.update(x)
35 |         return np.clip(
36 |             (x - self.rms.mean) / np.sqrt(self.rms.var + self.epsilon),
37 |             -self.clip,
38 |             self.clip,
39 |         )
40 | 
41 |     def state_dict(self):
42 |         return {"mean": self.rms.mean, "var": self.rms.var}
43 | 
44 |     def load_state_dict(self, saved):
45 |         self.rms.mean = saved["mean"]
46 |         self.rms.var = saved["var"]
47 | 
48 | 
49 | class RunningMeanStd(object):
50 |     # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
51 |     def __init__(self, epsilon=1e-4, shape=()):
52 |         self.mean = np.zeros(shape, "float64")
53 |         self.var = np.ones(shape, "float64")
54 |         self.count = epsilon
55 | 
56 |     def update(self, x):
57 |         batch_mean = np.mean(x, axis=0)
58 |         batch_var = np.var(x, axis=0)
59 |         batch_count = x.shape[0]
60 |         self.update_from_moments(batch_mean, batch_var, batch_count)
61 | 
62 |     def update_from_moments(self, batch_mean, batch_var, batch_count):
63 |         self.mean, self.var, self.count = update_mean_var_count_from_moments(
64 |             self.mean, self.var, self.count, batch_mean, batch_var, batch_count
65 |         )
66 | 
67 | 
68 | def update_mean_var_count_from_moments(mean, var, count, batch_mean, batch_var, batch_count):
69 |     delta = batch_mean - mean
70 |     tot_count = count + batch_count
71 | 
72 |     new_mean = mean + delta * batch_count / tot_count
73 |     m_a = var * count
74 |     m_b = batch_var * batch_count
75 |     M2 = m_a + m_b + np.square(delta) * count * batch_count / tot_count
76 |     new_var = M2 / tot_count
77 |     new_count = tot_count
78 | 
79 |     return new_mean, new_var, new_count
80 | 
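The moment-merging above is the parallel variance algorithm referenced in the comment; a quick check (our own toy data) that merging batch moments reproduces the pooled statistics:

import numpy as np

from utils.normalizer import update_mean_var_count_from_moments

x1, x2 = np.random.randn(100), np.random.randn(50) + 1.0
mean, var, _ = update_mean_var_count_from_moments(
    x1.mean(), x1.var(), 100, x2.mean(), x2.var(), 50
)
both = np.concatenate([x1, x2])
assert np.allclose([mean, var], [both.mean(), both.var()])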
-------------------------------------------------------------------------------- /src/utils/simclr_transform.py: --------------------------------------------------------------------------------
1 | # import cv2
2 | # import torch
3 | # import numpy as np
4 | 
5 | from torchvision import transforms
6 | 
7 | 
8 | # class GaussianBlur(object):
9 | #     # Implements Gaussian blur as described in the SimCLR paper
10 | #     def __init__(self, kernel_size, min=0.1, max=2.0):
11 | #         self.min = min
12 | #         self.max = max
13 | #         # kernel size is set to be 10% of the image height/width
14 | #         self.kernel_size = kernel_size
15 | #
16 | #     def __call__(self, sample):
17 | #         sample = np.array(sample)
18 | #
19 | #         # blur the image with a 50% chance
20 | #         prob = np.random.random_sample()
21 | #
22 | #         if prob < 0.5:
23 | #             sigma = (self.max - self.min) * np.random.random_sample() + self.min
24 | #             sample = cv2.GaussianBlur(sample, (self.kernel_size, self.kernel_size), sigma)
25 | #
26 | #         return torch.FloatTensor(sample)
27 | 
28 | 
29 | train_transform = transforms.Compose(
30 |     [
31 |         transforms.RandomResizedCrop(32),
32 |         transforms.RandomHorizontalFlip(p=0.5),
33 |         transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
34 |         transforms.RandomGrayscale(p=0.2),
35 |         transforms.RandomApply(
36 |             [transforms.GaussianBlur(kernel_size=int(0.1 * 32), sigma=(0.1, 2.0))],
37 |             p=0.5,
38 |         ),
39 |         # GaussianBlur(kernel_size=int(0.1 * 32)),
40 |         # transforms.ToTensor(),  # Removed it as it permuted the order; instead, move it to the top
41 |         transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
42 |     ]
43 | )
44 | 
45 | test_transform = transforms.Compose(
46 |     [
47 |         transforms.ToTensor(),
48 |         transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
49 |     ]
50 | )
-------------------------------------------------------------------------------- /src/utils/telemetry.py: --------------------------------------------------------------------------------
1 | import time
2 | 
3 | 
4 | class Telemeter:
5 |     """A class for measuring time taken by different code snippets"""
6 | 
7 |     def __init__(self, name):
8 |         self.name = name
9 | 
10 |         self.time_data = dict()
11 |         self.ctr = dict()
12 | 
13 |         self.active_key = None
14 |         self.timer = None  # Start of current code window
15 | 
16 |     def start(self, key):
17 |         self.active_key = "%s_%s" % (self.name, key)
18 | 
19 |         # Start the timer
20 |         self.timer = time.time()
21 | 
22 |     def stop(self):
23 |         time_taken = time.time() - self.timer
24 |         self.timer = None  # Reset the timer
25 | 
26 |         if self.active_key in self.time_data:
27 |             self.time_data[self.active_key] += time_taken
28 |             self.ctr[self.active_key] += 1
29 |         else:
30 |             self.time_data[self.active_key] = time_taken
31 |             self.ctr[self.active_key] = 1
32 | 
33 |     def merge(self, telemetry):
34 |         assert self.timer is None and telemetry.timer is None, "Cannot merge telemeters with a running timer."
35 |         assert self.name != telemetry.name, "Telemeters with the same name cannot be merged."
36 | 
37 |         for key in telemetry.time_data:
38 |             # Accumulate rather than overwrite, so keys present in both telemeters are merged correctly
39 |             self.time_data[key] = self.time_data.get(key, 0.0) + telemetry.time_data[key]
40 |             self.ctr[key] = self.ctr.get(key, 0) + telemetry.ctr[key]
41 | 
42 |     def save_to_log(self, logger):
43 |         for key, time_taken in sorted(self.time_data.items()):
44 |             count = self.ctr[key]
45 |             avg = round(time_taken / float(max(1, count)), 4)
46 |             logger.log("%r: Avg time taken %r sec with %d count" % (key, avg, count))
47 | 
48 |     def print_report(self):
49 |         for key, time_taken in sorted(self.time_data.items()):
50 |             count = self.ctr[key]
51 |             avg = round(time_taken / float(max(1, count)), 4)
52 |             print("%r: Avg time taken %r sec with %d count" % (key, avg, count))
-------------------------------------------------------------------------------- /src/utils/tensorboard.py: --------------------------------------------------------------------------------
1 | from tensorboardX import SummaryWriter
2 | 
3 | 
4 | class Tensorboard:
5 |     def __init__(self, log_dir):
6 |         save_dir = log_dir + "/tensorboard_logs/"
7 |         self.writer = SummaryWriter(save_dir)
8 |         self.index_dict = dict()
9 | 
10 |     def log_scalar(self, name, value, index=-1):
11 |         if index == -1:
12 |             if name in self.index_dict:
13 |                 self.index_dict[name] += 1
14 |                 index = self.index_dict[name]
15 |             else:
16 |                 self.index_dict[name] = 1
17 |                 index = 1
18 |         self.writer.add_scalar(name, value, index)
19 | 
20 |     def log_histogram(self, name, value, bins, index=-1):
21 |         if index == -1:
22 |             if name in self.index_dict:
23 |                 self.index_dict[name] += 1
24 |                 index = self.index_dict[name]
25 |             else:
26 |                 self.index_dict[name] = 1
27 |                 index = 1
28 |         self.writer.add_histogram(name, value, index, bins)
--------------------------------------------------------------------------------
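A usage sketch for the Tensorboard wrapper above (the log directory is a placeholder): when `index` is omitted, each named series keeps its own auto-incrementing step counter.

from utils.tensorboard import Tensorboard

tb = Tensorboard(log_dir="./logs")  # writes under ./logs/tensorboard_logs/
tb.log_scalar("train/loss", 0.93)   # logged at step 1
tb.log_scalar("train/loss", 0.71)   # logged at step 2 (auto-incremented)
tb.log_scalar("eval/return", 4.2, index=10)  # explicit step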