├── LICENSE.md ├── README.md ├── architecture.png └── code ├── .gitignore ├── conda_env.yml ├── data └── download_d4rl_datasets.py ├── decision_transformer ├── envs │ ├── assets │ │ └── reacher_2d.xml │ └── reacher_2d.py ├── evaluation │ └── evaluate_episodes.py ├── models │ ├── decision_transformer.py │ ├── mlp_bc.py │ ├── model.py │ └── trajectory_gpt2.py └── training │ ├── act_trainer.py │ ├── seq_trainer.py │ └── trainer.py ├── eval_model.py ├── experiment.py ├── kmeans_cache ├── chibi_1000.pt ├── chibi_lm_1000.pt ├── chibiv2_lm_1000.pt ├── gpt2_1000.pt └── gpt2_500.pt ├── lm_cotraining ├── decision_transformer │ ├── envs │ │ ├── assets │ │ │ └── reacher_2d.xml │ │ └── reacher_2d.py │ ├── evaluation │ │ └── evaluate_episodes.py │ ├── models │ │ ├── decision_transformer.py │ │ ├── mlp_bc.py │ │ ├── model.py │ │ └── trajectory_gpt2.py │ └── training │ │ ├── act_trainer.py │ │ ├── seq_trainer.py │ │ └── trainer.py ├── eval_model.py ├── experiment.py ├── get_nlp_datasets.py └── utils.py ├── run.sh ├── utils.py └── vision ├── envs ├── assets │ └── reacher_2d.xml └── reacher_2d.py ├── evaluation └── evaluate_episodes.py ├── models ├── decision_transformer.py ├── mlp_bc.py ├── model.py └── trajectory_gpt2.py └── training ├── act_trainer.py ├── seq_trainer.py └── trainer.py /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/LICENSE.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/README.md -------------------------------------------------------------------------------- /architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/architecture.png -------------------------------------------------------------------------------- /code/.gitignore: -------------------------------------------------------------------------------- 1 | checkpoints 2 | wandb 3 | -------------------------------------------------------------------------------- /code/conda_env.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/conda_env.yml -------------------------------------------------------------------------------- /code/data/download_d4rl_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/data/download_d4rl_datasets.py -------------------------------------------------------------------------------- /code/decision_transformer/envs/assets/reacher_2d.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/decision_transformer/envs/assets/reacher_2d.xml -------------------------------------------------------------------------------- /code/decision_transformer/envs/reacher_2d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/decision_transformer/envs/reacher_2d.py -------------------------------------------------------------------------------- /code/decision_transformer/evaluation/evaluate_episodes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/decision_transformer/evaluation/evaluate_episodes.py -------------------------------------------------------------------------------- /code/decision_transformer/models/decision_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/decision_transformer/models/decision_transformer.py -------------------------------------------------------------------------------- /code/decision_transformer/models/mlp_bc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/decision_transformer/models/mlp_bc.py -------------------------------------------------------------------------------- /code/decision_transformer/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/decision_transformer/models/model.py -------------------------------------------------------------------------------- /code/decision_transformer/models/trajectory_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/decision_transformer/models/trajectory_gpt2.py -------------------------------------------------------------------------------- /code/decision_transformer/training/act_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/decision_transformer/training/act_trainer.py -------------------------------------------------------------------------------- /code/decision_transformer/training/seq_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/decision_transformer/training/seq_trainer.py -------------------------------------------------------------------------------- /code/decision_transformer/training/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/decision_transformer/training/trainer.py -------------------------------------------------------------------------------- /code/eval_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/eval_model.py -------------------------------------------------------------------------------- /code/experiment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/experiment.py -------------------------------------------------------------------------------- /code/kmeans_cache/chibi_1000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/kmeans_cache/chibi_1000.pt -------------------------------------------------------------------------------- /code/kmeans_cache/chibi_lm_1000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/kmeans_cache/chibi_lm_1000.pt -------------------------------------------------------------------------------- /code/kmeans_cache/chibiv2_lm_1000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/kmeans_cache/chibiv2_lm_1000.pt -------------------------------------------------------------------------------- /code/kmeans_cache/gpt2_1000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/kmeans_cache/gpt2_1000.pt -------------------------------------------------------------------------------- /code/kmeans_cache/gpt2_500.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/kmeans_cache/gpt2_500.pt -------------------------------------------------------------------------------- /code/lm_cotraining/decision_transformer/envs/assets/reacher_2d.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/decision_transformer/envs/assets/reacher_2d.xml -------------------------------------------------------------------------------- /code/lm_cotraining/decision_transformer/envs/reacher_2d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/decision_transformer/envs/reacher_2d.py -------------------------------------------------------------------------------- /code/lm_cotraining/decision_transformer/evaluation/evaluate_episodes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/decision_transformer/evaluation/evaluate_episodes.py -------------------------------------------------------------------------------- /code/lm_cotraining/decision_transformer/models/decision_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/decision_transformer/models/decision_transformer.py -------------------------------------------------------------------------------- /code/lm_cotraining/decision_transformer/models/mlp_bc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/decision_transformer/models/mlp_bc.py -------------------------------------------------------------------------------- /code/lm_cotraining/decision_transformer/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/decision_transformer/models/model.py -------------------------------------------------------------------------------- /code/lm_cotraining/decision_transformer/models/trajectory_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/decision_transformer/models/trajectory_gpt2.py -------------------------------------------------------------------------------- /code/lm_cotraining/decision_transformer/training/act_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/decision_transformer/training/act_trainer.py -------------------------------------------------------------------------------- /code/lm_cotraining/decision_transformer/training/seq_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/decision_transformer/training/seq_trainer.py -------------------------------------------------------------------------------- /code/lm_cotraining/decision_transformer/training/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/decision_transformer/training/trainer.py -------------------------------------------------------------------------------- /code/lm_cotraining/eval_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/eval_model.py -------------------------------------------------------------------------------- /code/lm_cotraining/experiment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/experiment.py -------------------------------------------------------------------------------- /code/lm_cotraining/get_nlp_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/get_nlp_datasets.py -------------------------------------------------------------------------------- /code/lm_cotraining/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/lm_cotraining/utils.py -------------------------------------------------------------------------------- /code/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/run.sh -------------------------------------------------------------------------------- /code/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/utils.py -------------------------------------------------------------------------------- /code/vision/envs/assets/reacher_2d.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/vision/envs/assets/reacher_2d.xml -------------------------------------------------------------------------------- /code/vision/envs/reacher_2d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/vision/envs/reacher_2d.py -------------------------------------------------------------------------------- /code/vision/evaluation/evaluate_episodes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/vision/evaluation/evaluate_episodes.py -------------------------------------------------------------------------------- /code/vision/models/decision_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/vision/models/decision_transformer.py -------------------------------------------------------------------------------- /code/vision/models/mlp_bc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/vision/models/mlp_bc.py -------------------------------------------------------------------------------- /code/vision/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/vision/models/model.py -------------------------------------------------------------------------------- /code/vision/models/trajectory_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/vision/models/trajectory_gpt2.py -------------------------------------------------------------------------------- /code/vision/training/act_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/vision/training/act_trainer.py -------------------------------------------------------------------------------- /code/vision/training/seq_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/vision/training/seq_trainer.py -------------------------------------------------------------------------------- /code/vision/training/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/machelreid/can-wikipedia-help-offline-rl/HEAD/code/vision/training/trainer.py --------------------------------------------------------------------------------