├── .gitignore ├── README.md ├── algos ├── linear_bandit │ ├── dpo.py │ ├── mle.py │ └── pg.py └── neural_bandit │ ├── dpo.py │ ├── mle.py │ └── pg.py ├── environment.yml ├── envs ├── linear_bandit.py └── neural_bandit.py ├── experiments ├── run_linear_bandit.py └── run_neural_bandit.py ├── images └── neural_bandit.png ├── scripts ├── run_linear_bandit.sh └── run_neural_bandit.sh └── utils ├── collect_data.py ├── io_utils.py ├── logger.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/README.md -------------------------------------------------------------------------------- /algos/linear_bandit/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/algos/linear_bandit/dpo.py -------------------------------------------------------------------------------- /algos/linear_bandit/mle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/algos/linear_bandit/mle.py -------------------------------------------------------------------------------- /algos/linear_bandit/pg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/algos/linear_bandit/pg.py -------------------------------------------------------------------------------- /algos/neural_bandit/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/algos/neural_bandit/dpo.py -------------------------------------------------------------------------------- /algos/neural_bandit/mle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/algos/neural_bandit/mle.py -------------------------------------------------------------------------------- /algos/neural_bandit/pg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/algos/neural_bandit/pg.py -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/environment.yml -------------------------------------------------------------------------------- /envs/linear_bandit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/envs/linear_bandit.py -------------------------------------------------------------------------------- /envs/neural_bandit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/envs/neural_bandit.py -------------------------------------------------------------------------------- /experiments/run_linear_bandit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/experiments/run_linear_bandit.py -------------------------------------------------------------------------------- /experiments/run_neural_bandit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/experiments/run_neural_bandit.py -------------------------------------------------------------------------------- /images/neural_bandit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/images/neural_bandit.png -------------------------------------------------------------------------------- /scripts/run_linear_bandit.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/scripts/run_linear_bandit.sh -------------------------------------------------------------------------------- /scripts/run_neural_bandit.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/scripts/run_neural_bandit.sh -------------------------------------------------------------------------------- /utils/collect_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/utils/collect_data.py -------------------------------------------------------------------------------- /utils/io_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/utils/io_utils.py -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/utils/logger.py -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/policy_optimization/HEAD/utils/utils.py --------------------------------------------------------------------------------