├── CODE_OF_CONDUCT.rst ├── LICENSE.md ├── Makefile ├── README.md ├── README.rst ├── agents ├── eval │ ├── analyse_experiments.py │ ├── analyse_experiments_cross_val.py │ ├── analyse_experiments_cross_val_sens.py │ ├── analyse_experiments_latency.py │ └── results │ │ ├── CrossVal │ │ ├── DDQN │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-25_22-49.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-25_22-56.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v1_0_2019-06-25_23-03.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v1_0_2019-06-25_23_11.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-25_23_18.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-25_23_25.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v3_0_2019-06-25_23_33.txt │ │ │ └── DQN_CM1-postgres-card-job-masking-v3_0_2019-06-25_23_40.txt │ │ ├── DQN │ │ │ ├── 0_DQN_CM1-postgres-card-job-masking-v0_0_2019-06-20_12-14.txt │ │ │ ├── 1_DQN_CM1-postgres-card-job-masking-v0_0_2019-06-20_13-01.txt │ │ │ ├── 2_DQN_CM1-postgres-card-job-masking-v0_0_2019-06-20_14-29.txt │ │ │ └── 3_DQN_CM1-postgres-card-job-masking-v0_0_2019-06-20_15-37.txt │ │ └── PPO │ │ │ ├── 0_PPO_CM1-postgres-card-job-masking-v0_0_2019-06-21_07-19.txt │ │ │ ├── 1_PPO_CM1-postgres-card-job-masking-v0_0_2019-06-20_17-42.txt │ │ │ ├── 2_PPO_CM1-postgres-card-job-masking-v0_0_2019-06-20_20-57.txt │ │ │ └── 3_PPO_CM1-postgres-card-job-masking-v0_0_2019-06-21_00-08.txt │ │ ├── Sens │ │ ├── DDQN │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_22-43.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_22-50.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_22-57.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_23-04.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_23-12.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_23-19.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_23-26.txt │ │ │ ├── 
DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_23-33.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_23-41.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_23-48.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_23-55.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-27_00-03.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-27_00-10.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-27_00-17.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-27_00-24.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v3_0_2019-06-27_00-32.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v3_0_2019-06-27_00-39.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v3_0_2019-06-27_00-46.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v3_0_2019-06-27_00-53.txt │ │ │ └── DQN_CM1-postgres-card-job-masking-v3_0_2019-06-27_01-01.txt │ │ ├── DQN │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_17-38.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_17-51.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_18-03.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_18-12.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_18-23.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_18-34.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_18-46.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_18-57.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_19-09.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_19-21.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_19-33.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_19-45.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_19-56.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_20-08.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_20-20.txt │ │ │ ├── 
DQN_CM1-postgres-card-job-masking-v3_0_2019-06-26_20-32.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v3_0_2019-06-26_20-43.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v3_0_2019-06-26_20-52.txt │ │ │ ├── DQN_CM1-postgres-card-job-masking-v3_0_2019-06-26_21-01.txt │ │ │ └── DQN_CM1-postgres-card-job-masking-v3_0_2019-06-26_21-10.txt │ │ └── PPO │ │ │ ├── PPO_CM1-postgres-card-job-masking-v0_0_2019-06-27_01-08.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v0_0_2019-06-27_01-32.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v0_0_2019-06-27_01-57.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v0_0_2019-06-27_02-20.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v0_0_2019-06-27_02-45.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v1_0_2019-06-27_03-09.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v1_0_2019-06-27_03-32.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v1_0_2019-06-27_03-56.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v1_0_2019-06-27_04-19.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v1_0_2019-06-27_04-43.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v2_0_2019-06-27_05-08.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v2_0_2019-06-27_05-31.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v2_0_2019-06-27_05-56.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v2_0_2019-06-27_06-20.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v2_0_2019-06-27_06-45.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v3_0_2019-06-27_07-09.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v3_0_2019-06-27_07-34.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v3_0_2019-06-27_07-59.txt │ │ │ ├── PPO_CM1-postgres-card-job-masking-v3_0_2019-06-27_08-28.txt │ │ │ └── PPO_CM1-postgres-card-job-masking-v3_0_2019-06-27_08-58.txt │ │ ├── res_left_deep.txt │ │ └── res_postgres_stat.txt ├── queries │ ├── crossval │ │ ├── job_queries_simple_crossval_7400_0_test.txt │ │ ├── job_queries_simple_crossval_7400_0_train.txt │ │ ├── job_queries_simple_crossval_7400_0_train_sort.txt │ 
│ ├── job_queries_simple_crossval_7400_0_train_sort_a.txt │ │ ├── job_queries_simple_crossval_7400_1_test.txt │ │ ├── job_queries_simple_crossval_7400_1_train.txt │ │ ├── job_queries_simple_crossval_7400_1_train_sort.txt │ │ ├── job_queries_simple_crossval_7400_1_train_sort_a.txt │ │ ├── job_queries_simple_crossval_7400_2_test.txt │ │ ├── job_queries_simple_crossval_7400_2_train.txt │ │ ├── job_queries_simple_crossval_7400_2_train_sort.txt │ │ ├── job_queries_simple_crossval_7400_2_train_sort_a.txt │ │ ├── job_queries_simple_crossval_7400_3_test.txt │ │ ├── job_queries_simple_crossval_7400_3_train.txt │ │ ├── job_queries_simple_crossval_7400_3_train_sort.txt │ │ └── job_queries_simple_crossval_7400_3_train_sort_a.txt │ ├── crossval_sens │ │ ├── job_queries_simple_crossval_0_test.txt │ │ ├── job_queries_simple_crossval_0_train.txt │ │ ├── job_queries_simple_crossval_1_test.txt │ │ ├── job_queries_simple_crossval_1_train.txt │ │ ├── job_queries_simple_crossval_2_test.txt │ │ ├── job_queries_simple_crossval_2_train.txt │ │ ├── job_queries_simple_crossval_3_test.txt │ │ └── job_queries_simple_crossval_3_train.txt │ ├── helper_func │ │ ├── createTable_movie_info_idx.py │ │ ├── create_4_fold_after_sensitivity.py │ │ ├── create_4_fold_crossvalidaton_sets.py │ │ ├── indices_preprocessing.py │ │ ├── query_parser_joinonly.py │ │ └── sql_to_rl_schema │ ├── imdb_schema.json │ ├── imdb_schema.sql │ ├── indices.txt │ ├── job_queries.txt │ ├── job_queries_label.txt │ ├── job_queries_simple.txt │ ├── job_queries_simple_label.txt │ ├── res_greedy_left_deep.txt │ └── synt_queries.txt ├── rollout │ ├── custom_rollout_dqn.py │ ├── custom_rollout_ppo.py │ └── custom_rollout_sens.py ├── run │ ├── configs.py │ ├── execute.py │ ├── masking_envs_cross.py │ ├── models.py │ └── simple_corridor.py └── trad_models_job.py ├── bin ├── docker_entrypoint └── render.py ├── docs ├── agents.md ├── environments.md ├── misc.md ├── readme.md └── wrappers.md ├── examples ├── agents │ ├── _policies.py │ 
├── cem.py │ ├── keyboard_agent.py │ └── random_agent.py └── scripts │ ├── list_envs │ └── sim_env ├── gym.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt ├── not-zip-safe ├── requires.txt └── top_level.txt ├── gym ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── core.cpython-36.pyc │ ├── error.cpython-36.pyc │ ├── logger.cpython-36.pyc │ └── version.cpython-36.pyc ├── core.py ├── envs │ ├── README.md │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── registration.cpython-36.pyc │ ├── algorithmic │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── algorithmic_env.cpython-36.pyc │ │ │ ├── copy_.cpython-36.pyc │ │ │ ├── duplicated_input.cpython-36.pyc │ │ │ ├── repeat_copy.cpython-36.pyc │ │ │ ├── reverse.cpython-36.pyc │ │ │ └── reversed_addition.cpython-36.pyc │ │ ├── algorithmic_env.py │ │ ├── copy_.py │ │ ├── duplicated_input.py │ │ ├── repeat_copy.py │ │ ├── reverse.py │ │ ├── reversed_addition.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ └── test_algorithmic.py │ ├── atari │ │ ├── __init__.py │ │ └── atari_env.py │ ├── box2d │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-36.pyc │ │ ├── bipedal_walker.py │ │ ├── car_dynamics.py │ │ ├── car_racing.py │ │ ├── lunar_lander.py │ │ └── test_lunar_lander.py │ ├── classic_control │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── acrobot.cpython-36.pyc │ │ │ ├── cartpole.cpython-36.pyc │ │ │ ├── continuous_mountain_car.cpython-36.pyc │ │ │ ├── mountain_car.cpython-36.pyc │ │ │ ├── pendulum.cpython-36.pyc │ │ │ └── rendering.cpython-36.pyc │ │ ├── acrobot.py │ │ ├── assets │ │ │ └── clockwise.png │ │ ├── cartpole.py │ │ ├── continuous_mountain_car.py │ │ ├── mountain_car.py │ │ ├── pendulum.py │ │ └── rendering.py │ ├── database │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── cm1_postgres_card_env_job_crossval_0.cpython-36.pyc │ │ │ ├── 
cm1_postgres_card_env_job_crossval_1.cpython-36.pyc │ │ │ ├── cm1_postgres_card_env_job_crossval_2.cpython-36.pyc │ │ │ ├── cm1_postgres_card_env_job_crossval_3.cpython-36.pyc │ │ │ ├── cm1_postgres_card_env_job_one.cpython-36.pyc │ │ │ └── simple_corridor_ray.cpython-36.pyc │ │ ├── cm1_postgres_card_env_job.py │ │ ├── cm1_postgres_card_env_job_crossval_0.py │ │ ├── cm1_postgres_card_env_job_crossval_1.py │ │ ├── cm1_postgres_card_env_job_crossval_2.py │ │ ├── cm1_postgres_card_env_job_crossval_3.py │ │ ├── cm1_postgres_card_env_job_one.py │ │ └── simple_corridor_ray.py │ ├── mujoco │ │ ├── __init__.py │ │ ├── ant.py │ │ ├── ant_v3.py │ │ ├── assets │ │ │ ├── ant.xml │ │ │ ├── half_cheetah.xml │ │ │ ├── hopper.xml │ │ │ ├── humanoid.xml │ │ │ ├── humanoidstandup.xml │ │ │ ├── inverted_double_pendulum.xml │ │ │ ├── inverted_pendulum.xml │ │ │ ├── point.xml │ │ │ ├── pusher.xml │ │ │ ├── reacher.xml │ │ │ ├── striker.xml │ │ │ ├── swimmer.xml │ │ │ ├── thrower.xml │ │ │ └── walker2d.xml │ │ ├── half_cheetah.py │ │ ├── half_cheetah_v3.py │ │ ├── hopper.py │ │ ├── hopper_v3.py │ │ ├── humanoid.py │ │ ├── humanoid_v3.py │ │ ├── humanoidstandup.py │ │ ├── inverted_double_pendulum.py │ │ ├── inverted_pendulum.py │ │ ├── mujoco_env.py │ │ ├── pusher.py │ │ ├── reacher.py │ │ ├── striker.py │ │ ├── swimmer.py │ │ ├── swimmer_v3.py │ │ ├── thrower.py │ │ ├── walker2d.py │ │ └── walker2d_v3.py │ ├── registration.py │ ├── robotics │ │ ├── README.md │ │ ├── __init__.py │ │ ├── assets │ │ │ ├── LICENSE.md │ │ │ ├── fetch │ │ │ │ ├── pick_and_place.xml │ │ │ │ ├── push.xml │ │ │ │ ├── reach.xml │ │ │ │ ├── robot.xml │ │ │ │ ├── shared.xml │ │ │ │ └── slide.xml │ │ │ ├── hand │ │ │ │ ├── manipulate_block.xml │ │ │ │ ├── manipulate_block_touch_sensors.xml │ │ │ │ ├── manipulate_egg.xml │ │ │ │ ├── manipulate_egg_touch_sensors.xml │ │ │ │ ├── manipulate_pen.xml │ │ │ │ ├── manipulate_pen_touch_sensors.xml │ │ │ │ ├── reach.xml │ │ │ │ ├── robot.xml │ │ │ │ ├── 
robot_touch_sensors_92.xml │ │ │ │ ├── shared.xml │ │ │ │ ├── shared_asset.xml │ │ │ │ └── shared_touch_sensors_92.xml │ │ │ ├── stls │ │ │ │ ├── .get │ │ │ │ ├── fetch │ │ │ │ │ ├── base_link_collision.stl │ │ │ │ │ ├── bellows_link_collision.stl │ │ │ │ │ ├── elbow_flex_link_collision.stl │ │ │ │ │ ├── estop_link.stl │ │ │ │ │ ├── forearm_roll_link_collision.stl │ │ │ │ │ ├── gripper_link.stl │ │ │ │ │ ├── head_pan_link_collision.stl │ │ │ │ │ ├── head_tilt_link_collision.stl │ │ │ │ │ ├── l_wheel_link_collision.stl │ │ │ │ │ ├── laser_link.stl │ │ │ │ │ ├── r_wheel_link_collision.stl │ │ │ │ │ ├── shoulder_lift_link_collision.stl │ │ │ │ │ ├── shoulder_pan_link_collision.stl │ │ │ │ │ ├── torso_fixed_link.stl │ │ │ │ │ ├── torso_lift_link_collision.stl │ │ │ │ │ ├── upperarm_roll_link_collision.stl │ │ │ │ │ ├── wrist_flex_link_collision.stl │ │ │ │ │ └── wrist_roll_link_collision.stl │ │ │ │ └── hand │ │ │ │ │ ├── F1.stl │ │ │ │ │ ├── F2.stl │ │ │ │ │ ├── F3.stl │ │ │ │ │ ├── TH1_z.stl │ │ │ │ │ ├── TH2_z.stl │ │ │ │ │ ├── TH3_z.stl │ │ │ │ │ ├── forearm_electric.stl │ │ │ │ │ ├── forearm_electric_cvx.stl │ │ │ │ │ ├── knuckle.stl │ │ │ │ │ ├── lfmetacarpal.stl │ │ │ │ │ ├── palm.stl │ │ │ │ │ └── wrist.stl │ │ │ └── textures │ │ │ │ ├── block.png │ │ │ │ └── block_hidden.png │ │ ├── fetch │ │ │ ├── __init__.py │ │ │ ├── pick_and_place.py │ │ │ ├── push.py │ │ │ ├── reach.py │ │ │ └── slide.py │ │ ├── fetch_env.py │ │ ├── hand │ │ │ ├── __init__.py │ │ │ ├── manipulate.py │ │ │ ├── manipulate_touch_sensors.py │ │ │ └── reach.py │ │ ├── hand_env.py │ │ ├── robot_env.py │ │ ├── rotations.py │ │ └── utils.py │ ├── tests │ │ ├── __init__.py │ │ ├── spec_list.py │ │ ├── test_determinism.py │ │ ├── test_envs.py │ │ ├── test_envs_semantics.py │ │ ├── test_kellycoinflip.py │ │ ├── test_mujoco_v2_to_v3_conversion.py │ │ └── test_registration.py │ ├── toy_text │ │ ├── __init__.py │ │ ├── blackjack.py │ │ ├── cliffwalking.py │ │ ├── discrete.py │ │ ├── frozen_lake.py │ │ 
├── guessing_game.py │ │ ├── hotter_colder.py │ │ ├── kellycoinflip.py │ │ ├── nchain.py │ │ ├── roulette.py │ │ └── taxi.py │ └── unittest │ │ ├── __init__.py │ │ ├── cube_crash.py │ │ └── memorize_digits.py ├── error.py ├── logger.py ├── spaces │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── box.cpython-36.pyc │ │ ├── dict_space.cpython-36.pyc │ │ ├── discrete.cpython-36.pyc │ │ ├── multi_binary.cpython-36.pyc │ │ ├── multi_discrete.cpython-36.pyc │ │ ├── space.cpython-36.pyc │ │ └── tuple_space.cpython-36.pyc │ ├── box.py │ ├── dict_space.py │ ├── discrete.py │ ├── multi_binary.py │ ├── multi_discrete.py │ ├── space.py │ ├── tests │ │ ├── __init__.py │ │ └── test_spaces.py │ └── tuple_space.py ├── tests │ └── test_core.py ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── atomic_write.cpython-36.pyc │ │ ├── closer.cpython-36.pyc │ │ ├── colorize.cpython-36.pyc │ │ ├── ezpickle.cpython-36.pyc │ │ ├── json_utils.cpython-36.pyc │ │ └── seeding.cpython-36.pyc │ ├── atomic_write.py │ ├── closer.py │ ├── colorize.py │ ├── ezpickle.py │ ├── json_utils.py │ ├── play.py │ ├── seeding.py │ └── tests │ │ ├── test_atexit.py │ │ └── test_seeding.py ├── version.py └── wrappers │ ├── README.md │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── dict.cpython-36.pyc │ ├── monitor.cpython-36.pyc │ └── time_limit.cpython-36.pyc │ ├── dict.py │ ├── monitor.py │ ├── monitoring │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── stats_recorder.cpython-36.pyc │ │ └── video_recorder.cpython-36.pyc │ ├── stats_recorder.py │ ├── tests │ │ ├── __init__.py │ │ ├── helpers.py │ │ └── test_video_recorder.py │ └── video_recorder.py │ ├── tests │ └── __init__.py │ └── time_limit.py ├── queryoptimization ├── QueryGraph.py ├── __pycache__ │ ├── QueryGraph.cpython-36.pyc │ └── cm1_postgres_card.cpython-36.pyc ├── cm1_postgres_card.py └── reward_mapping.py ├── requirements.txt ├── 
requirements_dev.txt ├── scripts └── generate_json.py └── setup.py /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | OpenAI Gym is dedicated to providing a harassment-free experience for 2 | everyone, regardless of gender, gender identity and expression, sexual 3 | orientation, disability, physical appearance, body size, age, race, or 4 | religion. We do not tolerate harassment of participants in any form. 5 | 6 | This code of conduct applies to all OpenAI Gym spaces (including Gist 7 | comments) both online and off. Anyone who violates this code of 8 | conduct may be sanctioned or expelled from these spaces at the 9 | discretion of the OpenAI team. 10 | 11 | We may add additional rules over time, which will be made clearly 12 | available to participants. Participants are responsible for knowing 13 | and abiding by these rules. 14 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # gym 2 | 3 | The MIT License 4 | 5 | Copyright (c) 2016 OpenAI (https://openai.com) 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | 25 | # Mujoco models 26 | This work is derived from [MuJuCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license: 27 | ``` 28 | This file is part of MuJoCo. 29 | Copyright 2009-2015 Roboti LLC. 30 | Mujoco :: Advanced physics simulation engine 31 | Source : www.roboti.us 32 | Version : 1.31 33 | Released : 23Apr16 34 | Author :: Vikash Kumar 35 | Contacts : kumar@roboti.us 36 | ``` 37 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install test 2 | 3 | install: 4 | pip install -r requirements.txt 5 | 6 | base: 7 | docker pull ubuntu:14.04 8 | docker tag ubuntu:14.04 quay.io/openai/gym:base 9 | docker push quay.io/openai/gym:base 10 | 11 | test: 12 | docker build -f test.dockerfile -t quay.io/openai/gym:test . 13 | docker push quay.io/openai/gym:test 14 | 15 | upload: 16 | rm -rf dist 17 | python setup.py sdist 18 | twine upload dist/* 19 | 20 | docker-build: 21 | docker build -t quay.io/openai/gym . 
22 | 23 | docker-run: 24 | docker run -ti quay.io/openai/gym bash 25 | -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/DDQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-25_22-49.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.09232526502748559 3 | 1 4 | Episode reward -0.05924896486052718 5 | 2 6 | Episode reward -0.05924896486052718 7 | 3 8 | Episode reward -0.0380780432400844 9 | 4 10 | Episode reward -0.6660070845738382 11 | 5 12 | Episode reward -0.08959885774269408 13 | 6 14 | Episode reward -0.13403700504932164 15 | 7 16 | Episode reward -0.17593507436925426 17 | 8 18 | Episode reward -0.40541110058287255 19 | 9 20 | Episode reward -0.055199048005318795 21 | 10 22 | Episode reward -0.055199048005318795 23 | 11 24 | Episode reward -0.05924896486052718 25 | 12 26 | Episode reward -0.06282709365173889 27 | 13 28 | Episode reward -0.30721756470010503 29 | 14 30 | Episode reward -0.09207927439907027 31 | 15 32 | Episode reward -0.07957090298063017 33 | 16 34 | Episode reward -0.08959885774269408 35 | 17 36 | Episode reward -0.03735954940271086 37 | 18 38 | Episode reward -0.19417733641309376 39 | 19 40 | Episode reward -0.07141141313514943 41 | 20 42 | Episode reward -0.0380780432400844 43 | 21 44 | Episode reward -0.0676730838366675 45 | 22 46 | Episode reward -0.2018877166542457 47 | 23 48 | Episode reward -0.07116586478402431 49 | 24 50 | Episode reward -0.08124529087940868 51 | 25 52 | Episode reward -1.0412401369915116 53 | 26 54 | Episode reward -0.03312091317731878 55 | 27 56 | Episode reward -0.034055850069849854 57 | 28 58 | Episode reward -0.19417733641309376 59 | 29 60 | Episode reward -0.03312091317731878 61 | 30 62 | Episode reward -0.03237362943516202 63 | 31 64 | Episode reward -0.30856854020371943 65 | 32 66 | Episode reward -1.0231810121908724 
-------------------------------------------------------------------------------- /agents/eval/results/CrossVal/DDQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-25_22-56.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.0912465162216765 3 | 1 4 | Episode reward -0.0757571670267232 5 | 2 6 | Episode reward -0.0757571670267232 7 | 3 8 | Episode reward -0.0687174990246987 9 | 4 10 | Episode reward -0.29222770118129276 11 | 5 12 | Episode reward -0.06454881965346083 13 | 6 14 | Episode reward -0.06408073965423712 15 | 7 16 | Episode reward -0.095090280976639 17 | 8 18 | Episode reward -0.6898582210167801 19 | 9 20 | Episode reward -3.136981211210088 21 | 10 22 | Episode reward -3.136981211210088 23 | 11 24 | Episode reward -0.0757571670267232 25 | 12 26 | Episode reward -0.17739969119777252 27 | 13 28 | Episode reward -0.23983126648104836 29 | 14 30 | Episode reward -0.21552077474554615 31 | 15 32 | Episode reward -0.9891107370461636 33 | 16 34 | Episode reward -0.06454881965346083 35 | 17 36 | Episode reward -0.11682229079565579 37 | 18 38 | Episode reward -0.5342858626354794 39 | 19 40 | Episode reward -0.04700762600867747 41 | 20 42 | Episode reward -0.0687174990246987 43 | 21 44 | Episode reward -0.046330917179086675 45 | 22 46 | Episode reward -0.07618937301541505 47 | 23 48 | Episode reward -0.058530481465487075 49 | 24 50 | Episode reward -0.06738229645862026 51 | 25 52 | Episode reward -0.2737287010723212 53 | 26 54 | Episode reward -0.029687702102654655 55 | 27 56 | Episode reward -0.10308373252179798 57 | 28 58 | Episode reward -0.5342858626354794 59 | 29 60 | Episode reward -0.029687702102654655 61 | 30 62 | Episode reward -0.02825540333877366 63 | 31 64 | Episode reward -0.6518353590784514 65 | 32 66 | Episode reward -2.685334394432857 -------------------------------------------------------------------------------- 
/agents/eval/results/CrossVal/DDQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-25_23-03.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.03568160525141344 3 | 1 4 | Episode reward -0.03568160525141344 5 | 2 6 | Episode reward -0.1355938215280049 7 | 3 8 | Episode reward -0.05192720695014784 9 | 4 10 | Episode reward -0.07545556640403842 11 | 5 12 | Episode reward -0.1355938215280049 13 | 6 14 | Episode reward -0.08641757785774706 15 | 7 16 | Episode reward -0.06577161766742984 17 | 8 18 | Episode reward -0.10057788186073036 19 | 9 20 | Episode reward -0.19112854103197646 21 | 10 22 | Episode reward -0.8520942526542834 23 | 11 24 | Episode reward -0.06577161766742984 25 | 12 26 | Episode reward -0.035387398791473776 27 | 13 28 | Episode reward -0.40349863553810794 29 | 14 30 | Episode reward -0.060794006592682505 31 | 15 32 | Episode reward -0.10057788186073036 33 | 16 34 | Episode reward -0.06577161766742984 35 | 17 36 | Episode reward -0.12152513732901479 37 | 18 38 | Episode reward -0.12064593952365024 39 | 19 40 | Episode reward -0.6522490405089769 41 | 20 42 | Episode reward -0.19112854103197646 43 | 21 44 | Episode reward -1.1643301465952296 45 | 22 46 | Episode reward -0.07889019883589055 47 | 23 48 | Episode reward -0.40349863553810794 49 | 24 50 | Episode reward -0.05969766230248122 51 | 25 52 | Episode reward -0.035387398791473776 53 | 26 54 | Episode reward -0.1342049906862556 55 | 27 56 | Episode reward -0.07373562202690227 57 | 28 58 | Episode reward -0.060794006592682505 59 | 29 60 | Episode reward -0.1342049906862556 61 | 30 62 | Episode reward -1.1643301465952296 63 | 31 64 | Episode reward -0.1342049906862556 65 | 32 66 | Episode reward -0.05192720695014784 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/DDQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-25_23_11.txt: 
-------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.09917009633690389 3 | 1 4 | Episode reward -0.09917009633690389 5 | 2 6 | Episode reward -1.3965699455848108 7 | 3 8 | Episode reward -0.05341589760853796 9 | 4 10 | Episode reward -0.7250663247925099 11 | 5 12 | Episode reward -1.3965699455848108 13 | 6 14 | Episode reward -0.05776417903571414 15 | 7 16 | Episode reward -0.17457846899822493 17 | 8 18 | Episode reward -0.08194215178680397 19 | 9 20 | Episode reward -0.12245221512677709 21 | 10 22 | Episode reward -3.220201024362526 23 | 11 24 | Episode reward -0.17457846899822493 25 | 12 26 | Episode reward -0.0806868574244709 27 | 13 28 | Episode reward -0.5973610753690145 29 | 14 30 | Episode reward -0.036183304173653764 31 | 15 32 | Episode reward -0.08194215178680397 33 | 16 34 | Episode reward -0.17457846899822493 35 | 17 36 | Episode reward -0.15042265845509317 37 | 18 38 | Episode reward -1.3181789952375915 39 | 19 40 | Episode reward -1.157941237538758 41 | 20 42 | Episode reward -0.12245221512677709 43 | 21 44 | Episode reward -0.08872898146199591 45 | 22 46 | Episode reward -0.2873069201438187 47 | 23 48 | Episode reward -0.5973610753690145 49 | 24 50 | Episode reward -0.059282276064932124 51 | 25 52 | Episode reward -0.0806868574244709 53 | 26 54 | Episode reward -0.21543175072185602 55 | 27 56 | Episode reward -0.17090875943315603 57 | 28 58 | Episode reward -0.036183304173653764 59 | 29 60 | Episode reward -0.21543175072185602 61 | 30 62 | Episode reward -0.08872898146199591 63 | 31 64 | Episode reward -0.21543175072185602 65 | 32 66 | Episode reward -0.05341589760853796 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/DDQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-25_23_18.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.4004279199434673 3 | 1 4 | Episode reward 
-0.0799585779471935 5 | 2 6 | Episode reward -0.26480080776301795 7 | 3 8 | Episode reward -0.061575979644339626 9 | 4 10 | Episode reward -0.10837808330368688 11 | 5 12 | Episode reward -4.9406331502030785 13 | 6 14 | Episode reward -0.26480080776301795 15 | 7 16 | Episode reward -0.29155253990025254 17 | 8 18 | Episode reward -0.07538121678662917 19 | 9 20 | Episode reward -0.4004279199434673 21 | 10 22 | Episode reward -0.18937078398773496 23 | 11 24 | Episode reward -0.29155253990025254 25 | 12 26 | Episode reward -0.09132586390525599 27 | 13 28 | Episode reward -0.07538121678662917 29 | 14 30 | Episode reward -2.6932711826106495 31 | 15 32 | Episode reward -0.061575979644339626 33 | 16 34 | Episode reward -0.07674917475154754 35 | 17 36 | Episode reward -0.0168918524304905 37 | 18 38 | Episode reward -0.0168918524304905 39 | 19 40 | Episode reward -0.0168918524304905 41 | 20 42 | Episode reward -0.07674917475154754 43 | 21 44 | Episode reward -0.0168918524304905 45 | 22 46 | Episode reward -0.03233059533208041 47 | 23 48 | Episode reward -0.07251991839428569 49 | 24 50 | Episode reward -0.18937078398773496 51 | 25 52 | Episode reward -0.14626074949630433 53 | 26 54 | Episode reward -0.03261364543201184 55 | 27 56 | Episode reward -0.0799585779471935 57 | 28 58 | Episode reward -0.4632697416752851 59 | 29 60 | Episode reward -0.07710728395264174 61 | 30 62 | Episode reward -0.0799585779471935 63 | 31 64 | Episode reward -0.09132586390525599 65 | 32 66 | Episode reward -0.07538121678662917 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/DDQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-25_23_25.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.20864121641017075 3 | 1 4 | Episode reward -0.06132569611577271 5 | 2 6 | Episode reward -3.362908422196969 7 | 3 8 | Episode reward -0.14036077027472663 9 | 4 10 | Episode reward 
-0.09253804613416579 11 | 5 12 | Episode reward -10.0 13 | 6 14 | Episode reward -3.362908422196969 15 | 7 16 | Episode reward -0.5211803099934719 17 | 8 18 | Episode reward -0.04267227031799767 19 | 9 20 | Episode reward -0.20864121641017075 21 | 10 22 | Episode reward -0.352576638966663 23 | 11 24 | Episode reward -0.5211803099934719 25 | 12 26 | Episode reward -0.034275611671883484 27 | 13 28 | Episode reward -0.04267227031799767 29 | 14 30 | Episode reward -3.108213285017131 31 | 15 32 | Episode reward -0.14036077027472663 33 | 16 34 | Episode reward -1.5459709807873585 35 | 17 36 | Episode reward -0.031333601870507335 37 | 18 38 | Episode reward -0.031333601870507335 39 | 19 40 | Episode reward -0.031333601870507335 41 | 20 42 | Episode reward -1.5459709807873585 43 | 21 44 | Episode reward -0.031333601870507335 45 | 22 46 | Episode reward -0.03973752923757494 47 | 23 48 | Episode reward -0.07251991839428569 49 | 24 50 | Episode reward -0.352576638966663 51 | 25 52 | Episode reward -0.1931696363289294 53 | 26 54 | Episode reward -0.07185843696020319 55 | 27 56 | Episode reward -0.06132569611577271 57 | 28 58 | Episode reward -0.7360834640744801 59 | 29 60 | Episode reward -0.12835971592996637 61 | 30 62 | Episode reward -0.06132569611577271 63 | 31 64 | Episode reward -0.034275611671883484 65 | 32 66 | Episode reward -0.04267227031799767 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/DDQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-25_23_33.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -2.177216918151554 3 | 1 4 | Episode reward -0.07195425714814593 5 | 2 6 | Episode reward -0.07644971717708265 7 | 3 8 | Episode reward -0.27115671771615124 9 | 4 10 | Episode reward -0.7110507063939326 11 | 5 12 | Episode reward -0.2753104668689248 13 | 6 14 | Episode reward -0.2696091911358208 15 | 7 16 | Episode reward -0.27115671771615124 17 | 8 
18 | Episode reward -0.2686456723363504 19 | 9 20 | Episode reward -0.07644971717708265 21 | 10 22 | Episode reward -0.18568298962429827 23 | 11 24 | Episode reward -0.2696091911358208 25 | 12 26 | Episode reward -0.0712537610215057 27 | 13 28 | Episode reward -0.03133693696263822 29 | 14 30 | Episode reward -0.04354700646007917 31 | 15 32 | Episode reward -0.04354700646007917 33 | 16 34 | Episode reward -0.28913007829629245 35 | 17 36 | Episode reward -0.2686456723363504 37 | 18 38 | Episode reward -0.0984704007082432 39 | 19 40 | Episode reward -0.28458434697436374 41 | 20 42 | Episode reward -0.09629964082675532 43 | 21 44 | Episode reward -0.5545606080887343 45 | 22 46 | Episode reward -3.0047203124331583 47 | 23 48 | Episode reward -3.0047203124331583 49 | 24 50 | Episode reward -0.04354700646007917 51 | 25 52 | Episode reward -0.3878442085713193 53 | 26 54 | Episode reward -0.25230858175250803 55 | 27 56 | Episode reward -0.06045214544948912 57 | 28 58 | Episode reward -0.2753104668689248 59 | 29 60 | Episode reward -0.0984704007082432 61 | 30 62 | Episode reward -0.08135292678097 63 | 31 64 | Episode reward -0.30888595472609837 65 | 32 66 | Episode reward -0.07630053362970107 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/DDQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-25_23_40.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -6.977853083402471 3 | 1 4 | Episode reward -0.014750229888273883 5 | 2 6 | Episode reward -0.11004934329238769 7 | 3 8 | Episode reward -0.3075102355308754 9 | 4 10 | Episode reward -0.25075572931808277 11 | 5 12 | Episode reward -1.1749104344738168 13 | 6 14 | Episode reward -0.24536790923130328 15 | 7 16 | Episode reward -0.3075102355308754 17 | 8 18 | Episode reward -1.1949637406197033 19 | 9 20 | Episode reward -0.11004934329238769 21 | 10 22 | Episode reward -0.02189230905882976 23 | 11 24 | Episode 
reward -0.24536790923130328 25 | 12 26 | Episode reward -0.12098504290918799 27 | 13 28 | Episode reward -0.10954992285765683 29 | 14 30 | Episode reward -0.08080491672505738 31 | 15 32 | Episode reward -0.08080491672505738 33 | 16 34 | Episode reward -0.11001252688796674 35 | 17 36 | Episode reward -1.1949637406197033 37 | 18 38 | Episode reward -0.07165981984008177 39 | 19 40 | Episode reward -0.07246075374336153 41 | 20 42 | Episode reward -0.33133572714142434 43 | 21 44 | Episode reward -0.16070164166087963 45 | 22 46 | Episode reward -0.0677775278199148 47 | 23 48 | Episode reward -0.0677775278199148 49 | 24 50 | Episode reward -0.08080491672505738 51 | 25 52 | Episode reward -0.09690876096169423 53 | 26 54 | Episode reward -1.4278224798306347 55 | 27 56 | Episode reward -0.15211313680231775 57 | 28 58 | Episode reward -1.1749104344738168 59 | 29 60 | Episode reward -0.07165981984008177 61 | 30 62 | Episode reward -0.17291788928291846 63 | 31 64 | Episode reward -0.42250667202209813 65 | 32 66 | Episode reward -0.10117533490750025 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/DQN/0_DQN_CM1-postgres-card-job-masking-v0_0_2019-06-20_12-14.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.04250667623658374 3 | 1 4 | Episode reward -0.043735200688653275 5 | 2 6 | Episode reward -0.043735200688653275 7 | 3 8 | Episode reward -0.4545835561617039 9 | 4 10 | Episode reward -2.9474540890304906 11 | 5 12 | Episode reward -0.08572392697992298 13 | 6 14 | Episode reward -0.04702122636745635 15 | 7 16 | Episode reward -0.08430166857587773 17 | 8 18 | Episode reward -0.13739781374467325 19 | 9 20 | Episode reward -0.4989685545251943 21 | 10 22 | Episode reward -0.4989685545251943 23 | 11 24 | Episode reward -0.043735200688653275 25 | 12 26 | Episode reward -0.14637348863957508 27 | 13 28 | Episode reward -0.3489252407850174 29 | 14 30 | Episode 
reward -0.16450907339208837 31 | 15 32 | Episode reward -0.9015001256074497 33 | 16 34 | Episode reward -0.08572392697992298 35 | 17 36 | Episode reward -0.129074092148704 37 | 18 38 | Episode reward -0.43953727442311596 39 | 19 40 | Episode reward -0.06645427207950612 41 | 20 42 | Episode reward -0.4545835561617039 43 | 21 44 | Episode reward -0.040663677026966744 45 | 22 46 | Episode reward -0.6414444243152285 47 | 23 48 | Episode reward -0.1403180474811257 49 | 24 50 | Episode reward -0.07635491035295133 51 | 25 52 | Episode reward -0.8016359770581806 53 | 26 54 | Episode reward -0.02964481674562209 55 | 27 56 | Episode reward -0.030743152709410658 57 | 28 58 | Episode reward -0.43953727442311596 59 | 29 60 | Episode reward -0.02964481674562209 61 | 30 62 | Episode reward -0.032205543990432864 63 | 31 64 | Episode reward -0.1888662928927437 65 | 32 66 | Episode reward -0.8587329194541353 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/DQN/1_DQN_CM1-postgres-card-job-masking-v0_0_2019-06-20_13-01.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.025093990216201855 3 | 1 4 | Episode reward -0.025093990216201855 5 | 2 6 | Episode reward -0.14758284754696152 7 | 3 8 | Episode reward -0.07187718427034381 9 | 4 10 | Episode reward -0.23134228941100562 11 | 5 12 | Episode reward -0.14758284754696152 13 | 6 14 | Episode reward -0.08895933399804828 15 | 7 16 | Episode reward -0.06790813125945544 17 | 8 18 | Episode reward -0.12451151971730107 19 | 9 20 | Episode reward -0.04372475176813919 21 | 10 22 | Episode reward -0.2551682254260556 23 | 11 24 | Episode reward -0.1358670652291641 25 | 12 26 | Episode reward -0.03509420931094406 27 | 13 28 | Episode reward -0.30854284840175594 29 | 14 30 | Episode reward -0.08126113139956363 31 | 15 32 | Episode reward -0.12451151971730107 33 | 16 34 | Episode reward -0.1358670652291641 35 | 17 36 | Episode 
reward -0.1990731049765641 37 | 18 38 | Episode reward -0.33603354505443994 39 | 19 40 | Episode reward -0.6008547373389358 41 | 20 42 | Episode reward -0.08805867672996827 43 | 21 44 | Episode reward -0.3897968186737239 45 | 22 46 | Episode reward -0.053488699667351224 47 | 23 48 | Episode reward -0.30854284840175594 49 | 24 50 | Episode reward -0.015745716776087385 51 | 25 52 | Episode reward -0.03509420931094406 53 | 26 54 | Episode reward -0.10243642789224044 55 | 27 56 | Episode reward -0.25042281217404794 57 | 28 58 | Episode reward -0.08126113139956363 59 | 29 60 | Episode reward -0.10243642789224044 61 | 30 62 | Episode reward -0.3897968204951858 63 | 31 64 | Episode reward -0.10243642789224044 65 | 32 66 | Episode reward -0.07187718427034381 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/DQN/2_DQN_CM1-postgres-card-job-masking-v0_0_2019-06-20_14-29.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.09070589197376257 3 | 1 4 | Episode reward -0.05035065858078603 5 | 2 6 | Episode reward -0.453347968433121 7 | 3 8 | Episode reward -0.141900378884525 9 | 4 10 | Episode reward -0.08478151928805357 11 | 5 12 | Episode reward -0.7262140050224025 13 | 6 14 | Episode reward -0.453347968433121 15 | 7 16 | Episode reward -0.423866350342025 17 | 8 18 | Episode reward -0.046877088985411056 19 | 9 20 | Episode reward -0.20330354703062414 21 | 10 22 | Episode reward -0.5817834156360889 23 | 11 24 | Episode reward -0.40793614969403025 25 | 12 26 | Episode reward -0.06817616500507305 27 | 13 28 | Episode reward -0.046877088985411056 29 | 14 30 | Episode reward -0.39199721066887877 31 | 15 32 | Episode reward -0.141900378884525 33 | 16 34 | Episode reward -0.5328449487931493 35 | 17 36 | Episode reward -0.017322226877799354 37 | 18 38 | Episode reward -0.017322226877799354 39 | 19 40 | Episode reward -0.017322226877799354 41 | 20 42 | Episode 
reward -0.5328449487931493 43 | 21 44 | Episode reward -0.017322226877799354 45 | 22 46 | Episode reward -0.03770671354254604 47 | 23 48 | Episode reward -0.027885441107498778 49 | 24 50 | Episode reward -0.4843840951278181 51 | 25 52 | Episode reward -0.10691694083316836 53 | 26 54 | Episode reward -0.0806868574244709 55 | 27 56 | Episode reward -0.05035065858078603 57 | 28 58 | Episode reward -0.2088368085785675 59 | 29 60 | Episode reward -0.05235456946724033 61 | 30 62 | Episode reward -0.05035065858078603 63 | 31 64 | Episode reward -0.06817616500507305 65 | 32 66 | Episode reward -0.046877088985411056 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/DQN/3_DQN_CM1-postgres-card-job-masking-v0_0_2019-06-20_15-37.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.5216030282187517 3 | 1 4 | Episode reward -0.015565521585496138 5 | 2 6 | Episode reward -0.12493232650842861 7 | 3 8 | Episode reward -0.2914432549192331 9 | 4 10 | Episode reward -0.2810219568527235 11 | 5 12 | Episode reward -0.3864728736355608 13 | 6 14 | Episode reward -0.21374867856632412 15 | 7 16 | Episode reward -0.8888746788945107 17 | 8 18 | Episode reward -0.6270598137150882 19 | 9 20 | Episode reward -0.12493232650842861 21 | 10 22 | Episode reward -0.4370397427470111 23 | 11 24 | Episode reward -0.7484225477159483 25 | 12 26 | Episode reward -0.10058668017072649 27 | 13 28 | Episode reward -0.03486750334587033 29 | 14 30 | Episode reward -0.04353870610803556 31 | 15 32 | Episode reward -0.04353870610803556 33 | 16 34 | Episode reward -0.17230925414802278 35 | 17 36 | Episode reward -0.6270598137150882 37 | 18 38 | Episode reward -0.04415429513603606 39 | 19 40 | Episode reward -0.27980718192570736 41 | 20 42 | Episode reward -0.1476950448166174 43 | 21 44 | Episode reward -0.4827212209691818 45 | 22 46 | Episode reward -1.89325383278709 47 | 23 48 | Episode reward 
-1.89325383278709 49 | 24 50 | Episode reward -0.04353870610803556 51 | 25 52 | Episode reward -0.13679131661471217 53 | 26 54 | Episode reward -2.199049985903023 55 | 27 56 | Episode reward -1.2091431046764913 57 | 28 58 | Episode reward -0.3864728736355608 59 | 29 60 | Episode reward -0.04415429513603606 61 | 30 62 | Episode reward -0.13774419793714146 63 | 31 64 | Episode reward -0.3473456757452176 65 | 32 66 | Episode reward -0.09302876836460415 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/PPO/0_PPO_CM1-postgres-card-job-masking-v0_0_2019-06-21_07-19.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.035074630589971595 3 | 1 4 | Episode reward -0.04349101333777846 5 | 2 6 | Episode reward -0.04349101333777846 7 | 3 8 | Episode reward -0.5440642168242524 9 | 4 10 | Episode reward -0.23682535416763306 11 | 5 12 | Episode reward -0.09909943265261287 13 | 6 14 | Episode reward -0.04757897952222104 15 | 7 16 | Episode reward -0.09337368563928766 17 | 8 18 | Episode reward -0.17729405711224014 19 | 9 20 | Episode reward -0.5413416659532867 21 | 10 22 | Episode reward -0.5413416659532867 23 | 11 24 | Episode reward -0.04349102966298874 25 | 12 26 | Episode reward -0.046965483821373194 27 | 13 28 | Episode reward -0.32209073173912334 29 | 14 30 | Episode reward -0.21618728078611385 31 | 15 32 | Episode reward -0.12250606105321278 33 | 16 34 | Episode reward -0.09909943265261287 35 | 17 36 | Episode reward -0.1882907224834501 37 | 18 38 | Episode reward -0.4395332232436422 39 | 19 40 | Episode reward -0.0647717433572532 41 | 20 42 | Episode reward -0.5440642168242524 43 | 21 44 | Episode reward -0.08088948015849032 45 | 22 46 | Episode reward -0.3259102668400463 47 | 23 48 | Episode reward -0.12022757604421293 49 | 24 50 | Episode reward -0.12644832351960167 51 | 25 52 | Episode reward -0.3862463333141468 53 | 26 54 | Episode reward 
-0.11365564092362716 55 | 27 56 | Episode reward -0.14279889245072025 57 | 28 58 | Episode reward -0.4395332232436422 59 | 29 60 | Episode reward -0.11365564092362716 61 | 30 62 | Episode reward -0.03616146217681379 63 | 31 64 | Episode reward -0.2600023808739524 65 | 32 66 | Episode reward -0.1984183728110506 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/PPO/1_PPO_CM1-postgres-card-job-masking-v0_0_2019-06-20_17-42.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.07553546777879044 3 | 1 4 | Episode reward -0.07553546777879044 5 | 2 6 | Episode reward -0.27510596257138636 7 | 3 8 | Episode reward -0.08172802675916649 9 | 4 10 | Episode reward -0.3067228808646211 11 | 5 12 | Episode reward -0.27510596257138636 13 | 6 14 | Episode reward -0.09899059705807331 15 | 7 16 | Episode reward -0.26013479444513204 17 | 8 18 | Episode reward -0.16452782205736638 19 | 9 20 | Episode reward -0.03376752987745191 21 | 10 22 | Episode reward -0.7129951685882668 23 | 11 24 | Episode reward -0.26013479444513204 25 | 12 26 | Episode reward -0.048688101308770715 27 | 13 28 | Episode reward -0.12834168008546049 29 | 14 30 | Episode reward -0.03269284479641088 31 | 15 32 | Episode reward -0.16452782205736638 33 | 16 34 | Episode reward -0.26013479444513204 35 | 17 36 | Episode reward -0.020569669280548226 37 | 18 38 | Episode reward -0.6722845110268765 39 | 19 40 | Episode reward -0.4483860274774437 41 | 20 42 | Episode reward -0.03376752987745191 43 | 21 44 | Episode reward -0.22716133367769076 45 | 22 46 | Episode reward -0.14160626281077177 47 | 23 48 | Episode reward -0.12834168008546049 49 | 24 50 | Episode reward -0.0826441464654666 51 | 25 52 | Episode reward -0.048688101308770715 53 | 26 54 | Episode reward -0.045314552820709474 55 | 27 56 | Episode reward -0.21459611178012783 57 | 28 58 | Episode reward -0.03269284479641088 59 | 29 60 | Episode reward 
-0.045314552820709474 61 | 30 62 | Episode reward -0.22716133367769076 63 | 31 64 | Episode reward -0.045314552820709474 65 | 32 66 | Episode reward -0.08172802675916649 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/PPO/2_PPO_CM1-postgres-card-job-masking-v0_0_2019-06-20_20-57.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.8311808828384866 3 | 1 4 | Episode reward -0.049162934917470265 5 | 2 6 | Episode reward -0.05569238881718234 7 | 3 8 | Episode reward -0.10435047727203843 9 | 4 10 | Episode reward -0.11223347161001132 11 | 5 12 | Episode reward -0.5047332646790449 13 | 6 14 | Episode reward -0.05569238881718234 15 | 7 16 | Episode reward -0.09079292432968282 17 | 8 18 | Episode reward -0.048212168136698805 19 | 9 20 | Episode reward -0.8311808828384866 21 | 10 22 | Episode reward -0.09301256986630256 23 | 11 24 | Episode reward -0.09079292432968282 25 | 12 26 | Episode reward -0.034734270463721476 27 | 13 28 | Episode reward -0.048212168136698805 29 | 14 30 | Episode reward -0.22555615770702875 31 | 15 32 | Episode reward -0.10435047727203843 33 | 16 34 | Episode reward -1.4189650181967743 35 | 17 36 | Episode reward -0.03591519743771626 37 | 18 38 | Episode reward -0.03591519743771626 39 | 19 40 | Episode reward -0.03591519743771626 41 | 20 42 | Episode reward -1.4189650181967743 43 | 21 44 | Episode reward -0.03591519743771626 45 | 22 46 | Episode reward -0.0391763592933125 47 | 23 48 | Episode reward -0.032061254572963986 49 | 24 50 | Episode reward -0.09301256986630256 51 | 25 52 | Episode reward -0.09054801950288247 53 | 26 54 | Episode reward -0.07192739488786601 55 | 27 56 | Episode reward -0.049162934917470265 57 | 28 58 | Episode reward -0.05639203918999746 59 | 29 60 | Episode reward -0.0556131204418698 61 | 30 62 | Episode reward -0.049162934917470265 63 | 31 64 | Episode reward -0.034734270463721476 65 | 32 66 | Episode 
reward -0.048212168136698805 -------------------------------------------------------------------------------- /agents/eval/results/CrossVal/PPO/3_PPO_CM1-postgres-card-job-masking-v0_0_2019-06-21_00-08.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.7105046486911155 3 | 1 4 | Episode reward -0.015172820799754048 5 | 2 6 | Episode reward -0.07698279800471737 7 | 3 8 | Episode reward -0.11564666784397715 9 | 4 10 | Episode reward -0.085401774977727 11 | 5 12 | Episode reward -0.039470935076274444 13 | 6 14 | Episode reward -0.2858248492304062 15 | 7 16 | Episode reward -0.11564666784397715 17 | 8 18 | Episode reward -0.1202385385545359 19 | 9 20 | Episode reward -0.07698279800471737 21 | 10 22 | Episode reward -0.03655240500442934 23 | 11 24 | Episode reward -0.2858248492304062 25 | 12 26 | Episode reward -0.03217557746998145 27 | 13 28 | Episode reward -0.03978472855609879 29 | 14 30 | Episode reward -0.03998895437382237 31 | 15 32 | Episode reward -0.03998895437382237 33 | 16 34 | Episode reward -0.055421172300400084 35 | 17 36 | Episode reward -0.1202385385545359 37 | 18 38 | Episode reward -0.05721184971069869 39 | 19 40 | Episode reward -0.024014963037070135 41 | 20 42 | Episode reward -0.07073874471883848 43 | 21 44 | Episode reward -0.11493200164852475 45 | 22 46 | Episode reward -0.08118705582254972 47 | 23 48 | Episode reward -0.08118705582254972 49 | 24 50 | Episode reward -0.04027287131792835 51 | 25 52 | Episode reward -0.030814154360475497 53 | 26 54 | Episode reward -0.41711998304673437 55 | 27 56 | Episode reward -0.05254144678309079 57 | 28 58 | Episode reward -0.039470935076274444 59 | 29 60 | Episode reward -0.05721184971069869 61 | 30 62 | Episode reward -0.13540081603645562 63 | 31 64 | Episode reward -0.11523795546597759 65 | 32 66 | Episode reward -0.07172370683692394 -------------------------------------------------------------------------------- 
/agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_22-43.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.27558095252481823 3 | 1 4 | Episode reward -0.04881196206116656 5 | 2 6 | Episode reward -0.057129895014628645 7 | 3 8 | Episode reward -0.02413986852228746 9 | 4 10 | Episode reward -0.04881196206116656 11 | 5 12 | Episode reward -0.08477171086290694 13 | 6 14 | Episode reward -0.05112417053965845 15 | 7 16 | Episode reward -0.04881196206116656 17 | 8 18 | Episode reward -0.057129895014628645 19 | 9 20 | Episode reward -0.02413986852228746 21 | 10 22 | Episode reward -0.02782363695521269 23 | 11 24 | Episode reward -0.02413986852228746 25 | 12 26 | Episode reward -0.030745056326600684 27 | 13 28 | Episode reward -0.105541811240415 29 | 14 30 | Episode reward -0.030745056326600684 31 | 15 32 | Episode reward -0.2943353362872508 33 | 16 34 | Episode reward -0.0702008636757136 35 | 17 36 | Episode reward -0.02421676437192152 37 | 18 38 | Episode reward -0.11364518206031764 39 | 19 40 | Episode reward -0.030745056326600684 41 | 20 42 | Episode reward -0.02421676437192152 43 | 21 44 | Episode reward -0.2943353362872508 45 | 22 46 | Episode reward -1.0471927184970582 47 | 23 48 | Episode reward -0.030745056326600684 49 | 24 50 | Episode reward -0.1381363327374976 51 | 25 52 | Episode reward -0.1502010898763279 53 | 26 54 | Episode reward -0.11364518206031764 55 | 27 56 | Episode reward -0.9019950180524843 57 | 28 58 | Episode reward -0.11051189246089237 59 | 29 60 | Episode reward -0.9019950180524843 61 | 30 62 | Episode reward -0.34422406832617164 63 | 31 64 | Episode reward -0.9226552034835453 65 | 32 66 | Episode reward -0.11051189246089237 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_22-50.txt: 
-------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.18955668888006516 3 | 1 4 | Episode reward -0.07538435579269818 5 | 2 6 | Episode reward -0.13763983299347746 7 | 3 8 | Episode reward -0.017133534059142582 9 | 4 10 | Episode reward -0.07538435579269818 11 | 5 12 | Episode reward -0.23681765202843313 13 | 6 14 | Episode reward -0.045608404028343846 15 | 7 16 | Episode reward -0.07538435579269818 17 | 8 18 | Episode reward -0.13763983299347746 19 | 9 20 | Episode reward -0.017133534059142582 21 | 10 22 | Episode reward -0.031359789481813165 23 | 11 24 | Episode reward -0.017133534059142582 25 | 12 26 | Episode reward -0.5117432024854952 27 | 13 28 | Episode reward -0.10017040999921636 29 | 14 30 | Episode reward -0.5117432024854952 31 | 15 32 | Episode reward -0.3063057927273395 33 | 16 34 | Episode reward -0.04335828061621043 35 | 17 36 | Episode reward -0.17030093301048227 37 | 18 38 | Episode reward -0.03498822105248509 39 | 19 40 | Episode reward -0.5117432024854952 41 | 20 42 | Episode reward -0.17030093301048227 43 | 21 44 | Episode reward -0.3063057927273395 45 | 22 46 | Episode reward -0.8720589518975661 47 | 23 48 | Episode reward -0.5117432024854952 49 | 24 50 | Episode reward -0.05334939724697757 51 | 25 52 | Episode reward -0.07515711857741253 53 | 26 54 | Episode reward -0.03498822105248509 55 | 27 56 | Episode reward -0.150037775640448 57 | 28 58 | Episode reward -1.7061491096309487 59 | 29 60 | Episode reward -0.150037775640448 61 | 30 62 | Episode reward -0.4694407429840971 63 | 31 64 | Episode reward -0.20142884531108737 65 | 32 66 | Episode reward -1.7061491096309487 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_22-57.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.30314392267308243 3 | 1 4 | Episode reward 
-0.048942487896884775 5 | 2 6 | Episode reward -0.11224204517838968 7 | 3 8 | Episode reward -0.04041163490023662 9 | 4 10 | Episode reward -0.048942487896884775 11 | 5 12 | Episode reward -0.2071402573830317 13 | 6 14 | Episode reward -0.04354609630754805 15 | 7 16 | Episode reward -0.048942487896884775 17 | 8 18 | Episode reward -0.11224204517838968 19 | 9 20 | Episode reward -0.04041163490023662 21 | 10 22 | Episode reward -0.08099384277832641 23 | 11 24 | Episode reward -0.04041163490023662 25 | 12 26 | Episode reward -0.26289383628627916 27 | 13 28 | Episode reward -0.2585818592369886 29 | 14 30 | Episode reward -0.26289383628627916 31 | 15 32 | Episode reward -0.4255944763681336 33 | 16 34 | Episode reward -0.09146664393435837 35 | 17 36 | Episode reward -0.04172631416874164 37 | 18 38 | Episode reward -0.08634590894513532 39 | 19 40 | Episode reward -0.26289383628627916 41 | 20 42 | Episode reward -0.04172631416874164 43 | 21 44 | Episode reward -0.4255944763681336 45 | 22 46 | Episode reward -0.9930748713927716 47 | 23 48 | Episode reward -0.26289383628627916 49 | 24 50 | Episode reward -0.08685939077874871 51 | 25 52 | Episode reward -0.1439642353522919 53 | 26 54 | Episode reward -0.08634590894513532 55 | 27 56 | Episode reward -0.08292824481266547 57 | 28 58 | Episode reward -0.3116669475828783 59 | 29 60 | Episode reward -0.08292824481266547 61 | 30 62 | Episode reward -1.2917472689985068 63 | 31 64 | Episode reward -2.267584795565996 65 | 32 66 | Episode reward -0.3116669475828783 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_23-04.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.35994174180248356 3 | 1 4 | Episode reward -0.08967003284303779 5 | 2 6 | Episode reward -0.06633110985036819 7 | 3 8 | Episode reward -0.024138197204125588 9 | 4 10 | Episode reward 
-0.08967003284303779 11 | 5 12 | Episode reward -0.12131197018291068 13 | 6 14 | Episode reward -0.08159943393092112 15 | 7 16 | Episode reward -0.08967003284303779 17 | 8 18 | Episode reward -0.06633110985036819 19 | 9 20 | Episode reward -0.024138197204125588 21 | 10 22 | Episode reward -0.04519726996489192 23 | 11 24 | Episode reward -0.024138197204125588 25 | 12 26 | Episode reward -0.6726173034448367 27 | 13 28 | Episode reward -0.1200718130108979 29 | 14 30 | Episode reward -0.6726173034448367 31 | 15 32 | Episode reward -0.5184339075768208 33 | 16 34 | Episode reward -0.09218501270709711 35 | 17 36 | Episode reward -0.08457872345546512 37 | 18 38 | Episode reward -0.3840608534754276 39 | 19 40 | Episode reward -0.6726173034448367 41 | 20 42 | Episode reward -0.08457872345546512 43 | 21 44 | Episode reward -0.5184339075768208 45 | 22 46 | Episode reward -2.2553578165652315 47 | 23 48 | Episode reward -0.6726173034448367 49 | 24 50 | Episode reward -0.043849090187531505 51 | 25 52 | Episode reward -0.12250719817545971 53 | 26 54 | Episode reward -0.3840608534754276 55 | 27 56 | Episode reward -0.9414336002787038 57 | 28 58 | Episode reward -0.259607297292651 59 | 29 60 | Episode reward -0.9414336002787038 61 | 30 62 | Episode reward -1.4906706850052123 63 | 31 64 | Episode reward -0.9268063509174941 65 | 32 66 | Episode reward -0.259607297292651 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_23-12.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.24037279887272653 3 | 1 4 | Episode reward -0.08967267166823349 5 | 2 6 | Episode reward -0.14574456497635793 7 | 3 8 | Episode reward -0.04052620453777192 9 | 4 10 | Episode reward -0.08967267166823349 11 | 5 12 | Episode reward -0.05839430481639012 13 | 6 14 | Episode reward -0.042049859058254466 15 | 7 16 | Episode reward 
-0.08967267166823349 17 | 8 18 | Episode reward -0.14574456497635793 19 | 9 20 | Episode reward -0.04052620453777192 21 | 10 22 | Episode reward -0.035387398791473776 23 | 11 24 | Episode reward -0.04052620453777192 25 | 12 26 | Episode reward -0.3081372521894369 27 | 13 28 | Episode reward -0.19697085808248865 29 | 14 30 | Episode reward -0.3081372521894369 31 | 15 32 | Episode reward -0.3473515640807032 33 | 16 34 | Episode reward -0.0770283963830053 35 | 17 36 | Episode reward -0.10444999606979488 37 | 18 38 | Episode reward -0.17725048363198728 39 | 19 40 | Episode reward -0.3081372521894369 41 | 20 42 | Episode reward -0.10444999606979488 43 | 21 44 | Episode reward -0.3473515640807032 45 | 22 46 | Episode reward -1.0628416694913883 47 | 23 48 | Episode reward -0.3081372521894369 49 | 24 50 | Episode reward -0.03471436694667969 51 | 25 52 | Episode reward -0.1749982005862832 53 | 26 54 | Episode reward -0.17725048363198728 55 | 27 56 | Episode reward -0.2183680134462438 57 | 28 58 | Episode reward -1.1151569754978479 59 | 29 60 | Episode reward -0.2183680134462438 61 | 30 62 | Episode reward -4.282459636853341 63 | 31 64 | Episode reward -0.12185140992527171 65 | 32 66 | Episode reward -1.1151569754978479 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_23-19.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.08345432741603323 3 | 1 4 | Episode reward -0.03270266088480268 5 | 2 6 | Episode reward -0.04387940004763255 7 | 3 8 | Episode reward -0.0668200687255869 9 | 4 10 | Episode reward -0.08345432741603323 11 | 5 12 | Episode reward -0.0355272172597055 13 | 6 14 | Episode reward -0.0355272172597055 15 | 7 16 | Episode reward -0.05776827532233728 17 | 8 18 | Episode reward -0.0489256947357149 19 | 9 20 | Episode reward -0.024451252478888984 21 | 10 22 | Episode reward 
-0.03270266088480268 23 | 11 24 | Episode reward -0.0668200687255869 25 | 12 26 | Episode reward -0.12206811525563288 27 | 13 28 | Episode reward -0.03324864307828557 29 | 14 30 | Episode reward -1.5218863274271832 31 | 15 32 | Episode reward -0.14863094541553515 33 | 16 34 | Episode reward -0.06068943193275675 35 | 17 36 | Episode reward -0.06311483931967421 37 | 18 38 | Episode reward -1.1792024986090612 39 | 19 40 | Episode reward -0.06311483931967421 41 | 20 42 | Episode reward -1.5218863274271832 43 | 21 44 | Episode reward -0.2047816586063136 45 | 22 46 | Episode reward -0.08459361693182295 47 | 23 48 | Episode reward -0.6860024684503246 49 | 24 50 | Episode reward -0.12206811525563288 51 | 25 52 | Episode reward -1.5218863274271832 53 | 26 54 | Episode reward -0.23051751623213476 55 | 27 56 | Episode reward -1.0450177089505237 57 | 28 58 | Episode reward -0.12483834537697638 59 | 29 60 | Episode reward -0.21784941761188364 61 | 30 62 | Episode reward -0.12483834537697638 63 | 31 64 | Episode reward -0.34286462753048075 65 | 32 66 | Episode reward -0.042434700141162504 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_23-26.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.1817401715663023 3 | 1 4 | Episode reward -0.045736034886930114 5 | 2 6 | Episode reward -0.0425799752619166 7 | 3 8 | Episode reward -0.12594518851555642 9 | 4 10 | Episode reward -0.1817401715663023 11 | 5 12 | Episode reward -0.027127691490268103 13 | 6 14 | Episode reward -0.027127691490268103 15 | 7 16 | Episode reward -0.09848810221539772 17 | 8 18 | Episode reward -0.07452432521926988 19 | 9 20 | Episode reward -0.030514058614199084 21 | 10 22 | Episode reward -0.045736034886930114 23 | 11 24 | Episode reward -0.12594518851555642 25 | 12 26 | Episode reward -1.3661655350838076 27 | 13 28 | Episode reward 
-0.07751913636594289 29 | 14 30 | Episode reward -0.5914126560557297 31 | 15 32 | Episode reward -0.6751187579548755 33 | 16 34 | Episode reward -0.13527973671640817 35 | 17 36 | Episode reward -0.13355235938620955 37 | 18 38 | Episode reward -2.8791312530476403 39 | 19 40 | Episode reward -0.13355235938620955 41 | 20 42 | Episode reward -0.5914126560557297 43 | 21 44 | Episode reward -1.2266405256197506 45 | 22 46 | Episode reward -0.13445133076216104 47 | 23 48 | Episode reward -4.502093600931535 49 | 24 50 | Episode reward -1.3661655350838076 51 | 25 52 | Episode reward -0.5914126560557297 53 | 26 54 | Episode reward -0.2250079152360116 55 | 27 56 | Episode reward -0.4241127696417123 57 | 28 58 | Episode reward -0.0567903693993498 59 | 29 60 | Episode reward -0.16528777169532158 61 | 30 62 | Episode reward -0.0567903693993498 63 | 31 64 | Episode reward -0.2210223646672177 65 | 32 66 | Episode reward -0.5524230978942932 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_23-33.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.07507031161221572 3 | 1 4 | Episode reward -0.02788544515979268 5 | 2 6 | Episode reward -0.05886797839695098 7 | 3 8 | Episode reward -0.03507463058997159 9 | 4 10 | Episode reward -0.07507031161221572 11 | 5 12 | Episode reward -0.03182842533498737 13 | 6 14 | Episode reward -0.03182842533498737 15 | 7 16 | Episode reward -0.04284168035384416 17 | 8 18 | Episode reward -0.04534374048979065 19 | 9 20 | Episode reward -0.017854296454287696 21 | 10 22 | Episode reward -0.02788544515979268 23 | 11 24 | Episode reward -0.03507463058997159 25 | 12 26 | Episode reward -1.728129218727309 27 | 13 28 | Episode reward -0.04288417768717584 29 | 14 30 | Episode reward -0.8669459063341383 31 | 15 32 | Episode reward -0.5957924260804778 33 | 16 34 | Episode reward 
-0.1608013042102145 35 | 17 36 | Episode reward -0.14116973877176903 37 | 18 38 | Episode reward -0.4582300689516999 39 | 19 40 | Episode reward -0.14116973877176903 41 | 20 42 | Episode reward -0.8669459063341383 43 | 21 44 | Episode reward -0.23507669847117002 45 | 22 46 | Episode reward -0.257464863977234 47 | 23 48 | Episode reward -0.7225274961235719 49 | 24 50 | Episode reward -1.728129218727309 51 | 25 52 | Episode reward -0.8669459063341383 53 | 26 54 | Episode reward -0.5018922720520662 55 | 27 56 | Episode reward -1.4458065035486747 57 | 28 58 | Episode reward -0.12576175130619727 59 | 29 60 | Episode reward -2.1847724413496845 61 | 30 62 | Episode reward -0.12576175130619727 63 | 31 64 | Episode reward -10.0 65 | 32 66 | Episode reward -0.29220435425971747 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_23-41.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.1124822015130597 3 | 1 4 | Episode reward -0.08120008151070614 5 | 2 6 | Episode reward -0.07195425714814593 7 | 3 8 | Episode reward -0.034734270463721476 9 | 4 10 | Episode reward -0.1124822015130597 11 | 5 12 | Episode reward -0.08062936653669973 13 | 6 14 | Episode reward -0.08062936653669973 15 | 7 16 | Episode reward -0.048794264008057545 17 | 8 18 | Episode reward -0.04762437651883879 19 | 9 20 | Episode reward -0.017894443551591956 21 | 10 22 | Episode reward -0.08120008151070614 23 | 11 24 | Episode reward -0.034734270463721476 25 | 12 26 | Episode reward -0.6070125224098926 27 | 13 28 | Episode reward -0.03598834660714644 29 | 14 30 | Episode reward -0.05377848955867573 31 | 15 32 | Episode reward -0.6654611568242396 33 | 16 34 | Episode reward -0.24455632389856444 35 | 17 36 | Episode reward -0.20230449864180236 37 | 18 38 | Episode reward -0.47358455571548486 39 | 19 40 | Episode reward -0.20230449864180236 
41 | 20 42 | Episode reward -0.05377848955867573 43 | 21 44 | Episode reward -0.5112261565659746 45 | 22 46 | Episode reward -0.03992135685784581 47 | 23 48 | Episode reward -2.718542781042719 49 | 24 50 | Episode reward -0.6070125224098926 51 | 25 52 | Episode reward -0.05377848955867573 53 | 26 54 | Episode reward -0.46669785892018956 55 | 27 56 | Episode reward -0.6275660161703391 57 | 28 58 | Episode reward -0.11820991523285349 59 | 29 60 | Episode reward -0.034985686879062824 61 | 30 62 | Episode reward -0.11820991523285349 63 | 31 64 | Episode reward -0.05374457139886684 65 | 32 66 | Episode reward -0.19551536736181602 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_23-48.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.20157036788938326 3 | 1 4 | Episode reward -0.036161478686099854 5 | 2 6 | Episode reward -0.059360435260922134 7 | 3 8 | Episode reward -0.09129731384613651 9 | 4 10 | Episode reward -0.20157036788938326 11 | 5 12 | Episode reward -0.07240424243259115 13 | 6 14 | Episode reward -0.07240424243259115 15 | 7 16 | Episode reward -0.0864612244971949 17 | 8 18 | Episode reward -0.03413359703445045 19 | 9 20 | Episode reward -0.03583480936761567 21 | 10 22 | Episode reward -0.036161478686099854 23 | 11 24 | Episode reward -0.09129731384613651 25 | 12 26 | Episode reward -1.7192917302088107 27 | 13 28 | Episode reward -0.36275042991950734 29 | 14 30 | Episode reward -0.4398737094495282 31 | 15 32 | Episode reward -0.47360335950245774 33 | 16 34 | Episode reward -0.24184035576527704 35 | 17 36 | Episode reward -0.2365740773515336 37 | 18 38 | Episode reward -1.6995748408268063 39 | 19 40 | Episode reward -0.2365740773515336 41 | 20 42 | Episode reward -0.4398737094495282 43 | 21 44 | Episode reward -0.9245131209769015 45 | 22 46 | Episode reward -0.10469117557857596 47 | 
23 48 | Episode reward -0.7950366384880169 49 | 24 50 | Episode reward -1.7192917302088107 51 | 25 52 | Episode reward -0.4398737094495282 53 | 26 54 | Episode reward -0.36933313026683123 55 | 27 56 | Episode reward -0.324041770054891 57 | 28 58 | Episode reward -0.09400403700733229 59 | 29 60 | Episode reward -0.8668116684172212 61 | 30 62 | Episode reward -0.09400403700733229 63 | 31 64 | Episode reward -4.219869712828589 65 | 32 66 | Episode reward -1.4243612634000198 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_23-55.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.12314729665130966 3 | 1 4 | Episode reward -0.07587524328597077 5 | 2 6 | Episode reward -0.014006730368606715 7 | 3 8 | Episode reward -0.12314729665130966 9 | 4 10 | Episode reward -0.05676713993368714 11 | 5 12 | Episode reward -0.09316599325929086 13 | 6 14 | Episode reward -0.0712537610215057 15 | 7 16 | Episode reward -0.12596776349839567 17 | 8 18 | Episode reward -0.0712537610215057 19 | 9 20 | Episode reward -0.09316599325929086 21 | 10 22 | Episode reward -0.12596776349839567 23 | 11 24 | Episode reward -0.05676713993368714 25 | 12 26 | Episode reward -0.06896061967207592 27 | 13 28 | Episode reward -0.06665287033774699 29 | 14 30 | Episode reward -0.7835202043523772 31 | 15 32 | Episode reward -0.2377034070649619 33 | 16 34 | Episode reward -0.17014958076671238 35 | 17 36 | Episode reward -0.08444590701218813 37 | 18 38 | Episode reward -0.13683176219830362 39 | 19 40 | Episode reward -0.17014958076671238 41 | 20 42 | Episode reward -0.12423777548516589 43 | 21 44 | Episode reward -0.12423777548516589 45 | 22 46 | Episode reward -0.03744309781786968 47 | 23 48 | Episode reward -0.07870519799511859 49 | 24 50 | Episode reward -0.07379302575813697 51 | 25 52 | Episode reward -0.03744309781786968 53 | 26 54 | 
Episode reward -0.08444590701218813 55 | 27 56 | Episode reward -0.3645164290635234 57 | 28 58 | Episode reward -1.4489497044968553 59 | 29 60 | Episode reward -0.9268010821400807 61 | 30 62 | Episode reward -0.9268010821400807 63 | 31 64 | Episode reward -0.9548521854644674 65 | 32 66 | Episode reward -1.00334055246423 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-27_00-03.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.14719356641033804 3 | 1 4 | Episode reward -0.08551378235852217 5 | 2 6 | Episode reward -0.05927291532671435 7 | 3 8 | Episode reward -0.14719356641033804 9 | 4 10 | Episode reward -0.05614627746556709 11 | 5 12 | Episode reward -0.10182138800251893 13 | 6 14 | Episode reward -0.026253729619348028 15 | 7 16 | Episode reward -0.034734270463721476 17 | 8 18 | Episode reward -0.026253729619348028 19 | 9 20 | Episode reward -0.10182138800251893 21 | 10 22 | Episode reward -0.034734270463721476 23 | 11 24 | Episode reward -0.05614627746556709 25 | 12 26 | Episode reward -0.07560449897727003 27 | 13 28 | Episode reward -0.09260405700373572 29 | 14 30 | Episode reward -0.16051743846256955 31 | 15 32 | Episode reward -1.0382032331025535 33 | 16 34 | Episode reward -0.15590348112402602 35 | 17 36 | Episode reward -0.075731574204734 37 | 18 38 | Episode reward -0.7484736185366027 39 | 19 40 | Episode reward -0.15590348112402602 41 | 20 42 | Episode reward -0.6002671736052771 43 | 21 44 | Episode reward -0.6002671736052771 45 | 22 46 | Episode reward -0.04541963194802768 47 | 23 48 | Episode reward -0.19873324788643287 49 | 24 50 | Episode reward -0.1529152908387798 51 | 25 52 | Episode reward -0.04541963194802768 53 | 26 54 | Episode reward -0.075731574204734 55 | 27 56 | Episode reward -0.17848711256492703 57 | 28 58 | Episode reward -0.4690045020919547 59 | 29 60 | Episode 
reward -0.11503302349873121 61 | 30 62 | Episode reward -0.11503302349873121 63 | 31 64 | Episode reward -0.18300526095468556 65 | 32 66 | Episode reward -0.706397972363886 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-27_00-10.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.12329995411308095 3 | 1 4 | Episode reward -0.04375222683962195 5 | 2 6 | Episode reward -0.01461044637738746 7 | 3 8 | Episode reward -0.12329995411308095 9 | 4 10 | Episode reward -0.06150175381439599 11 | 5 12 | Episode reward -0.0935300959626787 13 | 6 14 | Episode reward -0.09951648607317835 15 | 7 16 | Episode reward -0.03486750334587033 17 | 8 18 | Episode reward -0.09951648607317835 19 | 9 20 | Episode reward -0.0935300959626787 21 | 10 22 | Episode reward -0.03486750334587033 23 | 11 24 | Episode reward -0.06150175381439599 25 | 12 26 | Episode reward -0.05137716531652278 27 | 13 28 | Episode reward -0.07447613641072884 29 | 14 30 | Episode reward -1.5593855104272059 31 | 15 32 | Episode reward -0.6529194568462344 33 | 16 34 | Episode reward -0.19924611485773427 35 | 17 36 | Episode reward -0.2606020317905099 37 | 18 38 | Episode reward -0.16470856244558463 39 | 19 40 | Episode reward -0.19924611485773427 41 | 20 42 | Episode reward -0.18501918965666792 43 | 21 44 | Episode reward -0.18501918965666792 45 | 22 46 | Episode reward -0.03492596567000414 47 | 23 48 | Episode reward -0.18420492025227028 49 | 24 50 | Episode reward -0.17108075900247405 51 | 25 52 | Episode reward -0.03492596567000414 53 | 26 54 | Episode reward -0.2606020317905099 55 | 27 56 | Episode reward -0.9786893372543106 57 | 28 58 | Episode reward -1.5280842547680897 59 | 29 60 | Episode reward -1.3794981620790647 61 | 30 62 | Episode reward -1.3794981620790647 63 | 31 64 | Episode reward -0.41376949056270346 65 | 32 66 | Episode reward 
-8.885142737640974 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-27_00-17.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.12711612910190045 3 | 1 4 | Episode reward -0.04054538166539806 5 | 2 6 | Episode reward -0.0592822880415303 7 | 3 8 | Episode reward -0.12711612910190045 9 | 4 10 | Episode reward -0.05770323033389274 11 | 5 12 | Episode reward -0.08435470944512642 13 | 6 14 | Episode reward -0.09939703748088827 15 | 7 16 | Episode reward -0.0347342737169928 17 | 8 18 | Episode reward -0.09939703748088827 19 | 9 20 | Episode reward -0.08435470944512642 21 | 10 22 | Episode reward -0.0347342737169928 23 | 11 24 | Episode reward -0.05770323033389274 25 | 12 26 | Episode reward -0.12444219570783058 27 | 13 28 | Episode reward -0.08223803235917913 29 | 14 30 | Episode reward -0.1250378800342006 31 | 15 32 | Episode reward -0.884138791877237 33 | 16 34 | Episode reward -0.0312910405373954 35 | 17 36 | Episode reward -0.028473507214146647 37 | 18 38 | Episode reward -0.08930232640581404 39 | 19 40 | Episode reward -0.0312910405373954 41 | 20 42 | Episode reward -0.10189253795155429 43 | 21 44 | Episode reward -0.10189253795155429 45 | 22 46 | Episode reward -0.07225842946070764 47 | 23 48 | Episode reward -0.2117268565270504 49 | 24 50 | Episode reward -0.06104193298554494 51 | 25 52 | Episode reward -0.07225842946070764 53 | 26 54 | Episode reward -0.028473507214146647 55 | 27 56 | Episode reward -2.676010011780778 57 | 28 58 | Episode reward -0.11993982816627774 59 | 29 60 | Episode reward -0.16525165736781894 61 | 30 62 | Episode reward -0.16525165736781894 63 | 31 64 | Episode reward -0.1910254021722515 65 | 32 66 | Episode reward -0.0765156692152971 67 | -------------------------------------------------------------------------------- 
/agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-27_00-24.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.10033450635101224 3 | 1 4 | Episode reward -0.03433214802294838 5 | 2 6 | Episode reward -0.016352774771925534 7 | 3 8 | Episode reward -0.10033450635101224 9 | 4 10 | Episode reward -0.09909943265261287 11 | 5 12 | Episode reward -0.044642636137402276 13 | 6 14 | Episode reward -0.1212074396442906 15 | 7 16 | Episode reward -0.042397458718117176 17 | 8 18 | Episode reward -0.1212074396442906 19 | 9 20 | Episode reward -0.044642636137402276 21 | 10 22 | Episode reward -0.042397458718117176 23 | 11 24 | Episode reward -0.09909943265261287 25 | 12 26 | Episode reward -0.0718108292925205 27 | 13 28 | Episode reward -0.06691023716665473 29 | 14 30 | Episode reward -1.3500543844307473 31 | 15 32 | Episode reward -0.9417540616095432 33 | 16 34 | Episode reward -0.1942975708833093 35 | 17 36 | Episode reward -0.5400520252712872 37 | 18 38 | Episode reward -0.8823172001295022 39 | 19 40 | Episode reward -0.1942975708833093 41 | 20 42 | Episode reward -0.27701588384744286 43 | 21 44 | Episode reward -0.27701588384744286 45 | 22 46 | Episode reward -0.06858444675276336 47 | 23 48 | Episode reward -0.10401866491158338 49 | 24 50 | Episode reward -0.622081793021284 51 | 25 52 | Episode reward -0.06858444675276336 53 | 26 54 | Episode reward -0.5400520252712872 55 | 27 56 | Episode reward -1.7628042608502752 57 | 28 58 | Episode reward -0.4414320090095663 59 | 29 60 | Episode reward -0.877436660174117 61 | 30 62 | Episode reward -0.877436660174117 63 | 31 64 | Episode reward -0.5558796473358404 65 | 32 66 | Episode reward -2.1705803199830895 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-27_00-32.txt: 
-------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.060102243961668966 3 | 1 4 | Episode reward -0.024956575331624833 5 | 2 6 | Episode reward -0.06575958981344368 7 | 3 8 | Episode reward -0.742025981099217 9 | 4 10 | Episode reward -0.06575958981344368 11 | 5 12 | Episode reward -0.04881608706462487 13 | 6 14 | Episode reward -0.099469884796484 15 | 7 16 | Episode reward -0.04881608706462487 17 | 8 18 | Episode reward -0.06575958981344368 19 | 9 20 | Episode reward -0.060102243961668966 21 | 10 22 | Episode reward -0.08091762494516386 23 | 11 24 | Episode reward -0.742025981099217 25 | 12 26 | Episode reward -0.20732534130293798 27 | 13 28 | Episode reward -4.1430095198090955 29 | 14 30 | Episode reward -0.7632885115426113 31 | 15 32 | Episode reward -0.3327704423887685 33 | 16 34 | Episode reward -0.46821006861671277 35 | 17 36 | Episode reward -0.4126078499259603 37 | 18 38 | Episode reward -0.049348952891950154 39 | 19 40 | Episode reward -0.08104127927648287 41 | 20 42 | Episode reward -0.20732534130293798 43 | 21 44 | Episode reward -0.15129520099138563 45 | 22 46 | Episode reward -4.1430095198090955 47 | 23 48 | Episode reward -0.08104127927648287 49 | 24 50 | Episode reward -0.15129520099138563 51 | 25 52 | Episode reward -0.3327704423887685 53 | 26 54 | Episode reward -1.875939323668768 55 | 27 56 | Episode reward -0.36934881398733993 57 | 28 58 | Episode reward -0.36934881398733993 59 | 29 60 | Episode reward -0.06518270716132102 61 | 30 62 | Episode reward -0.16378253598131856 63 | 31 64 | Episode reward -0.06518270716132102 65 | 32 66 | Episode reward -3.8351389288818547 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-27_00-39.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.08654496093362701 3 | 1 4 | Episode reward 
-0.025480169052106025 5 | 2 6 | Episode reward -0.07504537221695146 7 | 3 8 | Episode reward -0.7230040041310137 9 | 4 10 | Episode reward -0.07504537221695146 11 | 5 12 | Episode reward -0.0514000585621904 13 | 6 14 | Episode reward -0.030288961912586593 15 | 7 16 | Episode reward -0.0514000585621904 17 | 8 18 | Episode reward -0.07504537221695146 19 | 9 20 | Episode reward -0.08654496093362701 21 | 10 22 | Episode reward -0.0676045357726634 23 | 11 24 | Episode reward -0.7230040041310137 25 | 12 26 | Episode reward -0.2991053947898695 27 | 13 28 | Episode reward -0.7298191132408043 29 | 14 30 | Episode reward -0.5973730273668652 31 | 15 32 | Episode reward -1.5673774660263595 33 | 16 34 | Episode reward -0.4452954603213519 35 | 17 36 | Episode reward -0.10678871636265225 37 | 18 38 | Episode reward -0.053377974754769575 39 | 19 40 | Episode reward -0.05067382064521689 41 | 20 42 | Episode reward -0.2991053947898695 43 | 21 44 | Episode reward -0.21699494268457162 45 | 22 46 | Episode reward -0.7298191132408043 47 | 23 48 | Episode reward -0.05067382064521689 49 | 24 50 | Episode reward -0.21699494268457162 51 | 25 52 | Episode reward -1.5673774660263595 53 | 26 54 | Episode reward -0.8399540108662341 55 | 27 56 | Episode reward -0.1325701992813082 57 | 28 58 | Episode reward -0.1325701992813082 59 | 29 60 | Episode reward -0.9022965065009941 61 | 30 62 | Episode reward -0.16013945241090471 63 | 31 64 | Episode reward -0.9022965065009941 65 | 32 66 | Episode reward -0.10351044055283086 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-27_00-46.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.08887787229635742 3 | 1 4 | Episode reward -0.016831525549694693 5 | 2 6 | Episode reward -0.10621850716441615 7 | 3 8 | Episode reward -0.3599290393131559 9 | 4 10 | Episode reward 
-0.10621850716441615 11 | 5 12 | Episode reward -0.03618330729664126 13 | 6 14 | Episode reward -0.026253729619348028 15 | 7 16 | Episode reward -0.03618330729664126 17 | 8 18 | Episode reward -0.10621850716441615 19 | 9 20 | Episode reward -0.08887787229635742 21 | 10 22 | Episode reward -0.12945145547180198 23 | 11 24 | Episode reward -0.3599290393131559 25 | 12 26 | Episode reward -0.5819137281301746 27 | 13 28 | Episode reward -0.8796683587941776 29 | 14 30 | Episode reward -0.44386292489847545 31 | 15 32 | Episode reward -0.4560707109736925 33 | 16 34 | Episode reward -0.08908873900600649 35 | 17 36 | Episode reward -0.08320263014032528 37 | 18 38 | Episode reward -0.02113275847255507 39 | 19 40 | Episode reward -0.05698526727788568 41 | 20 42 | Episode reward -0.5819137281301746 43 | 21 44 | Episode reward -0.12885721898449576 45 | 22 46 | Episode reward -0.8796683587941776 47 | 23 48 | Episode reward -0.05698526727788568 49 | 24 50 | Episode reward -0.12885721898449576 51 | 25 52 | Episode reward -0.4560707109736925 53 | 26 54 | Episode reward -0.15085967474399561 55 | 27 56 | Episode reward -0.2868043256188304 57 | 28 58 | Episode reward -0.2868043256188304 59 | 29 60 | Episode reward -0.20486975638478636 61 | 30 62 | Episode reward -2.256328249160063 63 | 31 64 | Episode reward -0.20486975638478636 65 | 32 66 | Episode reward -0.09733838664923591 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-27_00-53.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.07546580235781458 3 | 1 4 | Episode reward -0.016877838264604338 5 | 2 6 | Episode reward -0.14358227431541454 7 | 3 8 | Episode reward -0.4759352365326977 9 | 4 10 | Episode reward -0.14358227431541454 11 | 5 12 | Episode reward -0.08120009164616435 13 | 6 14 | Episode reward -0.030288961912586593 15 | 7 16 | Episode reward 
-0.08120009164616435 17 | 8 18 | Episode reward -0.14358227431541454 19 | 9 20 | Episode reward -0.07546580235781458 21 | 10 22 | Episode reward -0.10115976642583667 23 | 11 24 | Episode reward -0.4759352365326977 25 | 12 26 | Episode reward -0.5278627799721357 27 | 13 28 | Episode reward -8.12105414816833 29 | 14 30 | Episode reward -0.520155235763466 31 | 15 32 | Episode reward -4.578419398237171 33 | 16 34 | Episode reward -0.2492442849019238 35 | 17 36 | Episode reward -0.05295112116265456 37 | 18 38 | Episode reward -0.033379618563127614 39 | 19 40 | Episode reward -0.11169698038721343 41 | 20 42 | Episode reward -0.5278627799721357 43 | 21 44 | Episode reward -0.5594416931204723 45 | 22 46 | Episode reward -8.12105414816833 47 | 23 48 | Episode reward -0.11169698038721343 49 | 24 50 | Episode reward -0.5594416931204723 51 | 25 52 | Episode reward -4.578419398237171 53 | 26 54 | Episode reward -1.0737933120005951 55 | 27 56 | Episode reward -0.0433228383383041 57 | 28 58 | Episode reward -0.0433228383383041 59 | 29 60 | Episode reward -0.2865444118330874 61 | 30 62 | Episode reward -0.8160530174959545 63 | 31 64 | Episode reward -0.2865444118330874 65 | 32 66 | Episode reward -0.8905360222099052 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DDQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-27_01-01.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.09909943265261287 3 | 1 4 | Episode reward -0.04048878034633957 5 | 2 6 | Episode reward -0.14027873736176988 7 | 3 8 | Episode reward -0.2757943796277473 9 | 4 10 | Episode reward -0.14027873736176988 11 | 5 12 | Episode reward -0.03269284479641088 13 | 6 14 | Episode reward -0.09909702864375958 15 | 7 16 | Episode reward -0.03269284479641088 17 | 8 18 | Episode reward -0.14027873736176988 19 | 9 20 | Episode reward -0.09909943265261287 21 | 10 22 | Episode reward -0.12242821421088672 
23 | 11 24 | Episode reward -0.2757943796277473 25 | 12 26 | Episode reward -0.32204197199604767 27 | 13 28 | Episode reward -10.0 29 | 14 30 | Episode reward -0.66544804218219 31 | 15 32 | Episode reward -1.5961380430876475 33 | 16 34 | Episode reward -0.5683126437885454 35 | 17 36 | Episode reward -0.5402698261583266 37 | 18 38 | Episode reward -0.12431533109568649 39 | 19 40 | Episode reward -0.12336994669697467 41 | 20 42 | Episode reward -0.32204197199604767 43 | 21 44 | Episode reward -0.523340791579023 45 | 22 46 | Episode reward -10.0 47 | 23 48 | Episode reward -0.12336994669697467 49 | 24 50 | Episode reward -0.523340791579023 51 | 25 52 | Episode reward -1.5961380430876475 53 | 26 54 | Episode reward -2.3073709254846864 55 | 27 56 | Episode reward -0.5234042161515298 57 | 28 58 | Episode reward -0.5234042161515298 59 | 29 60 | Episode reward -10.0 61 | 30 62 | Episode reward -2.052019313053443 63 | 31 64 | Episode reward -10.0 65 | 32 66 | Episode reward -0.9794231398823333 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_17-38.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.39784343381026127 3 | 1 4 | Episode reward -0.04284168035384416 5 | 2 6 | Episode reward -0.12396041652325344 7 | 3 8 | Episode reward -0.03639454160250224 9 | 4 10 | Episode reward -0.049985586581092416 11 | 5 12 | Episode reward -0.13491842733406806 13 | 6 14 | Episode reward -0.048625955604474816 15 | 7 16 | Episode reward -0.04284168035384416 17 | 8 18 | Episode reward -0.12396041652325344 19 | 9 20 | Episode reward -0.03639454160250224 21 | 10 22 | Episode reward -0.02836784515033639 23 | 11 24 | Episode reward -0.03639454160250224 25 | 12 26 | Episode reward -0.4076676587973248 27 | 13 28 | Episode reward -0.4484038853696569 29 | 14 30 | Episode reward -0.34323308530049235 31 | 15 32 | Episode 
reward -0.3381496352541543 33 | 16 34 | Episode reward -0.08198499639662102 35 | 17 36 | Episode reward -0.17457871715582302 37 | 18 38 | Episode reward -0.07471460752909874 39 | 19 40 | Episode reward -0.4076676587973248 41 | 20 42 | Episode reward -0.11454849439488829 43 | 21 44 | Episode reward -0.3381496352541543 45 | 22 46 | Episode reward -2.5874656845005433 47 | 23 48 | Episode reward -0.4076676587973248 49 | 24 50 | Episode reward -0.04741382107368106 51 | 25 52 | Episode reward -0.1429815682749804 53 | 26 54 | Episode reward -0.07471460752909874 55 | 27 56 | Episode reward -0.7560860131589566 57 | 28 58 | Episode reward -0.16725233488156255 59 | 29 60 | Episode reward -0.7560860131589566 61 | 30 62 | Episode reward -0.5372025035465239 63 | 31 64 | Episode reward -0.9146002008448023 65 | 32 66 | Episode reward -0.05618179320419906 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_17-51.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.3979051669516898 3 | 1 4 | Episode reward -0.04273331904513919 5 | 2 6 | Episode reward -0.0796796558532023 7 | 3 8 | Episode reward -0.030775531753543035 9 | 4 10 | Episode reward -0.04273331904513919 11 | 5 12 | Episode reward -0.0710344022610779 13 | 6 14 | Episode reward -0.04637159394535056 15 | 7 16 | Episode reward -0.04273331904513919 17 | 8 18 | Episode reward -0.0796796558532023 19 | 9 20 | Episode reward -0.030775531753543035 21 | 10 22 | Episode reward -0.035387381921055984 23 | 11 24 | Episode reward -0.030775531753543035 25 | 12 26 | Episode reward -0.22207973948548543 27 | 13 28 | Episode reward -0.3841930827519314 29 | 14 30 | Episode reward -0.22207973948548543 31 | 15 32 | Episode reward -0.4664622149849865 33 | 16 34 | Episode reward -0.058516577073671854 35 | 17 36 | Episode reward -0.017085697328236425 37 | 18 38 | Episode reward 
-0.10041435493147516 39 | 19 40 | Episode reward -0.22207973948548543 41 | 20 42 | Episode reward -0.12684105947549254 43 | 21 44 | Episode reward -0.4664622149849865 45 | 22 46 | Episode reward -1.0855410951502236 47 | 23 48 | Episode reward -0.250526470127113 49 | 24 50 | Episode reward -0.03634948690323592 51 | 25 52 | Episode reward -0.11518717538341036 53 | 26 54 | Episode reward -0.10041435493147516 55 | 27 56 | Episode reward -0.20124123583849185 57 | 28 58 | Episode reward -0.6211836852356291 59 | 29 60 | Episode reward -0.20124123583849185 61 | 30 62 | Episode reward -0.6594887862053045 63 | 31 64 | Episode reward -0.5231349789710306 65 | 32 66 | Episode reward -0.5221915024111332 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_18-03.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.2899316782658249 3 | 1 4 | Episode reward -0.08078777447526607 5 | 2 6 | Episode reward -0.13222059277672488 7 | 3 8 | Episode reward -0.024247621631712984 9 | 4 10 | Episode reward -0.08078777447526607 11 | 5 12 | Episode reward -0.11689761812160805 13 | 6 14 | Episode reward -0.08614108563298156 15 | 7 16 | Episode reward -0.08078777447526607 17 | 8 18 | Episode reward -0.1321855216553659 19 | 9 20 | Episode reward -0.024247621631712984 21 | 10 22 | Episode reward -0.027127717662776323 23 | 11 24 | Episode reward -0.024247621631712984 25 | 12 26 | Episode reward -0.4942176450301103 27 | 13 28 | Episode reward -0.17020055323301045 29 | 14 30 | Episode reward -0.4942176450301103 31 | 15 32 | Episode reward -0.3037227804540725 33 | 16 34 | Episode reward -0.112218243112666 35 | 17 36 | Episode reward -0.03542479246928049 37 | 18 38 | Episode reward -0.061620293359461176 39 | 19 40 | Episode reward -0.4942176450301103 41 | 20 42 | Episode reward -0.03542479246928049 43 | 21 44 | Episode reward 
-0.3037227804540725 45 | 22 46 | Episode reward -1.124520336388186 47 | 23 48 | Episode reward -0.4942176450301103 49 | 24 50 | Episode reward -0.06952757078604364 51 | 25 52 | Episode reward -0.09076328681684806 53 | 26 54 | Episode reward -0.061620293359461176 55 | 27 56 | Episode reward -1.1333812287757818 57 | 28 58 | Episode reward -0.16934397519173275 59 | 29 60 | Episode reward -1.1333812287757818 61 | 30 62 | Episode reward -1.5436626004964369 63 | 31 64 | Episode reward -0.7565640324459745 65 | 32 66 | Episode reward -0.152578458682799 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_18-12.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.19402753446010804 3 | 1 4 | Episode reward -0.08896199390427148 5 | 2 6 | Episode reward -0.11280996802857843 7 | 3 8 | Episode reward -0.016376576834589908 9 | 4 10 | Episode reward -0.08896199390427148 11 | 5 12 | Episode reward -0.09021399966666245 13 | 6 14 | Episode reward -0.07668234606490741 15 | 7 16 | Episode reward -0.08896199390427148 17 | 8 18 | Episode reward -0.11280996802857843 19 | 9 20 | Episode reward -0.016376576834589908 21 | 10 22 | Episode reward -0.07017649946010493 23 | 11 24 | Episode reward -0.016376576834589908 25 | 12 26 | Episode reward -0.034562587192761535 27 | 13 28 | Episode reward -0.2165894966038312 29 | 14 30 | Episode reward -0.5715255042411844 31 | 15 32 | Episode reward -0.3105274978045144 33 | 16 34 | Episode reward -0.06892287060937181 35 | 17 36 | Episode reward -0.04009297682568206 37 | 18 38 | Episode reward -0.029270136447821504 39 | 19 40 | Episode reward -0.034562587192761535 41 | 20 42 | Episode reward -0.04009297682568206 43 | 21 44 | Episode reward -0.3105274978045144 45 | 22 46 | Episode reward -1.1635291832214676 47 | 23 48 | Episode reward -0.034562587192761535 49 | 24 50 | Episode reward 
-0.12107961930906216 51 | 25 52 | Episode reward -0.12877193157758035 53 | 26 54 | Episode reward -0.029270136447821504 55 | 27 56 | Episode reward -0.22809935283321492 57 | 28 58 | Episode reward -1.2008879297941095 59 | 29 60 | Episode reward -0.22809935283321492 61 | 30 62 | Episode reward -0.6754784674814706 63 | 31 64 | Episode reward -0.5857228659589171 65 | 32 66 | Episode reward -1.2008879297941095 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v0_0_2019-06-26_18-23.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.25209366621778034 3 | 1 4 | Episode reward -0.04267227031799767 5 | 2 6 | Episode reward -0.0905675852292019 7 | 3 8 | Episode reward -0.024438088585689702 9 | 4 10 | Episode reward -0.04267227031799767 11 | 5 12 | Episode reward -0.07833828157222134 13 | 6 14 | Episode reward -0.07001048044505005 15 | 7 16 | Episode reward -0.04267227031799767 17 | 8 18 | Episode reward -0.0905675852292019 19 | 9 20 | Episode reward -0.024438088585689702 21 | 10 22 | Episode reward -0.031983805781932564 23 | 11 24 | Episode reward -0.024438088585689702 25 | 12 26 | Episode reward -0.04851686772029544 27 | 13 28 | Episode reward -0.0997079581185358 29 | 14 30 | Episode reward -0.04851686772029544 31 | 15 32 | Episode reward -0.5948991465605741 33 | 16 34 | Episode reward -0.05754361367803335 35 | 17 36 | Episode reward -0.03541821214354241 37 | 18 38 | Episode reward -0.20081949438948138 39 | 19 40 | Episode reward -0.04851686772029544 41 | 20 42 | Episode reward -0.03541821214354241 43 | 21 44 | Episode reward -0.5948991465605741 45 | 22 46 | Episode reward -0.695539328900599 47 | 23 48 | Episode reward -0.04851686772029544 49 | 24 50 | Episode reward -0.044204227664346465 51 | 25 52 | Episode reward -0.1477177336342962 53 | 26 54 | Episode reward -0.20081949438948138 55 | 27 56 | Episode reward 
-0.17209134076281396 57 | 28 58 | Episode reward -0.678325686590574 59 | 29 60 | Episode reward -0.17209134076281396 61 | 30 62 | Episode reward -0.6465893119034476 63 | 31 64 | Episode reward -1.452779600265249 65 | 32 66 | Episode reward -0.678325686590574 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_18-34.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.08719419478544864 3 | 1 4 | Episode reward -0.07071197300329211 5 | 2 6 | Episode reward -0.0720533415267356 7 | 3 8 | Episode reward -0.0317137719701762 9 | 4 10 | Episode reward -0.08719419478544864 11 | 5 12 | Episode reward -0.03580333870168962 13 | 6 14 | Episode reward -0.03580333870168962 15 | 7 16 | Episode reward -0.04284168035384416 17 | 8 18 | Episode reward -0.046558280088157725 19 | 9 20 | Episode reward -0.01773482318638659 21 | 10 22 | Episode reward -0.03205124204657616 23 | 11 24 | Episode reward -0.0317137719701762 25 | 12 26 | Episode reward -0.5485133615352417 27 | 13 28 | Episode reward -0.07970587114700266 29 | 14 30 | Episode reward -1.385235486540009 31 | 15 32 | Episode reward -0.4736033599796503 33 | 16 34 | Episode reward -0.1266616958686347 35 | 17 36 | Episode reward -0.07336936856963146 37 | 18 38 | Episode reward -3.038768066502795 39 | 19 40 | Episode reward -0.07336936856963146 41 | 20 42 | Episode reward -1.385235486540009 43 | 21 44 | Episode reward -0.2709781952610177 45 | 22 46 | Episode reward -0.20560885251248306 47 | 23 48 | Episode reward -0.2798808981108424 49 | 24 50 | Episode reward -0.5485133613292303 51 | 25 52 | Episode reward -1.385235486540009 53 | 26 54 | Episode reward -0.28598128355633595 55 | 27 56 | Episode reward -1.0790819373447889 57 | 28 58 | Episode reward -0.053806786444806624 59 | 29 60 | Episode reward -0.04599434666943311 61 | 30 62 | Episode reward 
-0.053806786444806624 63 | 31 64 | Episode reward -0.4842562839307456 65 | 32 66 | Episode reward -0.42341501087942923 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_18-46.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.2016713512949556 3 | 1 4 | Episode reward -0.03631124021361177 5 | 2 6 | Episode reward -0.04314501359542282 7 | 3 8 | Episode reward -0.10950759039076349 9 | 4 10 | Episode reward -0.20163971447077939 11 | 5 12 | Episode reward -0.026732097288849274 13 | 6 14 | Episode reward -0.026732097288849274 15 | 7 16 | Episode reward -0.05264889304810305 17 | 8 18 | Episode reward -0.08621862980449882 19 | 9 20 | Episode reward -0.016981360335281147 21 | 10 22 | Episode reward -0.03631124021361177 23 | 11 24 | Episode reward -0.10950759039076349 25 | 12 26 | Episode reward -2.3753856294235023 27 | 13 28 | Episode reward -0.348129165654634 29 | 14 30 | Episode reward -0.08142745489724607 31 | 15 32 | Episode reward -0.3478016767018247 33 | 16 34 | Episode reward -0.14736283820414361 35 | 17 36 | Episode reward -0.08424126425724544 37 | 18 38 | Episode reward -0.34364911182988844 39 | 19 40 | Episode reward -0.08424126425724544 41 | 20 42 | Episode reward -0.21230781864891596 43 | 21 44 | Episode reward -0.9245089253001698 45 | 22 46 | Episode reward -0.34654802601595264 47 | 23 48 | Episode reward -0.307190171025344 49 | 24 50 | Episode reward -2.3753856294235023 51 | 25 52 | Episode reward -0.08142745489724607 53 | 26 54 | Episode reward -0.05882627917906343 55 | 27 56 | Episode reward -0.629782112628289 57 | 28 58 | Episode reward -0.051152035752774064 59 | 29 60 | Episode reward -0.346092736326578 61 | 30 62 | Episode reward -0.051152035752774064 63 | 31 64 | Episode reward -0.19972449830450825 65 | 32 66 | Episode reward -1.3278448827770521 67 | 
-------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_18-57.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.09348888094322813 3 | 1 4 | Episode reward -0.07252436682921387 5 | 2 6 | Episode reward -0.059282276064932124 7 | 3 8 | Episode reward -0.03157472989110686 9 | 4 10 | Episode reward -0.09348888094322813 11 | 5 12 | Episode reward -0.02674410550990102 13 | 6 14 | Episode reward -0.02674410550990102 15 | 7 16 | Episode reward -0.0498577916135476 17 | 8 18 | Episode reward -0.05282970954962578 19 | 9 20 | Episode reward -0.040532042981876085 21 | 10 22 | Episode reward -0.07252436682921387 23 | 11 24 | Episode reward -0.03157472989110686 25 | 12 26 | Episode reward -0.1492117938248044 27 | 13 28 | Episode reward -0.028268925623610386 29 | 14 30 | Episode reward -0.8942625652427555 31 | 15 32 | Episode reward -0.30893377412320533 33 | 16 34 | Episode reward -0.10022956448372186 35 | 17 36 | Episode reward -0.3163885382313505 37 | 18 38 | Episode reward -0.39106424344489 39 | 19 40 | Episode reward -0.3163885382313505 41 | 20 42 | Episode reward -0.8942625652427555 43 | 21 44 | Episode reward -0.2920731477843733 45 | 22 46 | Episode reward -0.03978563944804344 47 | 23 48 | Episode reward -0.6010554753870632 49 | 24 50 | Episode reward -0.1492117938248044 51 | 25 52 | Episode reward -0.8942625652427555 53 | 26 54 | Episode reward -0.24916330912523105 55 | 27 56 | Episode reward -2.145127723206545 57 | 28 58 | Episode reward -0.10507481481340653 59 | 29 60 | Episode reward -1.479982676369177 61 | 30 62 | Episode reward -0.10507481481340653 63 | 31 64 | Episode reward -1.2437144507042455 65 | 32 66 | Episode reward -2.2274005625633047 67 | -------------------------------------------------------------------------------- 
/agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_19-09.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.09218974639239949 3 | 1 4 | Episode reward -0.03205124204657616 5 | 2 6 | Episode reward -0.04314501359542282 7 | 3 8 | Episode reward -0.12594518851555642 9 | 4 10 | Episode reward -0.09218581820335764 11 | 5 12 | Episode reward -0.08068685882494693 13 | 6 14 | Episode reward -0.08068685882494693 15 | 7 16 | Episode reward -0.08959885774269408 17 | 8 18 | Episode reward -0.0563390788299665 19 | 9 20 | Episode reward -0.030916863223574977 21 | 10 22 | Episode reward -0.03205124204657616 23 | 11 24 | Episode reward -0.12594518851555642 25 | 12 26 | Episode reward -0.13318390928259827 27 | 13 28 | Episode reward -0.14396982028442945 29 | 14 30 | Episode reward -0.18998648174406074 31 | 15 32 | Episode reward -0.4076873416588953 33 | 16 34 | Episode reward -0.20829998455806076 35 | 17 36 | Episode reward -0.1719953566822089 37 | 18 38 | Episode reward -0.35603076818695145 39 | 19 40 | Episode reward -0.1719953566822089 41 | 20 42 | Episode reward -0.18222873872891543 43 | 21 44 | Episode reward -0.12327202028684002 45 | 22 46 | Episode reward -0.08346533743205173 47 | 23 48 | Episode reward -4.134675345219987 49 | 24 50 | Episode reward -0.13318390928259827 51 | 25 52 | Episode reward -0.18222873872891543 53 | 26 54 | Episode reward -0.09850663004263019 55 | 27 56 | Episode reward -0.4544971714771918 57 | 28 58 | Episode reward -0.5845022059550186 59 | 29 60 | Episode reward -0.7933472684304075 61 | 30 62 | Episode reward -0.538842359583124 63 | 31 64 | Episode reward -0.3054935622016144 65 | 32 66 | Episode reward -0.1228689644526975 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v1_0_2019-06-26_19-21.txt: 
-------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.2032193792279949 3 | 1 4 | Episode reward -0.03223009602650772 5 | 2 6 | Episode reward -0.04359257028475401 7 | 3 8 | Episode reward -0.09008855361029362 9 | 4 10 | Episode reward -0.15785715175400838 11 | 5 12 | Episode reward -0.035235191217756576 13 | 6 14 | Episode reward -0.035235191217756576 15 | 7 16 | Episode reward -0.08641757785774706 17 | 8 18 | Episode reward -0.07780369314718592 19 | 9 20 | Episode reward -0.0404829356629038 21 | 10 22 | Episode reward -0.03223009602650772 23 | 11 24 | Episode reward -0.09008855361029362 25 | 12 26 | Episode reward -2.080935431983229 27 | 13 28 | Episode reward -0.02910479597435644 29 | 14 30 | Episode reward -0.2168761042704439 31 | 15 32 | Episode reward -0.539191739416298 33 | 16 34 | Episode reward -0.2926669802717692 35 | 17 36 | Episode reward -0.2713227108621786 37 | 18 38 | Episode reward -7.606665787901584 39 | 19 40 | Episode reward -0.2713227108621786 41 | 20 42 | Episode reward -0.2168761042704439 43 | 21 44 | Episode reward -0.10960322098956127 45 | 22 46 | Episode reward -0.2791613754177095 47 | 23 48 | Episode reward -0.4808911463227717 49 | 24 50 | Episode reward -0.14454850882464038 51 | 25 52 | Episode reward -0.31920671827405656 53 | 26 54 | Episode reward -0.4409075884422382 55 | 27 56 | Episode reward -0.14585693017277035 57 | 28 58 | Episode reward -0.07425979669681125 59 | 29 60 | Episode reward -0.3138775590957599 61 | 30 62 | Episode reward -0.07425979669681125 63 | 31 64 | Episode reward -6.531898146941159 65 | 32 66 | Episode reward -0.18954075266435366 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_19-33.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.1525956343561072 3 | 1 4 | Episode reward 
-0.08028356854640394 5 | 2 6 | Episode reward -0.042868060415267924 7 | 3 8 | Episode reward -0.1525956343561072 9 | 4 10 | Episode reward -0.07234282707600294 11 | 5 12 | Episode reward -0.10343109063429162 13 | 6 14 | Episode reward -0.03568160525141344 15 | 7 16 | Episode reward -0.1105298681790817 17 | 8 18 | Episode reward -0.03568160525141344 19 | 9 20 | Episode reward -0.10343109063429162 21 | 10 22 | Episode reward -0.1105298681790817 23 | 11 24 | Episode reward -0.07234282707600294 25 | 12 26 | Episode reward -0.07329878561934841 27 | 13 28 | Episode reward -0.12614407874027564 29 | 14 30 | Episode reward -1.2511119120838476 31 | 15 32 | Episode reward -0.18194601345024072 33 | 16 34 | Episode reward -0.20401492934148013 35 | 17 36 | Episode reward -0.08255230170920888 37 | 18 38 | Episode reward -0.07015902747493663 39 | 19 40 | Episode reward -0.20401492934148013 41 | 20 42 | Episode reward -0.3705825359014394 43 | 21 44 | Episode reward -0.3705825359014394 45 | 22 46 | Episode reward -0.023581346348501595 47 | 23 48 | Episode reward -0.3992917426311112 49 | 24 50 | Episode reward -0.12979643251920056 51 | 25 52 | Episode reward -0.023581346348501595 53 | 26 54 | Episode reward -0.08255230170920888 55 | 27 56 | Episode reward -0.4357656255548021 57 | 28 58 | Episode reward -1.2962174170577865 59 | 29 60 | Episode reward -0.08480081884697746 61 | 30 62 | Episode reward -0.08480081884697746 63 | 31 64 | Episode reward -1.327526020085683 65 | 32 66 | Episode reward -1.0202167604378118 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_19-45.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.16463456118462452 3 | 1 4 | Episode reward -0.06090781894778568 5 | 2 6 | Episode reward -0.043598014382293666 7 | 3 8 | Episode reward -0.16463456118462452 9 | 4 10 | Episode reward 
-0.04693771008810361 11 | 5 12 | Episode reward -0.07682533820435325 13 | 6 14 | Episode reward -0.12116918092566707 15 | 7 16 | Episode reward -0.04250667623658374 17 | 8 18 | Episode reward -0.12116918092566707 19 | 9 20 | Episode reward -0.07682533820435325 21 | 10 22 | Episode reward -0.04250667623658374 23 | 11 24 | Episode reward -0.04405453454617585 25 | 12 26 | Episode reward -0.1017523799395114 27 | 13 28 | Episode reward -0.08831188259741074 29 | 14 30 | Episode reward -1.521449812391113 31 | 15 32 | Episode reward -0.9978421157903133 33 | 16 34 | Episode reward -0.1791948342756533 35 | 17 36 | Episode reward -0.06498849269178673 37 | 18 38 | Episode reward -0.16657834635640534 39 | 19 40 | Episode reward -0.1791948342756533 41 | 20 42 | Episode reward -0.34849208466574 43 | 21 44 | Episode reward -0.34849208466574 45 | 22 46 | Episode reward -0.029520192261300995 47 | 23 48 | Episode reward -0.20253669629998247 49 | 24 50 | Episode reward -0.1629028657198343 51 | 25 52 | Episode reward -0.034492175213677305 53 | 26 54 | Episode reward -0.03418986257496029 55 | 27 56 | Episode reward -0.5045587383089822 57 | 28 58 | Episode reward -0.09927996969001973 59 | 29 60 | Episode reward -0.16994977558176597 61 | 30 62 | Episode reward -0.12851577227574199 63 | 31 64 | Episode reward -0.1486119384119285 65 | 32 66 | Episode reward -2.212988798657585 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_19-56.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.11230344611456233 3 | 1 4 | Episode reward -0.04353870610803556 5 | 2 6 | Episode reward -0.015686594678480263 7 | 3 8 | Episode reward -0.11230344611456229 9 | 4 10 | Episode reward -0.09899059705807331 11 | 5 12 | Episode reward -0.10100584115890524 13 | 6 14 | Episode reward -0.036166565759027325 15 | 7 16 | Episode reward 
-0.04048098084126644 17 | 8 18 | Episode reward -0.036166565759027325 19 | 9 20 | Episode reward -0.10100584115890524 21 | 10 22 | Episode reward -0.04048098084126644 23 | 11 24 | Episode reward -0.09899059705807331 25 | 12 26 | Episode reward -0.06562858440276698 27 | 13 28 | Episode reward -0.14613204293192303 29 | 14 30 | Episode reward -0.4157930692115822 31 | 15 32 | Episode reward -0.28196187876067585 33 | 16 34 | Episode reward -0.17087173220200844 35 | 17 36 | Episode reward -0.06768660752429305 37 | 18 38 | Episode reward -0.09137038863250388 39 | 19 40 | Episode reward -0.05243246390278 41 | 20 42 | Episode reward -0.4205077326217383 43 | 21 44 | Episode reward -0.4205077326217383 45 | 22 46 | Episode reward -0.12855087773536522 47 | 23 48 | Episode reward -0.17996878715731915 49 | 24 50 | Episode reward -0.18040760265767708 51 | 25 52 | Episode reward -0.1269123912652957 53 | 26 54 | Episode reward -0.06768660752429305 55 | 27 56 | Episode reward -1.546529717107116 57 | 28 58 | Episode reward -1.4308355191422348 59 | 29 60 | Episode reward -0.08360209623526821 61 | 30 62 | Episode reward -0.08360209623526821 63 | 31 64 | Episode reward -0.7307464211688164 65 | 32 66 | Episode reward -0.9803724256368135 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_20-08.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.21784791073538257 3 | 1 4 | Episode reward -0.08079019176054422 5 | 2 6 | Episode reward -0.04363910288304664 7 | 3 8 | Episode reward -0.21784791073538257 9 | 4 10 | Episode reward -0.0990550240381061 11 | 5 12 | Episode reward -0.10403988951565578 13 | 6 14 | Episode reward -0.025197663532403644 15 | 7 16 | Episode reward -0.0900885523559721 17 | 8 18 | Episode reward -0.025197663532403644 19 | 9 20 | Episode reward -0.10403988951565578 21 | 10 22 | Episode reward 
-0.0900885523559721 23 | 11 24 | Episode reward -0.0990550240381061 25 | 12 26 | Episode reward -0.05780933187809634 27 | 13 28 | Episode reward -0.08952909762499525 29 | 14 30 | Episode reward -0.2381146871905646 31 | 15 32 | Episode reward -2.1078020783638487 33 | 16 34 | Episode reward -0.10008085450083719 35 | 17 36 | Episode reward -0.10404125833753813 37 | 18 38 | Episode reward -0.0876175237248979 39 | 19 40 | Episode reward -0.10008085450083719 41 | 20 42 | Episode reward -0.37935842054352914 43 | 21 44 | Episode reward -0.37935842054352914 45 | 22 46 | Episode reward -0.08794642596181713 47 | 23 48 | Episode reward -0.3179212288120924 49 | 24 50 | Episode reward -0.1710628670116646 51 | 25 52 | Episode reward -0.08794642596181713 53 | 26 54 | Episode reward -0.1593380030214384 55 | 27 56 | Episode reward -0.14086257302856015 57 | 28 58 | Episode reward -2.612402072102313 59 | 29 60 | Episode reward -1.9295695134381272 61 | 30 62 | Episode reward -1.9295695134381272 63 | 31 64 | Episode reward -0.26945104941415016 65 | 32 66 | Episode reward -0.2185817629991091 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v2_0_2019-06-26_20-20.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.13706725407894918 3 | 1 4 | Episode reward -0.0815782962282361 5 | 2 6 | Episode reward -0.042858790086604044 7 | 3 8 | Episode reward -0.13706725407894918 9 | 4 10 | Episode reward -0.06843098788034561 11 | 5 12 | Episode reward -0.09772327155280763 13 | 6 14 | Episode reward -0.09909702864375958 15 | 7 16 | Episode reward -0.09000811391283597 17 | 8 18 | Episode reward -0.09914380517687128 19 | 9 20 | Episode reward -0.09772327155280763 21 | 10 22 | Episode reward -0.09000811391283597 23 | 11 24 | Episode reward -0.06843098788034561 25 | 12 26 | Episode reward -0.18776308801118957 27 | 13 28 | Episode reward 
-0.0887979280980542 29 | 14 30 | Episode reward -0.7926174460975745 31 | 15 32 | Episode reward -0.7650388603936703 33 | 16 34 | Episode reward -0.12544130176920204 35 | 17 36 | Episode reward -0.3847248506937407 37 | 18 38 | Episode reward -0.277683710622039 39 | 19 40 | Episode reward -0.12544130176920204 41 | 20 42 | Episode reward -0.4579526915043374 43 | 21 44 | Episode reward -0.4579526915043374 45 | 22 46 | Episode reward -0.039217431937860196 47 | 23 48 | Episode reward -0.1331458662849965 49 | 24 50 | Episode reward -0.09586109145495311 51 | 25 52 | Episode reward -0.039217431937860196 53 | 26 54 | Episode reward -0.18226516790121786 55 | 27 56 | Episode reward -1.126274376390317 57 | 28 58 | Episode reward -1.2965861780327492 59 | 29 60 | Episode reward -0.5969621982792432 61 | 30 62 | Episode reward -0.37418219153673155 63 | 31 64 | Episode reward -1.5887702333909348 65 | 32 66 | Episode reward -0.12043867300226864 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-26_20-32.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.07458499673722707 3 | 1 4 | Episode reward -0.01655620492174436 5 | 2 6 | Episode reward -0.2027705928816882 7 | 3 8 | Episode reward -0.245265435272713 9 | 4 10 | Episode reward -0.14383790358224166 11 | 5 12 | Episode reward -0.03220554749914568 13 | 6 14 | Episode reward -0.030048865074964633 15 | 7 16 | Episode reward -0.03220554749914568 17 | 8 18 | Episode reward -0.2027705928816882 19 | 9 20 | Episode reward -0.07458499673722707 21 | 10 22 | Episode reward -0.12234115401914353 23 | 11 24 | Episode reward -0.2652627676030573 25 | 12 26 | Episode reward -0.20539711391058402 27 | 13 28 | Episode reward -6.197492739057111 29 | 14 30 | Episode reward -0.13652927237052215 31 | 15 32 | Episode reward -0.32859912338248615 33 | 16 34 | Episode reward 
-0.44720092242175274 35 | 17 36 | Episode reward -0.18140211242064586 37 | 18 38 | Episode reward -0.08088810135173463 39 | 19 40 | Episode reward -0.05790191443521681 41 | 20 42 | Episode reward -0.20539711391058402 43 | 21 44 | Episode reward -0.09804842451232632 45 | 22 46 | Episode reward -6.197492739057111 47 | 23 48 | Episode reward -0.05790191443521681 49 | 24 50 | Episode reward -0.1163724939075198 51 | 25 52 | Episode reward -0.32859912338248615 53 | 26 54 | Episode reward -0.1232255837010093 55 | 27 56 | Episode reward -0.7305897055804521 57 | 28 58 | Episode reward -0.07206166290954107 59 | 29 60 | Episode reward -0.08009450866016365 61 | 30 62 | Episode reward -0.04682310730013908 63 | 31 64 | Episode reward -0.08009450866016365 65 | 32 66 | Episode reward -1.6180401879302642 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-26_20-43.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.06960018915504923 3 | 1 4 | Episode reward -0.0404829356629038 5 | 2 6 | Episode reward -0.0768701504805474 7 | 3 8 | Episode reward -0.2785686773706815 9 | 4 10 | Episode reward -0.12369333906077647 11 | 5 12 | Episode reward -0.060704667402973755 13 | 6 14 | Episode reward -0.03567497301008607 15 | 7 16 | Episode reward -0.060704667402973755 17 | 8 18 | Episode reward -0.12369333906077647 19 | 9 20 | Episode reward -0.06960018915504923 21 | 10 22 | Episode reward -0.07071333403282269 23 | 11 24 | Episode reward -0.2785686773706815 25 | 12 26 | Episode reward -0.5579055772008945 27 | 13 28 | Episode reward -0.9643524988143982 29 | 14 30 | Episode reward -0.594686550913344 31 | 15 32 | Episode reward -0.3791631272717163 33 | 16 34 | Episode reward -0.08158676984438162 35 | 17 36 | Episode reward -0.03886057710603955 37 | 18 38 | Episode reward -0.07829579809302156 39 | 19 40 | Episode reward 
-0.07816679319893298 41 | 20 42 | Episode reward -0.25104288407412184 43 | 21 44 | Episode reward -0.14184366554049055 45 | 22 46 | Episode reward -0.9643524988143982 47 | 23 48 | Episode reward -0.08227565195687173 49 | 24 50 | Episode reward -0.14184366554049055 51 | 25 52 | Episode reward -0.3791631272717163 53 | 26 54 | Episode reward -0.5312007994077466 55 | 27 56 | Episode reward -0.21332917670336085 57 | 28 58 | Episode reward -0.21332917670336085 59 | 29 60 | Episode reward -0.21080046210026074 61 | 30 62 | Episode reward -0.7287759560203941 63 | 31 64 | Episode reward -0.21080046210026074 65 | 32 66 | Episode reward -0.5278184606142843 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-26_20-52.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.06194061475045251 3 | 1 4 | Episode reward -0.024451252478888984 5 | 2 6 | Episode reward -0.12137995281474322 7 | 3 8 | Episode reward -0.7555147393017903 9 | 4 10 | Episode reward -0.12137800332540474 11 | 5 12 | Episode reward -0.07078205629260816 13 | 6 14 | Episode reward -0.09947226364908522 15 | 7 16 | Episode reward -0.07078205629260816 17 | 8 18 | Episode reward -0.12137995281474322 19 | 9 20 | Episode reward -0.06194061475045251 21 | 10 22 | Episode reward -0.04934557231909009 23 | 11 24 | Episode reward -0.7555147393017903 25 | 12 26 | Episode reward -0.07018652720155671 27 | 13 28 | Episode reward -0.4410034924650916 29 | 14 30 | Episode reward -0.4684295582012572 31 | 15 32 | Episode reward -0.4865754368374606 33 | 16 34 | Episode reward -0.4667528966507081 35 | 17 36 | Episode reward -0.18441724338296556 37 | 18 38 | Episode reward -0.07597843277715213 39 | 19 40 | Episode reward -0.06934310766649168 41 | 20 42 | Episode reward -0.15730100583391274 43 | 21 44 | Episode reward -0.07271525119774239 45 | 22 46 | Episode reward 
-0.4410034924650916 47 | 23 48 | Episode reward -0.05125225786908703 49 | 24 50 | Episode reward -0.10260522184949045 51 | 25 52 | Episode reward -0.4865754368374606 53 | 26 54 | Episode reward -2.157055304809026 55 | 27 56 | Episode reward -0.08068925303332256 57 | 28 58 | Episode reward -0.08068925303332256 59 | 29 60 | Episode reward -4.47103400907581 61 | 30 62 | Episode reward -2.5689069041306785 63 | 31 64 | Episode reward -4.47103400907581 65 | 32 66 | Episode reward -0.45126337373629527 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-26_21-01.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.0483443133958614 3 | 1 4 | Episode reward -0.024436437664151225 5 | 2 6 | Episode reward -0.05139647585350354 7 | 3 8 | Episode reward -0.997242671804258 9 | 4 10 | Episode reward -0.05139647585350354 11 | 5 12 | Episode reward -0.027885419698467206 13 | 6 14 | Episode reward -0.12116918092566707 15 | 7 16 | Episode reward -0.027885419698467206 17 | 8 18 | Episode reward -0.05139647585350354 19 | 9 20 | Episode reward -0.0483443133958614 21 | 10 22 | Episode reward -0.048305213438503344 23 | 11 24 | Episode reward -0.997242671804258 25 | 12 26 | Episode reward -0.13988815376889335 27 | 13 28 | Episode reward -4.949638164285324 29 | 14 30 | Episode reward -0.3058153233248805 31 | 15 32 | Episode reward -0.634070351116121 33 | 16 34 | Episode reward -0.33324732370624954 35 | 17 36 | Episode reward -0.047925050356583294 37 | 18 38 | Episode reward -0.0823085185231105 39 | 19 40 | Episode reward -0.07069513434303808 41 | 20 42 | Episode reward -0.17982991673211632 43 | 21 44 | Episode reward -0.17433239164073405 45 | 22 46 | Episode reward -4.949638164285324 47 | 23 48 | Episode reward -0.07069513434303808 49 | 24 50 | Episode reward -0.17186940213403062 51 | 25 52 | Episode reward 
-0.634070351116121 53 | 26 54 | Episode reward -1.194599853492742 55 | 27 56 | Episode reward -0.6861110536646657 57 | 28 58 | Episode reward -0.6861107098178766 59 | 29 60 | Episode reward -0.2892792468882762 61 | 30 62 | Episode reward -0.11007024460564203 63 | 31 64 | Episode reward -0.2892792468882762 65 | 32 66 | Episode reward -0.17519787996842232 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/DQN/DQN_CM1-postgres-card-job-masking-v3_0_2019-06-26_21-10.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.0991018203975945 3 | 1 4 | Episode reward -0.024124862490818452 5 | 2 6 | Episode reward -0.1795644567233154 7 | 3 8 | Episode reward -0.21673003285929412 9 | 4 10 | Episode reward -0.1795644567233154 11 | 5 12 | Episode reward -0.0812000829023305 13 | 6 14 | Episode reward -0.036166565759027325 15 | 7 16 | Episode reward -0.0812000829023305 17 | 8 18 | Episode reward -0.1795644567233154 19 | 9 20 | Episode reward -0.0991018203975945 21 | 10 22 | Episode reward -0.08099025779649172 23 | 11 24 | Episode reward -0.21673003285929412 25 | 12 26 | Episode reward -0.2023948798422745 27 | 13 28 | Episode reward -2.8257900866424883 29 | 14 30 | Episode reward -0.3085266485489856 31 | 15 32 | Episode reward -1.5689272182422171 33 | 16 34 | Episode reward -0.1622527443237719 35 | 17 36 | Episode reward -0.25360809223624237 37 | 18 38 | Episode reward -0.11082099851620798 39 | 19 40 | Episode reward -0.09566085789443858 41 | 20 42 | Episode reward -0.2023948798422745 43 | 21 44 | Episode reward -0.19667119887760903 45 | 22 46 | Episode reward -2.8257900866424883 47 | 23 48 | Episode reward -0.09972863502816062 49 | 24 50 | Episode reward -0.19667119887760903 51 | 25 52 | Episode reward -1.5689272182422171 53 | 26 54 | Episode reward -0.17648099217353141 55 | 27 56 | Episode reward -0.5131099836606042 57 | 28 58 | Episode reward 
-0.5131099836606042 59 | 29 60 | Episode reward -0.20203772729845365 61 | 30 62 | Episode reward -0.034777839567026195 63 | 31 64 | Episode reward -0.20203772729845365 65 | 32 66 | Episode reward -0.5759645905795954 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v0_0_2019-06-27_01-08.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -1.4493740611244967 3 | 1 4 | Episode reward -0.07546580235781458 5 | 2 6 | Episode reward -0.057129895014628645 7 | 3 8 | Episode reward -0.016398674668754578 9 | 4 10 | Episode reward -0.07546580235781458 11 | 5 12 | Episode reward -0.07864015452952385 13 | 6 14 | Episode reward -0.04080925895602077 15 | 7 16 | Episode reward -0.07546580235781458 17 | 8 18 | Episode reward -0.057129895014628645 19 | 9 20 | Episode reward -0.016398674668754578 21 | 10 22 | Episode reward -0.03593260156341571 23 | 11 24 | Episode reward -0.016398674668754578 25 | 12 26 | Episode reward -0.5201622386245232 27 | 13 28 | Episode reward -0.31187261490944146 29 | 14 30 | Episode reward -0.5201622386245232 31 | 15 32 | Episode reward -0.1532045524622669 33 | 16 34 | Episode reward -0.04992459960026284 35 | 17 36 | Episode reward -0.045880934324684514 37 | 18 38 | Episode reward -0.06917486532344098 39 | 19 40 | Episode reward -0.5201622386245232 41 | 20 42 | Episode reward -0.045880934324684514 43 | 21 44 | Episode reward -0.1532045524622669 45 | 22 46 | Episode reward -1.0772492609069624 47 | 23 48 | Episode reward -0.5201622386245232 49 | 24 50 | Episode reward -0.05443054896166734 51 | 25 52 | Episode reward -0.09972567817027413 53 | 26 54 | Episode reward -0.06917486532344098 55 | 27 56 | Episode reward -0.3259963268126813 57 | 28 58 | Episode reward -0.144750656203263 59 | 29 60 | Episode reward -0.3259963268126813 61 | 30 62 | Episode reward -3.475879100718198 63 | 31 64 | Episode reward 
-0.5140550960191563 65 | 32 66 | Episode reward -0.144750656203263 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v0_0_2019-06-27_01-32.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.24784912734750347 3 | 1 4 | Episode reward -0.04881196206116656 5 | 2 6 | Episode reward -0.08337909572073422 7 | 3 8 | Episode reward -0.035908608340380094 9 | 4 10 | Episode reward -0.04881196206116656 11 | 5 12 | Episode reward -0.09093306786247939 13 | 6 14 | Episode reward -0.046637947848397206 15 | 7 16 | Episode reward -0.04881196206116656 17 | 8 18 | Episode reward -0.08337909572073422 19 | 9 20 | Episode reward -0.035908608340380094 21 | 10 22 | Episode reward -0.044968067561467547 23 | 11 24 | Episode reward -0.035908608340380094 25 | 12 26 | Episode reward -0.05073957923998277 27 | 13 28 | Episode reward -0.1219190936663595 29 | 14 30 | Episode reward -0.05073957923998277 31 | 15 32 | Episode reward -0.5155737097851529 33 | 16 34 | Episode reward -0.06804463961993845 35 | 17 36 | Episode reward -0.13916174378254642 37 | 18 38 | Episode reward -0.07613805698663692 39 | 19 40 | Episode reward -0.05073957923998277 41 | 20 42 | Episode reward -0.13916174378254642 43 | 21 44 | Episode reward -0.5155737097851529 45 | 22 46 | Episode reward -0.7134281983128921 47 | 23 48 | Episode reward -0.05073957923998277 49 | 24 50 | Episode reward -0.056356952557875324 51 | 25 52 | Episode reward -0.13449429336916677 53 | 26 54 | Episode reward -0.07613805698663692 55 | 27 56 | Episode reward -0.23599387281730597 57 | 28 58 | Episode reward -1.327833311602074 59 | 29 60 | Episode reward -0.23599387281730597 61 | 30 62 | Episode reward -0.351642397846502 63 | 31 64 | Episode reward -0.22023504873282015 65 | 32 66 | Episode reward -1.327833311602074 67 | 
-------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v0_0_2019-06-27_01-57.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.3181419644395108 3 | 1 4 | Episode reward -0.0483443133958614 5 | 2 6 | Episode reward -0.09454964771993384 7 | 3 8 | Episode reward -0.024436437664151225 9 | 4 10 | Episode reward -0.0483443133958614 11 | 5 12 | Episode reward -0.14226027843990283 13 | 6 14 | Episode reward -0.08154652934971324 15 | 7 16 | Episode reward -0.0483443133958614 17 | 8 18 | Episode reward -0.09454964771993384 19 | 9 20 | Episode reward -0.024436437664151225 21 | 10 22 | Episode reward -0.045625412526001635 23 | 11 24 | Episode reward -0.024436437664151225 25 | 12 26 | Episode reward -0.5275869936198233 27 | 13 28 | Episode reward -0.6504819221628477 29 | 14 30 | Episode reward -0.5275869936198233 31 | 15 32 | Episode reward -0.3474382026452049 33 | 16 34 | Episode reward -0.10373892444100452 35 | 17 36 | Episode reward -0.06195675885538232 37 | 18 38 | Episode reward -0.12930557965530062 39 | 19 40 | Episode reward -0.5275869936198233 41 | 20 42 | Episode reward -0.06195675885538232 43 | 21 44 | Episode reward -0.3474382026452049 45 | 22 46 | Episode reward -1.2942052562181643 47 | 23 48 | Episode reward -0.5275869936198233 49 | 24 50 | Episode reward -0.03744569246546241 51 | 25 52 | Episode reward -0.14662565189593593 53 | 26 54 | Episode reward -0.12930557965530062 55 | 27 56 | Episode reward -1.1345123300589655 57 | 28 58 | Episode reward -0.7531891850786193 59 | 29 60 | Episode reward -1.1345123300589655 61 | 30 62 | Episode reward -0.18175167978692053 63 | 31 64 | Episode reward -0.15269916853638596 65 | 32 66 | Episode reward -0.7531891850786193 67 | -------------------------------------------------------------------------------- 
/agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v0_0_2019-06-27_02-20.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.1773691985773834 3 | 1 4 | Episode reward -0.07165981984008177 5 | 2 6 | Episode reward -0.05600778338487589 7 | 3 8 | Episode reward -0.017147995667289386 9 | 4 10 | Episode reward -0.07165981984008177 11 | 5 12 | Episode reward -0.11765390253724728 13 | 6 14 | Episode reward -0.04375219438411577 15 | 7 16 | Episode reward -0.07165981984008177 17 | 8 18 | Episode reward -0.05600778338487589 19 | 9 20 | Episode reward -0.017147995667289386 21 | 10 22 | Episode reward -0.03168755415632625 23 | 11 24 | Episode reward -0.017147995667289386 25 | 12 26 | Episode reward -0.056816767989868205 27 | 13 28 | Episode reward -0.06900285468108834 29 | 14 30 | Episode reward -0.056816767989868205 31 | 15 32 | Episode reward -0.23648286291487264 33 | 16 34 | Episode reward -0.04939413367980093 35 | 17 36 | Episode reward -0.10061051871572697 37 | 18 38 | Episode reward -0.12799412850693462 39 | 19 40 | Episode reward -0.056816767989868205 41 | 20 42 | Episode reward -0.10061051871572697 43 | 21 44 | Episode reward -0.23648286291487264 45 | 22 46 | Episode reward -2.915489470988342 47 | 23 48 | Episode reward -0.056816767989868205 49 | 24 50 | Episode reward -0.09268751073956763 51 | 25 52 | Episode reward -0.15251251125729526 53 | 26 54 | Episode reward -0.26682053860848115 55 | 27 56 | Episode reward -0.18616009283828266 57 | 28 58 | Episode reward -0.9934347421533327 59 | 29 60 | Episode reward -0.18616009283828266 61 | 30 62 | Episode reward -0.8512097466557835 63 | 31 64 | Episode reward -2.694261235972555 65 | 32 66 | Episode reward -0.9934347421533327 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v0_0_2019-06-27_02-45.txt: 
-------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.8165815641921785 3 | 1 4 | Episode reward -0.05759667146404953 5 | 2 6 | Episode reward -0.10692309240409559 7 | 3 8 | Episode reward -0.025563028993975754 9 | 4 10 | Episode reward -0.05759667146404953 11 | 5 12 | Episode reward -0.11432644621019947 13 | 6 14 | Episode reward -0.06334511663309476 15 | 7 16 | Episode reward -0.05759667146404953 17 | 8 18 | Episode reward -0.10692309240409559 19 | 9 20 | Episode reward -0.025563028993975754 21 | 10 22 | Episode reward -0.05838854071581314 23 | 11 24 | Episode reward -0.025563028993975754 25 | 12 26 | Episode reward -0.41666053219449506 27 | 13 28 | Episode reward -0.21909425719591408 29 | 14 30 | Episode reward -0.41666053219449506 31 | 15 32 | Episode reward -0.24924418376416793 33 | 16 34 | Episode reward -0.08682577698973708 35 | 17 36 | Episode reward -0.1382021423349986 37 | 18 38 | Episode reward -0.02181178854599989 39 | 19 40 | Episode reward -0.41666053219449506 41 | 20 42 | Episode reward -0.1382021423349986 43 | 21 44 | Episode reward -0.24924418376416793 45 | 22 46 | Episode reward -0.593509910706975 47 | 23 48 | Episode reward -0.41666053219449506 49 | 24 50 | Episode reward -0.04499950800281678 51 | 25 52 | Episode reward -0.20121016743334105 53 | 26 54 | Episode reward -0.02181178854599989 55 | 27 56 | Episode reward -0.15650724943417768 57 | 28 58 | Episode reward -1.1510654576222714 59 | 29 60 | Episode reward -0.15650724943417768 61 | 30 62 | Episode reward -0.08704831661635361 63 | 31 64 | Episode reward -0.03661957039752506 65 | 32 66 | Episode reward -1.1510654576222714 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v1_0_2019-06-27_03-09.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.12344744103434088 3 | 1 4 | Episode 
reward -0.04562751049736301 5 | 2 6 | Episode reward -0.01547414921554427 7 | 3 8 | Episode reward -0.03486750334587033 9 | 4 10 | Episode reward -0.12344744103434088 11 | 5 12 | Episode reward -0.04551660878378537 13 | 6 14 | Episode reward -0.04551660878378537 15 | 7 16 | Episode reward -0.07528506440711924 17 | 8 18 | Episode reward -0.04730982724362852 19 | 9 20 | Episode reward -0.03556957911642329 21 | 10 22 | Episode reward -0.04562751049736301 23 | 11 24 | Episode reward -0.03486750334587033 25 | 12 26 | Episode reward -0.42348452396269953 27 | 13 28 | Episode reward -0.04946264769151922 29 | 14 30 | Episode reward -0.3471255142446494 31 | 15 32 | Episode reward -0.42351063723373694 33 | 16 34 | Episode reward -0.2834440125140251 35 | 17 36 | Episode reward -0.23346073893995894 37 | 18 38 | Episode reward -1.8089780133965547 39 | 19 40 | Episode reward -0.23346073893995894 41 | 20 42 | Episode reward -0.3471255142446494 43 | 21 44 | Episode reward -0.16488923643114498 45 | 22 46 | Episode reward -0.03139897887813531 47 | 23 48 | Episode reward -0.7769356664981362 49 | 24 50 | Episode reward -0.42348452396269953 51 | 25 52 | Episode reward -0.3471255142446494 53 | 26 54 | Episode reward -0.11389052609899208 55 | 27 56 | Episode reward -0.1198322471206175 57 | 28 58 | Episode reward -0.14464017206183744 59 | 29 60 | Episode reward -0.2958707532655944 61 | 30 62 | Episode reward -0.14464017206183744 63 | 31 64 | Episode reward -0.33940754469184703 65 | 32 66 | Episode reward -0.12442741856286436 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v1_0_2019-06-27_03-32.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.18931413848411932 3 | 1 4 | Episode reward -0.04573603241623027 5 | 2 6 | Episode reward -0.015875507399865162 7 | 3 8 | Episode reward -0.03148454241572586 9 | 4 10 | Episode reward 
-0.18931413848411932 11 | 5 12 | Episode reward -0.02674410550990102 13 | 6 14 | Episode reward -0.02674410550990102 15 | 7 16 | Episode reward -0.09909943265261287 17 | 8 18 | Episode reward -0.08550953924088311 19 | 9 20 | Episode reward -0.030681680366898924 21 | 10 22 | Episode reward -0.04573603241623027 23 | 11 24 | Episode reward -0.03148454241572586 25 | 12 26 | Episode reward -0.12036283025386654 27 | 13 28 | Episode reward -0.05190992748467346 29 | 14 30 | Episode reward -1.5278571992515406 31 | 15 32 | Episode reward -0.4240902811044562 33 | 16 34 | Episode reward -0.28946871716165434 35 | 17 36 | Episode reward -0.21162755169075234 37 | 18 38 | Episode reward -6.5258739890010995 39 | 19 40 | Episode reward -0.21162755169075234 41 | 20 42 | Episode reward -1.5278571992515406 43 | 21 44 | Episode reward -0.231131219194128 45 | 22 46 | Episode reward -0.039702131852407216 47 | 23 48 | Episode reward -0.8224055849209042 49 | 24 50 | Episode reward -0.12036283025386654 51 | 25 52 | Episode reward -1.5278571992515406 53 | 26 54 | Episode reward -0.07201079165344185 55 | 27 56 | Episode reward -1.1627035418678109 57 | 28 58 | Episode reward -0.4881785845587996 59 | 29 60 | Episode reward -0.15608422290618762 61 | 30 62 | Episode reward -0.4881785845587996 63 | 31 64 | Episode reward -3.8926088646590706 65 | 32 66 | Episode reward -0.08971771788742115 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v1_0_2019-06-27_03-56.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.07736028009554574 3 | 1 4 | Episode reward -0.03237363292565743 5 | 2 6 | Episode reward -0.07229358945740837 7 | 3 8 | Episode reward -0.0900885523559721 9 | 4 10 | Episode reward -0.07736028009554574 11 | 5 12 | Episode reward -0.08062936653669973 13 | 6 14 | Episode reward -0.08062936653669973 15 | 7 16 | Episode reward 
-0.05776417903571414 17 | 8 18 | Episode reward -0.04332702401184624 19 | 9 20 | Episode reward -0.017147995667289386 21 | 10 22 | Episode reward -0.03237363292565743 23 | 11 24 | Episode reward -0.0900885523559721 25 | 12 26 | Episode reward -0.16372880339974427 27 | 13 28 | Episode reward -0.08614939218689693 29 | 14 30 | Episode reward -0.2570017943925532 31 | 15 32 | Episode reward -0.44723571267954254 33 | 16 34 | Episode reward -0.08663626766305371 35 | 17 36 | Episode reward -0.1692793712699529 37 | 18 38 | Episode reward -2.131332801165025 39 | 19 40 | Episode reward -0.1692793712699529 41 | 20 42 | Episode reward -0.2570017943925532 43 | 21 44 | Episode reward -0.6558510394487903 45 | 22 46 | Episode reward -0.11146176209073871 47 | 23 48 | Episode reward -1.2932839327117354 49 | 24 50 | Episode reward -0.16372880339974427 51 | 25 52 | Episode reward -0.2570017943925532 53 | 26 54 | Episode reward -0.12682125442670505 55 | 27 56 | Episode reward -0.17269189545039185 57 | 28 58 | Episode reward -0.18250507906579033 59 | 29 60 | Episode reward -0.3587506452066707 61 | 30 62 | Episode reward -0.18250507906579033 63 | 31 64 | Episode reward -1.2755510641871817 65 | 32 66 | Episode reward -0.12141618321372546 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v1_0_2019-06-27_04-43.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.07684152353032814 3 | 1 4 | Episode reward -0.045962537780843735 5 | 2 6 | Episode reward -0.05894668692531575 7 | 3 8 | Episode reward -0.10950759039076349 9 | 4 10 | Episode reward -0.07684152353032814 11 | 5 12 | Episode reward -0.05838853878050166 13 | 6 14 | Episode reward -0.05838853878050166 15 | 7 16 | Episode reward -0.0864913709226286 17 | 8 18 | Episode reward -0.04688075494038591 19 | 9 20 | Episode reward -0.02424549349434644 21 | 10 22 | Episode reward 
-0.045962537780843735 23 | 11 24 | Episode reward -0.10950759039076349 25 | 12 26 | Episode reward -0.1606009322801893 27 | 13 28 | Episode reward -0.022903628019543487 29 | 14 30 | Episode reward -0.06933409029273846 31 | 15 32 | Episode reward -0.2396586573600612 33 | 16 34 | Episode reward -0.6702433471005993 35 | 17 36 | Episode reward -0.1265282454590266 37 | 18 38 | Episode reward -0.5916317683684801 39 | 19 40 | Episode reward -0.1265282454590266 41 | 20 42 | Episode reward -0.06933409029273846 43 | 21 44 | Episode reward -0.8578763175453542 45 | 22 46 | Episode reward -0.15471405110604453 47 | 23 48 | Episode reward -5.192889907337971 49 | 24 50 | Episode reward -0.1606009322801893 51 | 25 52 | Episode reward -0.06933409029273846 53 | 26 54 | Episode reward -0.14788017738309175 55 | 27 56 | Episode reward -0.13133883916415992 57 | 28 58 | Episode reward -0.05222499467443732 59 | 29 60 | Episode reward -0.019273972168408884 61 | 30 62 | Episode reward -0.05222499467443732 63 | 31 64 | Episode reward -0.18084060746506173 65 | 32 66 | Episode reward -0.03812054518652109 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v2_0_2019-06-27_05-08.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.08345432741603323 3 | 1 4 | Episode reward -0.049814829420080545 5 | 2 6 | Episode reward -0.059693698413934466 7 | 3 8 | Episode reward -0.08345432741603323 9 | 4 10 | Episode reward -0.05776417903571414 11 | 5 12 | Episode reward -0.07384804646944561 13 | 6 14 | Episode reward -0.07290085303652788 15 | 7 16 | Episode reward -0.06681652737493046 17 | 8 18 | Episode reward -0.07290085303652788 19 | 9 20 | Episode reward -0.07384804646944561 21 | 10 22 | Episode reward -0.06681652737493046 23 | 11 24 | Episode reward -0.05776417903571414 25 | 12 26 | Episode reward -0.028530000865686894 27 | 13 28 | Episode reward 
-0.06444955932645753 29 | 14 30 | Episode reward -0.238145106977467 31 | 15 32 | Episode reward -0.1018512652713162 33 | 16 34 | Episode reward -0.10097466511748529 35 | 17 36 | Episode reward -0.1079718827556172 37 | 18 38 | Episode reward -0.27400442916099493 39 | 19 40 | Episode reward -0.10097466511748529 41 | 20 42 | Episode reward -0.09566163485491369 43 | 21 44 | Episode reward -0.09566163485491369 45 | 22 46 | Episode reward -0.03911855824319398 47 | 23 48 | Episode reward -0.11204630450593825 49 | 24 50 | Episode reward -0.12282162323677014 51 | 25 52 | Episode reward -0.03911855824319398 53 | 26 54 | Episode reward -0.1079718827556172 55 | 27 56 | Episode reward -0.24487390083943764 57 | 28 58 | Episode reward -0.06113684865751117 59 | 29 60 | Episode reward -0.3034009938896316 61 | 30 62 | Episode reward -0.3034009938896316 63 | 31 64 | Episode reward -0.1594554777880175 65 | 32 66 | Episode reward -0.06361680330470688 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v2_0_2019-06-27_05-31.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.11524133504110637 3 | 1 4 | Episode reward -0.08992858856178663 5 | 2 6 | Episode reward -0.04353272655726756 7 | 3 8 | Episode reward -0.11524133504110637 9 | 4 10 | Episode reward -0.08564947271047978 11 | 5 12 | Episode reward -0.14586820444412588 13 | 6 14 | Episode reward -0.025093990216201855 15 | 7 16 | Episode reward -0.03148454241572586 17 | 8 18 | Episode reward -0.025093990216201855 19 | 9 20 | Episode reward -0.14586820444412588 21 | 10 22 | Episode reward -0.03148454241572586 23 | 11 24 | Episode reward -0.08564947271047978 25 | 12 26 | Episode reward -0.20385336109473223 27 | 13 28 | Episode reward -0.14636261585596758 29 | 14 30 | Episode reward -1.1327020689984626 31 | 15 32 | Episode reward -0.9172768276009582 33 | 16 34 | Episode reward 
-0.03964059990890294 35 | 17 36 | Episode reward -0.05988501200318379 37 | 18 38 | Episode reward -0.07106387323392532 39 | 19 40 | Episode reward -0.03964059990890294 41 | 20 42 | Episode reward -0.8493923732967822 43 | 21 44 | Episode reward -0.8493923732967822 45 | 22 46 | Episode reward -0.12660927280807988 47 | 23 48 | Episode reward -0.06303161263444255 49 | 24 50 | Episode reward -0.19174354671946267 51 | 25 52 | Episode reward -0.12660927280807988 53 | 26 54 | Episode reward -0.05988501200318379 55 | 27 56 | Episode reward -0.15333588656015726 57 | 28 58 | Episode reward -0.4160170647065091 59 | 29 60 | Episode reward -0.1142545253694997 61 | 30 62 | Episode reward -0.1142545253694997 63 | 31 64 | Episode reward -4.544507568526969 65 | 32 66 | Episode reward -0.32672450951052096 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v2_0_2019-06-27_05-56.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.12641352283690965 3 | 1 4 | Episode reward -0.03987387065476032 5 | 2 6 | Episode reward -0.07212509973790678 7 | 3 8 | Episode reward -0.12641352283690965 9 | 4 10 | Episode reward -0.0990550240381061 11 | 5 12 | Episode reward -0.06550302314446718 13 | 6 14 | Episode reward -0.03225649838479324 15 | 7 16 | Episode reward -0.07273056934311439 17 | 8 18 | Episode reward -0.03225649838479324 19 | 9 20 | Episode reward -0.06550302314446718 21 | 10 22 | Episode reward -0.07273056934311439 23 | 11 24 | Episode reward -0.0990550240381061 25 | 12 26 | Episode reward -0.366807798898546 27 | 13 28 | Episode reward -0.05511382020649011 29 | 14 30 | Episode reward -1.2419186930408141 31 | 15 32 | Episode reward -0.11410802485392574 33 | 16 34 | Episode reward -0.1406051267734579 35 | 17 36 | Episode reward -0.03346788350060282 37 | 18 38 | Episode reward -0.10942606316324431 39 | 19 40 | Episode reward 
-0.1406051267734579 41 | 20 42 | Episode reward -0.4096951355007897 43 | 21 44 | Episode reward -0.4096951355007897 45 | 22 46 | Episode reward -0.12606541887944125 47 | 23 48 | Episode reward -0.14865581891690366 49 | 24 50 | Episode reward -0.08639532465599752 51 | 25 52 | Episode reward -0.12606541887944125 53 | 26 54 | Episode reward -0.03346788350060282 55 | 27 56 | Episode reward -0.6813509193447291 57 | 28 58 | Episode reward -0.589865888306975 59 | 29 60 | Episode reward -0.11282948322601336 61 | 30 62 | Episode reward -0.11282948322601336 63 | 31 64 | Episode reward -0.2643869403848182 65 | 32 66 | Episode reward -0.21995722381435445 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v2_0_2019-06-27_06-20.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.25809252295867663 3 | 1 4 | Episode reward -0.0486330136688752 5 | 2 6 | Episode reward -0.015171608188249835 7 | 3 8 | Episode reward -0.25809252295867663 9 | 4 10 | Episode reward -0.09905263516505987 11 | 5 12 | Episode reward -0.10149092910226445 13 | 6 14 | Episode reward -0.12116918092566707 15 | 7 16 | Episode reward -0.09129731384613651 17 | 8 18 | Episode reward -0.12116918092566707 19 | 9 20 | Episode reward -0.10149092910226445 21 | 10 22 | Episode reward -0.09129731384613651 23 | 11 24 | Episode reward -0.09905263516505987 25 | 12 26 | Episode reward -0.40642233821717 27 | 13 28 | Episode reward -0.07991105104162759 29 | 14 30 | Episode reward -0.8671670103871931 31 | 15 32 | Episode reward -1.7115912484353442 33 | 16 34 | Episode reward -0.04413342453034792 35 | 17 36 | Episode reward -0.17140264352070717 37 | 18 38 | Episode reward -0.7589566813089693 39 | 19 40 | Episode reward -0.04413342453034792 41 | 20 42 | Episode reward -0.1797310791497288 43 | 21 44 | Episode reward -0.1797310791497288 45 | 22 46 | Episode reward 
-0.07116373360376546 47 | 23 48 | Episode reward -0.39566124408991843 49 | 24 50 | Episode reward -0.6324063165052787 51 | 25 52 | Episode reward -0.07116373360376546 53 | 26 54 | Episode reward -0.17140264352070717 55 | 27 56 | Episode reward -1.0969050499136508 57 | 28 58 | Episode reward -1.2239555114924352 59 | 29 60 | Episode reward -0.3994809520897124 61 | 30 62 | Episode reward -0.3994809520897124 63 | 31 64 | Episode reward -0.32079908401553336 65 | 32 66 | Episode reward -1.9966199293143185 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v2_0_2019-06-27_06-45.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.1469759633620184 3 | 1 4 | Episode reward -0.0617906829854538 5 | 2 6 | Episode reward -0.07236097546061052 7 | 3 8 | Episode reward -0.1469759633620184 9 | 4 10 | Episode reward -0.07528506440711924 11 | 5 12 | Episode reward -0.13433717680764212 13 | 6 14 | Episode reward -0.07135866194937979 15 | 7 16 | Episode reward -0.06682007041669577 17 | 8 18 | Episode reward -0.07135866194937979 19 | 9 20 | Episode reward -0.13433717680764212 21 | 10 22 | Episode reward -0.06682007041669577 23 | 11 24 | Episode reward -0.07528506440711924 25 | 12 26 | Episode reward -0.1134068767232039 27 | 13 28 | Episode reward -0.1348765494708712 29 | 14 30 | Episode reward -0.18081824358046317 31 | 15 32 | Episode reward -0.3172515550235411 33 | 16 34 | Episode reward -0.07343577718851581 35 | 17 36 | Episode reward -0.2176521854777649 37 | 18 38 | Episode reward -0.8987355873773807 39 | 19 40 | Episode reward -0.07343577718851581 41 | 20 42 | Episode reward -0.2519617099927493 43 | 21 44 | Episode reward -0.2519617099927493 45 | 22 46 | Episode reward -0.047098776181865615 47 | 23 48 | Episode reward -0.23280796788760255 49 | 24 50 | Episode reward -0.28608538179796 51 | 25 52 | Episode reward 
-0.047098776181865615 53 | 26 54 | Episode reward -0.2176521854777649 55 | 27 56 | Episode reward -0.11629730784719988 57 | 28 58 | Episode reward -0.4716620192431292 59 | 29 60 | Episode reward -0.05573872295523574 61 | 30 62 | Episode reward -0.05573872295523574 63 | 31 64 | Episode reward -0.11691391111790872 65 | 32 66 | Episode reward -0.07538045293191908 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v3_0_2019-06-27_07-09.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.08638740568091152 3 | 1 4 | Episode reward -0.03605660072729882 5 | 2 6 | Episode reward -0.12358864913662346 7 | 3 8 | Episode reward -0.44374220064663006 9 | 4 10 | Episode reward -0.12358864913662346 11 | 5 12 | Episode reward -0.06054641382102512 13 | 6 14 | Episode reward -0.09944367289528483 15 | 7 16 | Episode reward -0.06054641382102512 17 | 8 18 | Episode reward -0.12358864913662346 19 | 9 20 | Episode reward -0.08638740568091152 21 | 10 22 | Episode reward -0.10586118611020318 23 | 11 24 | Episode reward -0.44374220064663006 25 | 12 26 | Episode reward -0.12971442013354983 27 | 13 28 | Episode reward -1.109465576657242 29 | 14 30 | Episode reward -0.3313346820727754 31 | 15 32 | Episode reward -1.755194911563077 33 | 16 34 | Episode reward -0.15381031714992777 35 | 17 36 | Episode reward -0.0605740521421638 37 | 18 38 | Episode reward -0.08663104816688905 39 | 19 40 | Episode reward -0.09468269002558587 41 | 20 42 | Episode reward -0.12971442013354983 43 | 21 44 | Episode reward -0.2875202458311226 45 | 22 46 | Episode reward -1.109465576657242 47 | 23 48 | Episode reward -0.09468269002558587 49 | 24 50 | Episode reward -0.2875202458311226 51 | 25 52 | Episode reward -1.755194911563077 53 | 26 54 | Episode reward -0.19502062082073335 55 | 27 56 | Episode reward -0.19813540052136996 57 | 28 58 | Episode reward 
-0.19813540052136996 59 | 29 60 | Episode reward -1.0318108575341531 61 | 30 62 | Episode reward -0.09908492324154165 63 | 31 64 | Episode reward -1.0318108575341531 65 | 32 66 | Episode reward -1.6302487731690234 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v3_0_2019-06-27_07-34.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.0695084660249576 3 | 1 4 | Episode reward -0.016995289137993072 5 | 2 6 | Episode reward -0.0961452535614264 7 | 3 8 | Episode reward -0.19028714263690982 9 | 4 10 | Episode reward -0.0961452535614264 11 | 5 12 | Episode reward -0.06062825286595766 13 | 6 14 | Episode reward -0.03547135150824786 15 | 7 16 | Episode reward -0.06062825286595766 17 | 8 18 | Episode reward -0.0961452535614264 19 | 9 20 | Episode reward -0.0695084660249576 21 | 10 22 | Episode reward -0.05518622453612801 23 | 11 24 | Episode reward -0.19028714263690982 25 | 12 26 | Episode reward -0.12239771997108517 27 | 13 28 | Episode reward -0.9519199936776028 29 | 14 30 | Episode reward -0.15036890418926624 31 | 15 32 | Episode reward -0.3480009766027503 33 | 16 34 | Episode reward -0.13129563046748455 35 | 17 36 | Episode reward -0.046428506152584946 37 | 18 38 | Episode reward -0.03561639758387669 39 | 19 40 | Episode reward -0.06264168283497852 41 | 20 42 | Episode reward -0.12239771997108517 43 | 21 44 | Episode reward -0.21077005252265207 45 | 22 46 | Episode reward -0.9519199936776028 47 | 23 48 | Episode reward -0.06264168283497852 49 | 24 50 | Episode reward -0.21077005252265207 51 | 25 52 | Episode reward -0.3480009766027503 53 | 26 54 | Episode reward -0.5365086518819182 55 | 27 56 | Episode reward -0.10795683896570846 57 | 28 58 | Episode reward -0.10795683896570846 59 | 29 60 | Episode reward -0.06075249694528274 61 | 30 62 | Episode reward -0.3171565974196827 63 | 31 64 | Episode reward 
-0.06075249694528274 65 | 32 66 | Episode reward -0.13507089333536895 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v3_0_2019-06-27_07-59.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.06242321623935916 3 | 1 4 | Episode reward -0.024625444252667814 5 | 2 6 | Episode reward -0.131026387398842 7 | 3 8 | Episode reward -0.4543350993133133 9 | 4 10 | Episode reward -0.131026387398842 11 | 5 12 | Episode reward -0.06062294509106389 13 | 6 14 | Episode reward -0.029447313268181625 15 | 7 16 | Episode reward -0.06062294509106389 17 | 8 18 | Episode reward -0.131026387398842 19 | 9 20 | Episode reward -0.06242321623935916 21 | 10 22 | Episode reward -0.10487375155800603 23 | 11 24 | Episode reward -0.4543350993133133 25 | 12 26 | Episode reward -0.12411247735981543 27 | 13 28 | Episode reward -1.0313379121421788 29 | 14 30 | Episode reward -0.4037729201163162 31 | 15 32 | Episode reward -0.8908111586607186 33 | 16 34 | Episode reward -0.193051643952839 35 | 17 36 | Episode reward -0.2498163781436723 37 | 18 38 | Episode reward -0.018739988557059398 39 | 19 40 | Episode reward -0.10345987921119307 41 | 20 42 | Episode reward -0.12411247735981543 43 | 21 44 | Episode reward -0.11830773447950055 45 | 22 46 | Episode reward -1.0313379121421788 47 | 23 48 | Episode reward -0.10345987921119307 49 | 24 50 | Episode reward -0.11830773447950055 51 | 25 52 | Episode reward -0.8908111586607186 53 | 26 54 | Episode reward -0.19830565585104554 55 | 27 56 | Episode reward -0.1051440014433829 57 | 28 58 | Episode reward -0.1051440014433829 59 | 29 60 | Episode reward -1.0946871157166271 61 | 30 62 | Episode reward -1.1348713559956034 63 | 31 64 | Episode reward -1.0946871157166271 65 | 32 66 | Episode reward -3.555349719262672 67 | -------------------------------------------------------------------------------- 
/agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v3_0_2019-06-27_08-28.txt: -------------------------------------------------------------------------------- 1 | 0 2 | Episode reward -0.08893002419236667 3 | 1 4 | Episode reward -0.024451252478888984 5 | 2 6 | Episode reward -0.12122142648667115 7 | 3 8 | Episode reward -0.1940514291305161 9 | 4 10 | Episode reward -0.12122142648667115 11 | 5 12 | Episode reward -0.07077750997983465 13 | 6 14 | Episode reward -0.12098504290918799 15 | 7 16 | Episode reward -0.07077750997983465 17 | 8 18 | Episode reward -0.12122142648667115 19 | 9 20 | Episode reward -0.08893002419236667 21 | 10 22 | Episode reward -0.07525614096104498 23 | 11 24 | Episode reward -0.1940514291305161 25 | 12 26 | Episode reward -0.19391441894371905 27 | 13 28 | Episode reward -0.587710279001769 29 | 14 30 | Episode reward -0.5183879693420832 31 | 15 32 | Episode reward -0.5296855328330918 33 | 16 34 | Episode reward -0.0878620331768716 35 | 17 36 | Episode reward -0.20220720274702203 37 | 18 38 | Episode reward -0.019818743534297516 39 | 19 40 | Episode reward -0.07638195185657574 41 | 20 42 | Episode reward -0.19391441894371905 43 | 21 44 | Episode reward -0.8848771177687619 45 | 22 46 | Episode reward -0.587710279001769 47 | 23 48 | Episode reward -0.07638195185657574 49 | 24 50 | Episode reward -0.8848771177687619 51 | 25 52 | Episode reward -0.5296855328330918 53 | 26 54 | Episode reward -1.6267307193556293 55 | 27 56 | Episode reward -0.04291500949749171 57 | 28 58 | Episode reward -0.04291500949749171 59 | 29 60 | Episode reward -0.6291206053605887 61 | 30 62 | Episode reward -1.339652036249995 63 | 31 64 | Episode reward -0.6291206053605887 65 | 32 66 | Episode reward -1.3896864640460618 67 | -------------------------------------------------------------------------------- /agents/eval/results/Sens/PPO/PPO_CM1-postgres-card-job-masking-v3_0_2019-06-27_08-58.txt: 
"""Connectivity check against the local ``imdbload`` PostgreSQL database.

Connects with hard-coded credentials, reads a single row from
``movie_info`` and prints it.
"""
import psycopg2


def main():
    """Connect to ``imdbload``, fetch one ``movie_info`` row and print it.

    Raises:
        SystemExit: with status 1 when the connection cannot be established.
    """
    try:
        # Alternative credential sets kept from the original script:
        #   user="postgres", password="admin"
        #   user="docker",   password="docker"
        conn = psycopg2.connect(host="localhost", database="imdbload", user="postgres", password="docker")
    except psycopg2.Error:
        print("I am unable to connect to the database")
        # Nothing below can work without a connection. Stop here instead of
        # falling through to a NameError on the unbound ``conn``.
        raise SystemExit(1)

    try:
        # The cursor context manager closes the cursor when the block exits.
        with conn.cursor() as cursor:
            # Disabled statement kept from the original script:
            # cursor.execute("""CREATE TABLE movie_info_idx AS SELECT * FROM movie_info;""")
            cursor.execute("""SELECT * FROM movie_info LIMIT 1;""")
            rows = cursor.fetchall()
        print(rows)
    finally:
        # Always release the server connection, even if the query failed.
        conn.close()


if __name__ == "__main__":
    main()
"""Build ``table.column`` key names from the index listing in ``indices.txt``.

``indices.txt`` is read as CSV with a header row; the columns
``table_name`` and ``column_name`` are used.

The original script carried the following catalog query as a trailing
string literal. NOTE(review): presumably this is the query whose output was
exported to ``indices.txt`` -- confirm.

    select
        t.relname as table_name,
        i.relname as index_name,
        a.attname as column_name
    from
        pg_class t,
        pg_class i,
        pg_index ix,
        pg_attribute a
    where
        t.oid = ix.indrelid
        and i.oid = ix.indexrelid
        and a.attrelid = t.oid
        and a.attnum = ANY(ix.indkey)
        and t.relkind = 'r'
    order by
        t.relname,
        i.relname;
"""
import csv


def build_keys(rows):
    """Return the key names derived from *rows*.

    Args:
        rows: iterable of mappings providing ``table_name`` and
            ``column_name`` (e.g. a ``csv.DictReader``).

    Returns:
        A list with two entries per row, in row order: ``<table>.<column>``
        and ``<table>2.<column>``, where ``<table>`` is the table name with
        all underscores removed.
    """
    keys = []
    for row in rows:
        table = row["table_name"].replace("_", "")
        column = row["column_name"]
        keys.append(table + "." + column)
        keys.append(table + "2." + column)
    return keys


def main():
    """Read ``indices.txt``, print every plain key, then the full key list."""
    # Mode 'rU' was removed in Python 3.11; the csv module asks for
    # newline='' so that it can handle line endings itself.
    with open('indices.txt', newline='') as f:
        keys = build_keys(csv.DictReader(f))

    # Only the un-suffixed key of each pair was echoed by the original loop.
    for key in keys[::2]:
        print(key)
    print(keys)


if __name__ == "__main__":
    main()
"""Reduce labelled SQL queries to join-only ``SELECT *`` queries.

Every input line has the form ``<label>|<query>``. For each line the
``FROM`` list is rewritten without aliases and the ``WHERE`` clause is cut
down to its plain equality conditions. The result is printed and written to
the output file as ``<label>|SELECT * FROM ... WHERE ...``.
"""
import os

SOURCE_PATH = "~//PycharmProjects/mt-join-queryoptimization-with-drl/agents/queries/job_queries_label.txt"
TARGET_PATH = "~//PycharmProjects/mt-join-queryoptimization-with-drl/agents/queries/job_queries_simple_labled.txt"
QSELECT = "SELECT * "

# A condition containing any of these is treated as a filter, not a join.
_FILTER_MARKERS = ("'", "%", "LIKE", "<", ">", "BETWEEN", "OR")


def _is_join_condition(condition):
    """Return True when *condition* is a plain equality without filter markers."""
    return "=" in condition and not any(m in condition for m in _FILTER_MARKERS)


def _join_only_where(where_part):
    """Return ``WHERE`` followed by the join conditions of *where_part*."""
    kept = [c for c in where_part.split('AND') if _is_join_condition(c)]
    # Conditions keep their original surrounding whitespace, so the bare
    # "AND" separator reproduces the spacing of the source query.
    return "WHERE" + "AND".join(kept)


def _rewrite_from(from_part):
    """Return ``(from_clause, alias_map)`` for a ``rel AS alias, ...`` list.

    A relation that has already been seen is emitted as ``rel AS rel2`` and
    its alias maps to ``rel2``; otherwise the alias maps to the relation name.
    """
    alias = {}
    relations = []
    for item in from_part.split(','):
        parts = item.split(' AS ')
        rel = parts[0].replace(' ', '')
        name = parts[1].replace(' ', '')
        if rel in alias.values():
            alias[name] = rel + "2"
            rel = rel + " AS " + rel + "2"
        else:
            alias[name] = rel
        relations.append(rel)
    return 'FROM ' + ", ".join(relations), alias


def simplify_query(line):
    """Return ``(qfrom, qwhere)`` for one ``<label>|<query>`` line.

    Args:
        line: input line containing ``FROM ... WHERE ...``.

    Returns:
        The rewritten ``FROM`` clause and the join-only ``WHERE`` clause,
        with every `` alias.`` reference replaced by its relation name.

    Raises:
        IndexError: if the line has no ``WHERE``/``FROM`` part or a relation
            without `` AS `` alias.
    """
    pieces = line.split('WHERE')
    qwhere = _join_only_where(pieces[1])
    qfrom, alias = _rewrite_from(pieces[0].split('FROM')[1])
    for key, val in alias.items():
        qwhere = qwhere.replace(' ' + key + '.', ' ' + val + '.')
    return qfrom, qwhere


def main():
    """Convert every query of SOURCE_PATH and write the result to TARGET_PATH."""
    # open() does not expand "~"; resolve it explicitly.
    source = os.path.expanduser(SOURCE_PATH)
    target = os.path.expanduser(TARGET_PATH)
    with open(source, "r") as f, open(target, "w") as w:
        for x in f:
            if not x.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            qfrom, qwhere = simplify_query(x)
            print(QSELECT)
            print(qfrom)
            print(qwhere)
            # NOTE(review): no newline is appended here; a record only ends
            # with one when the last kept condition carried the input line's
            # newline. Confirm the consumer expects this.
            w.write(x.split("|")[0] + "|" + QSELECT + qfrom + ' ' + qwhere)


if __name__ == "__main__":
    main()
#print(j) 49 | -------------------------------------------------------------------------------- /agents/queries/helper_func/sql_to_rl_schema: -------------------------------------------------------------------------------- 1 | import json as j 2 | 3 | f = open("imdb_schema.sql", "r") 4 | w = open("imdb_schema.json", "w") 5 | sql = f.read().replace(' ','') 6 | json = {} 7 | for table in sql.split(');\n\n'): 8 | element=table.split(' (') 9 | key = element[0].replace('CREATE TABLE ','') 10 | values = [] 11 | for column in element[1].split(',\n'): 12 | values.append(column.split(' ')[0].replace('\n','')) 13 | json[key]=values 14 | 15 | print(len(json)) 16 | print(sum(len(x) for x in json.values())) 17 | print(j.dumps(json)) 18 | w.write(j.dumps(json)) 19 | f.close() 20 | w.close() -------------------------------------------------------------------------------- /agents/queries/imdb_schema.json: -------------------------------------------------------------------------------- 1 | {"aka_name": ["id", "person_id", "name", "imdb_index", "name_pcode_cf", "name_pcode_nf", "surname_pcode", "md5sum"], "aka_title": ["id", "movie_id", "title", "imdb_index", "kind_id", "production_year", "phonetic_code", "episode_of_id", "season_nr", "episode_nr", "note", "md5sum"], "cast_info": ["id", "person_id", "movie_id", "person_role_id", "note", "nr_order", "role_id"], "char_name": ["id", "name", "imdb_index", "imdb_id", "name_pcode_nf", "surname_pcode", "md5sum"], "comp_cast_type": ["id", "kind"], "company_name": ["id", "name", "country_code", "imdb_id", "name_pcode_nf", "name_pcode_sf", "md5sum"], "company_type": ["id", "kind"], "complete_cast": ["id", "movie_id", "subject_id", "status_id"], "info_type": ["id", "info"], "keyword": ["id", "keyword", "phonetic_code"], "kind_type": ["id", "kind"], "link_type": ["id", "link"], "movie_companies": ["id", "movie_id", "company_id", "company_type_id", "note"], "movie_info": ["id", "movie_id", "info_type_id", "info", "note"], "movie_info_idx": 
["id", "movie_id", "info_type_id", "info", "note"], "movie_keyword": ["id", "movie_id", "keyword_id"], "movie_link": ["id", "movie_id", "linked_movie_id", "link_type_id"], "name": ["id", "name", "imdb_index", "imdb_id", "gender", "name_pcode_cf", "name_pcode_nf", "surname_pcode", "md5sum"], "person_info": ["id", "person_id", "info_type_id", "info", "note"], "role_type": ["id", "role"], "title": ["id", "title", "imdb_index", "kind_id", "production_year", "imdb_id", "phonetic_code", "episode_of_id", "season_nr", "episode_nr", "series_years", "md5sum"]} -------------------------------------------------------------------------------- /bin/docker_entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is the entrypoint for our Docker image. 3 | 4 | set -ex 5 | 6 | # Set up display; otherwise rendering will fail 7 | Xvfb -screen 0 1024x768x24 & 8 | export DISPLAY=:0 9 | 10 | # Wait for the file to come up 11 | display=0 12 | file="/tmp/.X11-unix/X$display" 13 | for i in $(seq 1 10); do 14 | if [ -e "$file" ]; then 15 | break 16 | fi 17 | 18 | echo "Waiting for $file to be created (try $i/10)" 19 | sleep "$i" 20 | done 21 | if ! [ -e "$file" ]; then 22 | echo "Timing out: $file was not created" 23 | exit 1 24 | fi 25 | 26 | exec "$@" 27 | -------------------------------------------------------------------------------- /bin/render.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import gym 4 | 5 | 6 | parser = argparse.ArgumentParser(description='Renders a Gym environment for quick inspection.') 7 | parser.add_argument('env_id', type=str, help='the ID of the environment to be rendered (e.g. 
HalfCheetah-v1') 8 | parser.add_argument('--step', type=int, default=1) 9 | args = parser.parse_args() 10 | 11 | env = gym.make(args.env_id) 12 | env.reset() 13 | 14 | step = 0 15 | while True: 16 | if args.step: 17 | env.step(env.action_space.sample()) 18 | env.render() 19 | if step % 10 == 0: 20 | env.reset() 21 | step += 1 22 | -------------------------------------------------------------------------------- /docs/misc.md: -------------------------------------------------------------------------------- 1 | # Miscellaneous 2 | 3 | Here we have a bunch of tools, libs, apis, tutorials, resources, etc. provided by the community to add value to the gym ecosystem. 4 | 5 | ## OpenAIGym.jl 6 | 7 | Convenience wrapper of the OpenAI Gym for the Julia language [/tbreloff/OpenAIGym.jl](https://github.com/tbreloff/OpenAIGym.jl) -------------------------------------------------------------------------------- /docs/readme.md: -------------------------------------------------------------------------------- 1 | # Table of Contents 2 | 3 | - [Agents](agents.md) contains a listing of agents compatible with gym environments. Agents facilitate the running of an algorithm against an environment. 4 | 5 | - [Environments](environments.md) lists more environments to run your algorithms against. These do not come prepackaged with the gym. 6 | 7 | - [Wrappers](wrappers.md) list of general purpose wrappers for environments. These can perform pre/postprocessing on the data that is exchanged between the agent and the environment. 8 | 9 | - [Miscellaneous](misc.md) is a collection of other value-add tools and utilities. These could be anything from a small convenience lib to a collection of video tutorials or a new language binding. 
# Support code for cem.py


class BinaryActionLinearPolicy(object):
    """Linear policy that chooses between the two actions 0 and 1.

    ``theta`` is split into a weight vector (all but the last entry) and a
    scalar bias (the last entry).
    """

    def __init__(self, theta):
        self.w, self.b = theta[:-1], theta[-1]

    def act(self, ob):
        """Return 1 when ``ob . w + b`` is negative, otherwise 0."""
        score = ob.dot(self.w) + self.b
        return int(score < 0)


class ContinuousActionLinearPolicy(object):
    """Linear policy whose action is ``ob . W + b``.

    ``theta`` must hold ``(n_in + 1) * n_out`` entries: the first
    ``n_in * n_out`` are reshaped to the ``(n_in, n_out)`` weight matrix and
    the rest to the ``(1, n_out)`` bias row.
    """

    def __init__(self, theta, n_in, n_out):
        expected = (n_in + 1) * n_out
        assert len(theta) == expected
        n_weights = n_in * n_out
        self.W = theta[:n_weights].reshape(n_in, n_out)
        self.b = theta[n_weights:].reshape(1, n_out)

    def act(self, ob):
        """Return the affine map of ``ob``; the bias row is broadcast."""
        return ob.dot(self.W) + self.b
-------------------------------------------------------------------------------- /gym.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: gym 3 | Version: 0.12.0 4 | Summary: The OpenAI Gym: A toolkit for developing and comparing your reinforcement learning agents. 5 | Home-page: https://github.com/openai/gym 6 | Author: OpenAI 7 | Author-email: gym@openai.com 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | Provides-Extra: classic_control 12 | Provides-Extra: robotics 13 | Provides-Extra: atari 14 | Provides-Extra: mujoco 15 | Provides-Extra: all 16 | Provides-Extra: box2d 17 | -------------------------------------------------------------------------------- /gym.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /gym.egg-info/not-zip-safe: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /gym.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | numpy>=1.10.4 3 | requests>=2.0 4 | six 5 | pyglet>=1.2.0 6 | psycopg2 7 | 8 | [all] 9 | atari_py>=0.1.4 10 | Pillow 11 | PyOpenGL 12 | box2d-py>=2.3.5 13 | PyOpenGL 14 | mujoco_py>=1.50 15 | imageio 16 | mujoco_py>=1.50 17 | imageio 18 | 19 | [atari] 20 | atari_py>=0.1.4 21 | Pillow 22 | PyOpenGL 23 | 24 | [box2d] 25 | box2d-py>=2.3.5 26 | 27 | [classic_control] 28 | PyOpenGL 29 | 30 | [mujoco] 31 | mujoco_py>=1.50 32 | imageio 33 | 34 | [robotics] 35 | mujoco_py>=1.50 36 | imageio 37 | -------------------------------------------------------------------------------- /gym.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | gym 2 | 
-------------------------------------------------------------------------------- /gym/__init__.py: -------------------------------------------------------------------------------- 1 | import distutils.version 2 | import os 3 | import sys 4 | import warnings 5 | 6 | from gym import error 7 | from gym.version import VERSION as __version__ 8 | 9 | from gym.core import Env, GoalEnv, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper 10 | from gym.spaces import Space 11 | from gym.envs import make, spec, register 12 | from gym import logger 13 | 14 | __all__ = ["Env", "Space", "Wrapper", "make", "spec", "register"] 15 | -------------------------------------------------------------------------------- /gym/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /gym/__pycache__/core.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/__pycache__/core.cpython-36.pyc -------------------------------------------------------------------------------- /gym/__pycache__/error.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/__pycache__/error.cpython-36.pyc -------------------------------------------------------------------------------- /gym/__pycache__/logger.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/__pycache__/logger.cpython-36.pyc -------------------------------------------------------------------------------- /gym/__pycache__/version.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/__pycache__/version.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/__pycache__/registration.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/__pycache__/registration.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/algorithmic/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.algorithmic.copy_ import CopyEnv 2 | from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv 3 | from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv 4 | from gym.envs.algorithmic.reverse import ReverseEnv 5 | from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv 6 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/__pycache__/__init__.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/algorithmic/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/algorithmic/__pycache__/algorithmic_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/algorithmic/__pycache__/algorithmic_env.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/algorithmic/__pycache__/copy_.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/algorithmic/__pycache__/copy_.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/algorithmic/__pycache__/duplicated_input.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/algorithmic/__pycache__/duplicated_input.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/algorithmic/__pycache__/repeat_copy.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/algorithmic/__pycache__/repeat_copy.cpython-36.pyc -------------------------------------------------------------------------------- 
/gym/envs/algorithmic/__pycache__/reverse.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/algorithmic/__pycache__/reverse.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/algorithmic/__pycache__/reversed_addition.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/algorithmic/__pycache__/reversed_addition.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/algorithmic/copy_.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to copy content from the input tape to 3 | the output tape. http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | 8 | class CopyEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | def __init__(self, base=5, chars=True): 10 | super(CopyEnv, self).__init__(base=base, chars=chars) 11 | 12 | def target_from_input_data(self, input_data): 13 | return input_data 14 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/duplicated_input.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to return every nth character from the input tape. 
3 | http://arxiv.org/abs/1511.07275 4 | """ 5 | from __future__ import division 6 | from gym.envs.algorithmic import algorithmic_env 7 | 8 | 9 | class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv): 10 | def __init__(self, duplication=2, base=5): 11 | self.duplication = duplication 12 | super(DuplicatedInputEnv, self).__init__(base=base, chars=True) 13 | 14 | def generate_input_data(self, size): 15 | res = [] 16 | if size < self.duplication: 17 | size = self.duplication 18 | for i in range(size//self.duplication): 19 | char = self.np_random.randint(self.base) 20 | for _ in range(self.duplication): 21 | res.append(char) 22 | return res 23 | 24 | def target_from_input_data(self, input_data): 25 | return [input_data[i] for i in range(0, len(input_data), self.duplication)] 26 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/repeat_copy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to copy content multiple times from the input tape to 3 | the output tape. http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | 8 | class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1 10 | 11 | def __init__(self, base=5): 12 | super(RepeatCopyEnv, self).__init__(base=base, chars=True) 13 | self.last = 50 14 | 15 | def target_from_input_data(self, input_data): 16 | return input_data + list(reversed(input_data)) + input_data 17 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/reverse.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to reverse content over the input tape. 
3 | http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | 8 | class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1 10 | 11 | def __init__(self, base=2): 12 | super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1) 13 | self.last = 50 14 | 15 | def target_from_input_data(self, input_str): 16 | return list(reversed(input_str)) 17 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/reversed_addition.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from gym.envs.algorithmic import algorithmic_env 3 | 4 | 5 | class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv): 6 | def __init__(self, rows=2, base=3): 7 | super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False) 8 | 9 | def target_from_input_data(self, input_strings): 10 | curry = 0 11 | target = [] 12 | for digits in input_strings: 13 | total = sum(digits) + curry 14 | target.append(total % self.base) 15 | curry = total // self.base 16 | 17 | if curry > 0: 18 | target.append(curry) 19 | return target 20 | 21 | @property 22 | def time_limit(self): 23 | # Quirk preserved for the sake of consistency: add the length of the input 24 | # rather than the length of the desired output (which may differ if there's 25 | # an extra carried digit). 26 | # TODO: It seems like this time limit is so strict as to make Addition3-v0 27 | # unsolvable, since agents aren't even given enough time steps to look at 28 | # all the digits. (The solutions on the scoreboard seem to only work by 29 | # save-scumming.) 
30 | return self.input_width*2 + 4 31 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/algorithmic/tests/__init__.py -------------------------------------------------------------------------------- /gym/envs/atari/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.atari.atari_env import AtariEnv 2 | -------------------------------------------------------------------------------- /gym/envs/box2d/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | import Box2D 3 | from gym.envs.box2d.lunar_lander import LunarLander 4 | from gym.envs.box2d.lunar_lander import LunarLanderContinuous 5 | from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore 6 | from gym.envs.box2d.car_racing import CarRacing 7 | except ImportError: 8 | Box2D = None 9 | -------------------------------------------------------------------------------- /gym/envs/box2d/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/box2d/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/box2d/test_lunar_lander.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | try: 3 | import Box2D 4 | from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander 5 | except ImportError: 6 | Box2D = None 7 | 8 | 9 | @pytest.mark.skipif(Box2D is None, reason='Box2D not 
installed') 10 | def test_lunar_lander(): 11 | _test_lander(LunarLander(), seed=0) 12 | 13 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed') 14 | def test_lunar_lander_continuous(): 15 | _test_lander(LunarLanderContinuous(), seed=0) 16 | 17 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed') 18 | def _test_lander(env, seed=None, render=False): 19 | total_reward = demo_heuristic_lander(env, seed=seed, render=render) 20 | assert total_reward > 100 21 | 22 | 23 | -------------------------------------------------------------------------------- /gym/envs/classic_control/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.classic_control.cartpole import CartPoleEnv 2 | from gym.envs.classic_control.mountain_car import MountainCarEnv 3 | from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv 4 | from gym.envs.classic_control.pendulum import PendulumEnv 5 | from gym.envs.classic_control.acrobot import AcrobotEnv 6 | 7 | -------------------------------------------------------------------------------- /gym/envs/classic_control/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/classic_control/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/classic_control/__pycache__/acrobot.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/classic_control/__pycache__/acrobot.cpython-36.pyc -------------------------------------------------------------------------------- 
/gym/envs/classic_control/__pycache__/cartpole.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/classic_control/__pycache__/cartpole.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/classic_control/__pycache__/continuous_mountain_car.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/classic_control/__pycache__/continuous_mountain_car.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/classic_control/__pycache__/mountain_car.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/classic_control/__pycache__/mountain_car.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/classic_control/__pycache__/pendulum.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/classic_control/__pycache__/pendulum.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/classic_control/__pycache__/rendering.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/classic_control/__pycache__/rendering.cpython-36.pyc 
-------------------------------------------------------------------------------- /gym/envs/classic_control/assets/clockwise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/classic_control/assets/clockwise.png -------------------------------------------------------------------------------- /gym/envs/database/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from gym.envs.database.cm1_postgres_card_env_job import CM1PostgresCardJob 3 | from gym.envs.database.cm1_postgres_card_env_job_one import CM1PostgresCardJobOne 4 | from gym.envs.database.simple_corridor_ray import SimpleCorridor 5 | 6 | 7 | from gym.envs.database.cm1_postgres_card_env_job_crossval_0 import CM1PostgresCardJob0 8 | from gym.envs.database.cm1_postgres_card_env_job_crossval_1 import CM1PostgresCardJob1 9 | from gym.envs.database.cm1_postgres_card_env_job_crossval_2 import CM1PostgresCardJob2 10 | from gym.envs.database.cm1_postgres_card_env_job_crossval_3 import CM1PostgresCardJob3 -------------------------------------------------------------------------------- /gym/envs/database/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/database/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/database/__pycache__/cm1_postgres_card_env_job_crossval_0.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/database/__pycache__/cm1_postgres_card_env_job_crossval_0.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/database/__pycache__/cm1_postgres_card_env_job_crossval_1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/database/__pycache__/cm1_postgres_card_env_job_crossval_1.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/database/__pycache__/cm1_postgres_card_env_job_crossval_2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/database/__pycache__/cm1_postgres_card_env_job_crossval_2.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/database/__pycache__/cm1_postgres_card_env_job_crossval_3.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/database/__pycache__/cm1_postgres_card_env_job_crossval_3.cpython-36.pyc -------------------------------------------------------------------------------- /gym/envs/database/__pycache__/cm1_postgres_card_env_job_one.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/database/__pycache__/cm1_postgres_card_env_job_one.cpython-36.pyc 
import gym
from gym import error, spaces, utils
from gym.utils import seeding
import os
import numpy as np
import random
from itertools import permutations
from gym.spaces import Discrete, Box


class SimpleCorridor(gym.Env):
    """Simple example for environment tests.

    The agent starts at position 0 and the episode is done once the position
    reaches ``end_pos``.  Action 1 moves one step forward; action 0 moves one
    step back unless the agent is already at position 0.
    """

    # Class-level placeholders; __init__ overwrites the two spaces.
    actions = []
    action_obj = []
    action_list = []
    action_space = None
    observation_space = None
    obs = []
    reward_range = [0.0, 1.0]

    def __init__(self):  # , config):
        self.end_pos = 9  # config["corridor_length"]
        self.cur_pos = 0
        self.action_space = Discrete(2)
        self.observation_space = Box(
            0.0, self.end_pos, shape=(1,), dtype=np.float32
        )

    def reset(self):
        """Move back to position 0 and return it as a one-element list."""
        self.cur_pos = 0
        return [self.cur_pos]  # , 0, False

    def step(self, action):
        """Apply *action* and return ``(observation, reward, done, info)``.

        The reward is 1 on the step where the end is reached and 0 otherwise.
        """
        assert action in [0, 1], action
        if action == 1:
            self.cur_pos += 1
        elif action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1
        done = self.cur_pos >= self.end_pos
        reward = 1 if done else 0
        return [self.cur_pos], reward, done, {}

    def render(self, mode='human', close=False):
        """Return the current position; both arguments are ignored."""
        return self.cur_pos

    def close(self):
        """Nothing to release."""
        return None

    def seed(self, seed=None):
        """Store a generator from ``seeding.np_random`` and return ``[seed]``."""
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]
-------------------------------------------------------------------------------- /gym/envs/mujoco/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco.mujoco_env import MujocoEnv 2 | # ^^^^^ so that user gets the correct error 3 | # message if mujoco is not installed correctly 4 | from gym.envs.mujoco.ant import AntEnv 5 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv 6 | from gym.envs.mujoco.hopper import HopperEnv 7 | from gym.envs.mujoco.walker2d import Walker2dEnv 8 | from gym.envs.mujoco.humanoid import HumanoidEnv 9 | from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv 10 | from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv 11 | from gym.envs.mujoco.reacher import ReacherEnv 12 | from gym.envs.mujoco.swimmer import SwimmerEnv 13 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv 14 | from gym.envs.mujoco.pusher import PusherEnv 15 | from gym.envs.mujoco.thrower import ThrowerEnv 16 | from gym.envs.mujoco.striker import StrikerEnv 17 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/inverted_pendulum.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /gym/envs/mujoco/half_cheetah.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, action): 11 | xposbefore = self.sim.data.qpos[0] 12 | self.do_simulation(action, self.frame_skip) 13 | xposafter = self.sim.data.qpos[0] 14 | ob = self._get_obs() 15 
| reward_ctrl = - 0.1 * np.square(action).sum() 16 | reward_run = (xposafter - xposbefore)/self.dt 17 | reward = reward_ctrl + reward_run 18 | done = False 19 | return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl) 20 | 21 | def _get_obs(self): 22 | return np.concatenate([ 23 | self.sim.data.qpos.flat[1:], 24 | self.sim.data.qvel.flat, 25 | ]) 26 | 27 | def reset_model(self): 28 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq) 29 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 30 | self.set_state(qpos, qvel) 31 | return self._get_obs() 32 | 33 | def viewer_setup(self): 34 | self.viewer.cam.distance = self.model.stat.extent * 0.5 35 | -------------------------------------------------------------------------------- /gym/envs/mujoco/inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2) 9 | 10 | def step(self, a): 11 | reward = 1.0 12 | self.do_simulation(a, self.frame_skip) 13 | ob = self._get_obs() 14 | notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2) 15 | done = not notdone 16 | return ob, reward, done, {} 17 | 18 | def reset_model(self): 19 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01) 20 | qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01) 21 | self.set_state(qpos, qvel) 22 | return self._get_obs() 23 | 24 | def _get_obs(self): 25 | return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel() 26 | 27 | def viewer_setup(self): 28 | v = self.viewer 29 | v.cam.trackbodyid = 0 30 | v.cam.distance = self.model.stat.extent 31 | 
-------------------------------------------------------------------------------- /gym/envs/mujoco/swimmer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, a): 11 | ctrl_cost_coeff = 0.0001 12 | xposbefore = self.sim.data.qpos[0] 13 | self.do_simulation(a, self.frame_skip) 14 | xposafter = self.sim.data.qpos[0] 15 | reward_fwd = (xposafter - xposbefore) / self.dt 16 | reward_ctrl = - ctrl_cost_coeff * np.square(a).sum() 17 | reward = reward_fwd + reward_ctrl 18 | ob = self._get_obs() 19 | return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl) 20 | 21 | def _get_obs(self): 22 | qpos = self.sim.data.qpos 23 | qvel = self.sim.data.qvel 24 | return np.concatenate([qpos.flat[2:], qvel.flat]) 25 | 26 | def reset_model(self): 27 | self.set_state( 28 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq), 29 | self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv) 30 | ) 31 | return self._get_obs() 32 | -------------------------------------------------------------------------------- /gym/envs/mujoco/walker2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, a): 12 | posbefore = self.sim.data.qpos[0] 13 | self.do_simulation(a, self.frame_skip) 14 | posafter, height, ang = self.sim.data.qpos[0:3] 15 | alive_bonus = 1.0 16 | reward = ((posafter - 
posbefore) / self.dt) 17 | reward += alive_bonus 18 | reward -= 1e-3 * np.square(a).sum() 19 | done = not (height > 0.8 and height < 2.0 and 20 | ang > -1.0 and ang < 1.0) 21 | ob = self._get_obs() 22 | return ob, reward, done, {} 23 | 24 | def _get_obs(self): 25 | qpos = self.sim.data.qpos 26 | qvel = self.sim.data.qvel 27 | return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel() 28 | 29 | def reset_model(self): 30 | self.set_state( 31 | self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq), 32 | self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 33 | ) 34 | return self._get_obs() 35 | 36 | def viewer_setup(self): 37 | self.viewer.cam.trackbodyid = 2 38 | self.viewer.cam.distance = self.model.stat.extent * 0.5 39 | self.viewer.cam.lookat[2] = 1.15 40 | self.viewer.cam.elevation = -20 41 | -------------------------------------------------------------------------------- /gym/envs/robotics/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.robotics.fetch_env import FetchEnv 2 | from gym.envs.robotics.fetch.slide import FetchSlideEnv 3 | from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv 4 | from gym.envs.robotics.fetch.push import FetchPushEnv 5 | from gym.envs.robotics.fetch.reach import FetchReachEnv 6 | 7 | from gym.envs.robotics.hand.reach import HandReachEnv 8 | from gym.envs.robotics.hand.manipulate import HandBlockEnv 9 | from gym.envs.robotics.hand.manipulate import HandEggEnv 10 | from gym.envs.robotics.hand.manipulate import HandPenEnv 11 | 12 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandBlockTouchSensorsEnv 13 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandEggTouchSensorsEnv 14 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandPenTouchSensorsEnv 15 | -------------------------------------------------------------------------------- 
/gym/envs/robotics/assets/fetch/push.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/reach.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/slide.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/.get: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/.get -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/base_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/estop_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/estop_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/gripper_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/gripper_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/laser_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/laser_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl -------------------------------------------------------------------------------- 
/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl 
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/F1.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/F2.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/F3.stl 
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH1_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/TH1_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH2_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/TH2_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH3_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/TH3_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/forearm_electric.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/forearm_electric.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl 
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/knuckle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/knuckle.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/palm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/palm.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/wrist.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/stls/hand/wrist.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/textures/block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/textures/block.png -------------------------------------------------------------------------------- 
/gym/envs/robotics/assets/textures/block_hidden.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/assets/textures/block_hidden.png -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/fetch/__init__.py -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/pick_and_place.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'pick_and_place.xml') 8 | 9 | 10 | class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | fetch_env.FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20, 20 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0, 21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/push.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import 
fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'push.xml') 8 | 9 | 10 | class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | fetch_env.FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0, 21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/reach.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'reach.xml') 8 | 9 | 10 | class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.4049, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | } 17 | fetch_env.FetchEnv.__init__( 18 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20, 19 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0, 20 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 21 | initial_qpos=initial_qpos, reward_type=reward_type) 22 | utils.EzPickle.__init__(self) 23 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/slide.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from 
gym import utils 5 | from gym.envs.robotics import fetch_env 6 | 7 | 8 | # Ensure we get the path separator correct on windows 9 | MODEL_XML_PATH = os.path.join('fetch', 'slide.xml') 10 | 11 | 12 | class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle): 13 | def __init__(self, reward_type='sparse'): 14 | initial_qpos = { 15 | 'robot0:slide0': 0.05, 16 | 'robot0:slide1': 0.48, 17 | 'robot0:slide2': 0.0, 18 | 'object0:joint': [1.7, 1.1, 0.4, 1., 0., 0., 0.], 19 | } 20 | fetch_env.FetchEnv.__init__( 21 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 22 | gripper_extra_height=-0.02, target_in_the_air=False, target_offset=np.array([0.4, 0.0, 0.0]), 23 | obj_range=0.1, target_range=0.3, distance_threshold=0.05, 24 | initial_qpos=initial_qpos, reward_type=reward_type) 25 | utils.EzPickle.__init__(self) 26 | -------------------------------------------------------------------------------- /gym/envs/robotics/hand/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/robotics/hand/__init__.py -------------------------------------------------------------------------------- /gym/envs/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/envs/tests/__init__.py -------------------------------------------------------------------------------- /gym/envs/tests/spec_list.py: -------------------------------------------------------------------------------- 1 | from gym import envs, logger 2 | import os 3 | 4 | skip_mujoco = not (os.environ.get('MUJOCO_KEY')) 5 | if not skip_mujoco: 6 | try: 7 | import mujoco_py 8 | except ImportError: 9 | skip_mujoco = True 10 | 11 | def should_skip_env_spec_for_tests(spec): 12 | # We skip 
tests for envs that require dependencies or are otherwise 13 | # troublesome to run frequently 14 | ep = spec._entry_point 15 | # Skip mujoco tests for pull request CI 16 | if skip_mujoco and (ep.startswith('gym.envs.mujoco') or ep.startswith('gym.envs.robotics:')): 17 | return True 18 | try: 19 | import atari_py 20 | except ImportError: 21 | if ep.startswith('gym.envs.atari'): 22 | return True 23 | try: 24 | import Box2D 25 | except ImportError: 26 | if ep.startswith('gym.envs.box2d'): 27 | return True 28 | 29 | if ( 'GoEnv' in ep or 30 | 'HexEnv' in ep or 31 | (ep.startswith("gym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest")) 32 | ): 33 | logger.warn("Skipping tests for env {}".format(ep)) 34 | return True 35 | return False 36 | 37 | spec_list = [spec for spec in sorted(envs.registry.all(), key=lambda x: x.id) if spec._entry_point is not None and not should_skip_env_spec_for_tests(spec)] 38 | -------------------------------------------------------------------------------- /gym/envs/tests/test_kellycoinflip.py: -------------------------------------------------------------------------------- 1 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv 2 | 3 | 4 | class TestKellyCoinflipEnv: 5 | @staticmethod 6 | def test_done_when_reaches_max_wealth(): 7 | # https://github.com/openai/gym/issues/1266 8 | env = KellyCoinflipEnv() 9 | env.seed(1) 10 | env.reset() 11 | done = False 12 | 13 | while not done: 14 | action = int(env.wealth * 20) # bet 20% of the wealth 15 | observation, reward, done, info = env.step(action) 16 | 17 | assert env.wealth == env.max_wealth 18 | -------------------------------------------------------------------------------- /gym/envs/toy_text/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.toy_text.blackjack import BlackjackEnv 2 | from gym.envs.toy_text.roulette import RouletteEnv 3 | from gym.envs.toy_text.frozen_lake import 
FrozenLakeEnv
from gym.envs.toy_text.nchain import NChainEnv
from gym.envs.toy_text.hotter_colder import HotterColder
from gym.envs.toy_text.guessing_game import GuessingGame
from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv
from gym.envs.toy_text.kellycoinflip import KellyCoinflipGeneralizedEnv
from gym.envs.toy_text.cliffwalking import CliffWalkingEnv
from gym.envs.toy_text.taxi import TaxiEnv
from gym.envs.toy_text.guessing_game import GuessingGame
from gym.envs.toy_text.hotter_colder import HotterColder

# --------------------------------------------------------------------------------
# /gym/envs/toy_text/roulette.py:
# --------------------------------------------------------------------------------
import gym
from gym import spaces
from gym.utils import seeding


class RouletteEnv(gym.Env):
    """Single-state roulette game.

    The wheel has ``spots`` pockets (37 by default), numbered from 0.
    Actions ``0 .. spots - 1`` are bets on a number. The extra last action
    (``self.n - 1``, i.e. 37 with the defaults) walks away: it ends the
    episode with a reward of 0.

    Reward for a bet, as implemented in ``step``:

    * bet 0 and the wheel shows 0: ``self.n - 2`` (36.0 with the defaults)
    * bet and spin both non-zero and of equal parity: 1.0
    * anything else: -1.0

    NOTE(review): earlier documentation of this class stated a payout of 35
    for hitting 0 and a long-run reward of -1/37 for always betting on 0.
    The code pays ``self.n - 2`` instead; confirm which one is intended.
    """
    def __init__(self, spots=37):
        # One additional action on top of the pockets: "walk away".
        self.n = spots + 1
        self.action_space = spaces.Discrete(self.n)
        self.observation_space = spaces.Discrete(1)
        self.seed()

    def seed(self, seed=None):
        """(Re)create the environment's RNG and return the seed in a list."""
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]

    def step(self, action):
        """Play one round; returns ``(observation, reward, done, info)``."""
        assert self.action_space.contains(action)

        walk_away = self.n - 1
        if action == walk_away:
            # observation, reward, done, info
            return 0, 0, True, {}

        # N.B. np.random.randint draws from [A, B) while random.randint draws from [A,B]
        spin = self.np_random.randint(0, self.n - 1)
        if spin == 0 and action == 0:
            payout = self.n - 2.0
        elif spin != 0 and action != 0 and spin % 2 == action % 2:
            payout = 1.0
        else:
            payout = -1.0
        return 0, payout, False, {}

    def reset(self):
        """Return the single observation of this environment (always 0)."""
        return 0

# --------------------------------------------------------------------------------
# /gym/envs/unittest/__init__.py:
# --------------------------------------------------------------------------------
from gym.envs.unittest.cube_crash import CubeCrash
from gym.envs.unittest.cube_crash import CubeCrashSparse
from gym.envs.unittest.cube_crash import CubeCrashScreenBecomesBlack
from gym.envs.unittest.memorize_digits import MemorizeDigits


# --------------------------------------------------------------------------------
# /gym/logger.py:
# --------------------------------------------------------------------------------
import warnings

from gym.utils import colorize

# Severity levels; a message is emitted when MIN_LEVEL <= its level.
DEBUG = 10
INFO = 20
WARN = 30
ERROR = 40
DISABLED = 50

MIN_LEVEL = 30


def set_level(level):
    """
    Set logging threshold on current logger.
    """
    global MIN_LEVEL
    MIN_LEVEL = level


def debug(msg, *args):
    """Print ``msg % args`` with a DEBUG prefix if the threshold allows it."""
    if MIN_LEVEL > DEBUG:
        return
    print('%s: %s' % ('DEBUG', msg % args))


def info(msg, *args):
    """Print ``msg % args`` with an INFO prefix if the threshold allows it."""
    if MIN_LEVEL > INFO:
        return
    print('%s: %s' % ('INFO', msg % args))


def warn(msg, *args):
    """Issue ``msg % args`` as a yellow-coloured Python warning."""
    if MIN_LEVEL > WARN:
        return
    text = '%s: %s' % ('WARN', msg % args)
    warnings.warn(colorize(text, 'yellow'))


def error(msg, *args):
    """Print ``msg % args`` in red with an ERROR prefix."""
    if MIN_LEVEL > ERROR:
        return
    text = '%s: %s' % ('ERROR', msg % args)
    print(colorize(text, 'red'))


# DEPRECATED:
setLevel = set_level

# --------------------------------------------------------------------------------
# /gym/spaces/__init__.py:
# --------------------------------------------------------------------------------
from gym.spaces.space import Space
from gym.spaces.box import Box
from gym.spaces.discrete import Discrete
from gym.spaces.multi_discrete import MultiDiscrete
from gym.spaces.multi_binary import MultiBinary
from gym.spaces.tuple_space import Tuple
from gym.spaces.dict_space import Dict

__all__ = ["Space", "Box", "Discrete", "MultiDiscrete", "MultiBinary", "Tuple", "Dict"]

# --------------------------------------------------------------------------------
# /gym/spaces/__pycache__/__init__.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/spaces/__pycache__/__init__.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/spaces/__pycache__/box.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/spaces/__pycache__/box.cpython-36.pyc
# --------------------------------------------------------------------------------
/gym/spaces/__pycache__/dict_space.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/spaces/__pycache__/dict_space.cpython-36.pyc -------------------------------------------------------------------------------- /gym/spaces/__pycache__/discrete.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/spaces/__pycache__/discrete.cpython-36.pyc -------------------------------------------------------------------------------- /gym/spaces/__pycache__/multi_binary.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/spaces/__pycache__/multi_binary.cpython-36.pyc -------------------------------------------------------------------------------- /gym/spaces/__pycache__/multi_discrete.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/spaces/__pycache__/multi_discrete.cpython-36.pyc -------------------------------------------------------------------------------- /gym/spaces/__pycache__/space.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/spaces/__pycache__/space.cpython-36.pyc -------------------------------------------------------------------------------- /gym/spaces/__pycache__/tuple_space.cpython-36.pyc: 
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/spaces/__pycache__/tuple_space.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/spaces/discrete.py:
# --------------------------------------------------------------------------------
import numpy as np
import gym
from .space import Space


class Discrete(Space):
    """
    {0,1,...,n-1}

    Example usage:
    self.observation_space = spaces.Discrete(2)
    """
    def __init__(self, n):
        self.n = n
        super(Discrete, self).__init__((), np.int64)
        self.np_random = np.random.RandomState()

    def seed(self, seed):
        """Seed this space's private RNG."""
        self.np_random.seed(seed)

    def sample(self):
        """Draw an integer from ``[0, n)`` using the space's RNG."""
        return self.np_random.randint(self.n)

    def contains(self, x):
        """Return True if ``x`` is an int (or 0-d integer numpy value) in ``[0, n)``."""
        if isinstance(x, int):
            as_int = x
        elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.kind in np.typecodes['AllInteger'] and x.shape == ()):
            as_int = int(x)
        else:
            return False
        return 0 <= as_int < self.n

    def __repr__(self):
        return "Discrete(%d)" % self.n

    def __eq__(self, other):
        # Guard on the type: without it, any object exposing ``n`` (e.g. a
        # MultiBinary) compared equal, and objects without ``n`` raised
        # AttributeError instead of comparing unequal.
        return isinstance(other, Discrete) and self.n == other.n

# --------------------------------------------------------------------------------
# /gym/spaces/multi_binary.py:
# --------------------------------------------------------------------------------
import gym
import numpy as np
from .space import Space


class MultiBinary(Space):
    """A space of length-``n`` vectors whose entries are 0 or 1."""
    def __init__(self, n):
        self.n = n
        super(MultiBinary, self).__init__((self.n,), np.int8)
        self.np_random = np.random.RandomState()

    def seed(self, seed):
        """Seed this space's private RNG."""
        self.np_random.seed(seed)

    def sample(self):
        """Draw ``n`` random bits, returned with the space's dtype."""
        return self.np_random.randint(low=0, high=2, size=self.n).astype(self.dtype)

    def contains(self, x):
        """Return whether every entry of ``x`` is 0 or 1.

        ``x`` may be an array or a plain (nested) list; lists are promoted to
        an array first, because ``list == 0`` is a single bool without
        ``.all()``.
        """
        x = np.asarray(x)
        return ((x == 0) | (x == 1)).all()

    def to_jsonable(self, sample_n):
        return np.array(sample_n).tolist()

    def from_jsonable(self, sample_n):
        return [np.asarray(sample) for sample in sample_n]

    def __repr__(self):
        return "MultiBinary({})".format(self.n)

    def __eq__(self, other):
        # Same type guard as Discrete.__eq__: only another MultiBinary with
        # the same ``n`` is equal.
        return isinstance(other, MultiBinary) and self.n == other.n

# --------------------------------------------------------------------------------
# /gym/spaces/space.py:
# --------------------------------------------------------------------------------
import numpy as np


class Space(object):
    """Defines the observation and action spaces, so you can write generic
    code that applies to any Env. For example, you can choose a random
    action.
    """
    def __init__(self, shape=None, dtype=None):
        # NOTE: numpy is also imported at module level in this file, so this
        # local import does not actually defer the import cost.
        import numpy as np # takes about 300-400ms to import, so we load lazily
        self.shape = None if shape is None else tuple(shape)
        self.dtype = None if dtype is None else np.dtype(dtype)

    def sample(self):
        """
        Uniformly randomly sample a random element of this space
        """
        raise NotImplementedError

    def seed(self, seed):
        """Set the seed for this space's pseudo-random number generator. """
        raise NotImplementedError

    def contains(self, x):
        """
        Return boolean specifying if x is a valid
        member of this space
        """
        raise NotImplementedError

    def __contains__(self, x):
        return self.contains(x)

    def to_jsonable(self, sample_n):
        """Convert a batch of samples from this space to a JSONable data type."""
        # By default, assume identity is JSONable
        return sample_n

    def from_jsonable(self, sample_n):
        """Convert a JSONable data type to a batch of samples from this space."""
        # By default, assume identity is JSONable
        return sample_n

# --------------------------------------------------------------------------------
# /gym/spaces/tests/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/spaces/tests/__init__.py
# --------------------------------------------------------------------------------
# /gym/tests/test_core.py:
# --------------------------------------------------------------------------------
from gym import core

class ArgumentEnv(core.Env):
    calls = 0

    def __init__(self, arg):
        self.calls += 1
        self.arg = arg

def test_env_instantiation():
    # This looks pretty trivial, but given our usage of
    # __new__, it's worth having.
    env = ArgumentEnv('arg')
    assert env.arg == 'arg'
    assert env.calls == 1

# --------------------------------------------------------------------------------
# /gym/utils/__init__.py:
# --------------------------------------------------------------------------------
"""A set of common utilities used within the environments. These are
not intended as API functions, and will not remain stable over time.
"""

# These submodules should not have any import-time dependencies.
6 | # We want this since we use `utils` during our import-time sanity checks 7 | # that verify that our dependencies are actually present. 8 | from .colorize import colorize 9 | from .ezpickle import EzPickle 10 | -------------------------------------------------------------------------------- /gym/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /gym/utils/__pycache__/atomic_write.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/utils/__pycache__/atomic_write.cpython-36.pyc -------------------------------------------------------------------------------- /gym/utils/__pycache__/closer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/utils/__pycache__/closer.cpython-36.pyc -------------------------------------------------------------------------------- /gym/utils/__pycache__/colorize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/utils/__pycache__/colorize.cpython-36.pyc -------------------------------------------------------------------------------- /gym/utils/__pycache__/ezpickle.cpython-36.pyc: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/utils/__pycache__/ezpickle.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/utils/__pycache__/json_utils.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/utils/__pycache__/json_utils.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/utils/__pycache__/seeding.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/utils/__pycache__/seeding.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/utils/colorize.py:
# --------------------------------------------------------------------------------
"""Helper for wrapping text in terminal color escape codes. Like the rest
of `utils`, it is not intended as an API function, and will not remain
stable over time.
"""

# Maps a color name to its base escape-code number (foreground).
color2num = dict(
    gray=30,
    red=31,
    green=32,
    yellow=33,
    blue=34,
    magenta=35,
    cyan=36,
    white=37,
    crimson=38
)


def colorize(string, color, bold=False, highlight=False):
    """Return string surrounded by appropriate terminal color codes to
    print colorized text.  Valid colors: gray, red, green, yellow,
    blue, magenta, cyan, white, crimson

    ``highlight`` adds 10 to the color code; ``bold`` appends the ``1``
    attribute. An unknown ``color`` raises ``KeyError``.
    """

    # Import six here so that `utils` has no import-time dependencies.
    # We want this since we use `utils` during our import-time sanity checks
    # that verify that our dependencies (including six) are actually present.
    import six

    attr = []
    num = color2num[color]
    if highlight:
        num += 10
    attr.append(six.u(str(num)))
    if bold:
        attr.append(six.u('1'))
    attrs = six.u(';').join(attr)
    return six.u('\x1b[%sm%s\x1b[0m') % (attrs, string)

# --------------------------------------------------------------------------------
# /gym/utils/ezpickle.py:
# --------------------------------------------------------------------------------
class EzPickle(object):
    """Objects that are pickled and unpickled via their constructor
    arguments.

    Example usage:

        class Dog(Animal, EzPickle):
            def __init__(self, furcolor, tailkind="bushy"):
                Animal.__init__(self)
                EzPickle.__init__(self, furcolor, tailkind)
                ...

    When this object is unpickled, a new Dog will be constructed by passing the provided
    furcolor and tailkind into the constructor. However, philosophers are still not sure
    whether it is still the same dog.

    This is generally needed only for environments which wrap C/C++ code, such as MuJoCo
    and Atari.
    """
    def __init__(self, *args, **kwargs):
        # Remember the constructor arguments; they are the whole pickled state.
        self._ezpickle_args = args
        self._ezpickle_kwargs = kwargs

    def __getstate__(self):
        return {"_ezpickle_args" : self._ezpickle_args, "_ezpickle_kwargs": self._ezpickle_kwargs}

    def __setstate__(self, d):
        # Rebuild a fresh instance through the constructor, then adopt its state.
        out = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"])
        self.__dict__.update(out.__dict__)

# --------------------------------------------------------------------------------
# /gym/utils/json_utils.py:
# --------------------------------------------------------------------------------
import numpy as np

def json_encode_np(obj):
    """
    JSON can't serialize numpy types, so convert to pure python

    Arrays become (nested) lists of Python scalars via ``ndarray.tolist()``;
    numpy booleans, floats and integers of any width become ``bool``,
    ``float`` and ``int``. Anything else is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        # tolist() converts the elements too and also handles 0-d and
        # multi-dimensional arrays, which list(obj) did not.
        return obj.tolist()
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    return obj

# --------------------------------------------------------------------------------
# /gym/utils/tests/test_atexit.py:
# --------------------------------------------------------------------------------
from gym.utils.closer import Closer

class Closeable(object):
    close_called = False
    def close(self):
        self.close_called = True

def test_register_unregister():
    registry = Closer(atexit_register=False)
    c1 = Closeable()
    c2 = Closeable()

    assert not c1.close_called
    assert not c2.close_called
    registry.register(c1)
    id2 = registry.register(c2)

    # c2 is unregistered before close(), so only c1 must get closed.
    registry.unregister(id2)
    registry.close()
    assert c1.close_called
    assert not c2.close_called

# --------------------------------------------------------------------------------
# /gym/utils/tests/test_seeding.py:
# --------------------------------------------------------------------------------
from gym import error
from gym.utils import seeding

def test_invalid_seeds():
    # Every invalid seed must be rejected with a gym error.
    for bad_seed in (-1, 'test'):
        rejected = False
        try:
            seeding.np_random(bad_seed)
        except error.Error:
            rejected = True
        assert rejected, 'Invalid seed {} passed validation'.format(bad_seed)

def test_valid_seeds():
    # A valid seed is handed back unchanged next to the RNG.
    for requested in (0, 1):
        _, returned = seeding.np_random(requested)
        assert requested == returned

# --------------------------------------------------------------------------------
# /gym/version.py:
# --------------------------------------------------------------------------------
VERSION = '0.12.0'

# --------------------------------------------------------------------------------
# /gym/wrappers/README.md:
# --------------------------------------------------------------------------------
# # Wrappers
#
# Wrappers are used to transform an environment in a modular way:
#
# ```
# env = gym.make('Pong-v0')
# env = MyWrapper(env)
# ```
#
# Note that we may later restructure any of the files in this directory,
# but will keep the wrappers available at the wrappers' top-level
# folder. So for example, you should access `MyWrapper` as follows:
#
# ```
# # Will be supported in future releases
# from gym.wrappers import MyWrapper
# ```
#
# ## Quick tips for writing your own wrapper
#
# - Don't forget to call super(class_name, self).__init__(env) if you override the wrapper's __init__ function
# - You can access the inner environment with `self.unwrapped`
# - You can access the previous layer using `self.env`
# - The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer
# - Create a wrapped function for at least one of the following: `__init__(self, env)`, `_step`, `_reset`, `_render`, `_close`, or `_seed`
# - Your layered function should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`)
#
# --------------------------------------------------------------------------------
# /gym/wrappers/__init__.py:
# --------------------------------------------------------------------------------
from gym import error
from gym.wrappers.monitor import Monitor
from gym.wrappers.time_limit import TimeLimit
from gym.wrappers.dict import FlattenDictWrapper

# --------------------------------------------------------------------------------
# /gym/wrappers/__pycache__/__init__.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/wrappers/__pycache__/__init__.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/wrappers/__pycache__/dict.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/wrappers/__pycache__/dict.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/wrappers/__pycache__/monitor.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/wrappers/__pycache__/monitor.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/wrappers/__pycache__/time_limit.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/wrappers/__pycache__/time_limit.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/wrappers/dict.py:
# --------------------------------------------------------------------------------
import gym
import numpy as np


__all__ = ['FlattenDictWrapper']


class FlattenDictWrapper(gym.ObservationWrapper):
    """Flattens selected keys of a Dict observation space into
    an array.

    ``dict_keys`` lists the keys to keep, in the order in which their
    flattened values are concatenated.
    """
    def __init__(self, env, dict_keys):
        super(FlattenDictWrapper, self).__init__(env)
        self.dict_keys = dict_keys

        # Figure out observation_space dimension.
        size = 0
        for key in dict_keys:
            shape = self.env.observation_space.spaces[key].shape
            # np.prod(()) is the float 1.0, so cast each product to int;
            # otherwise a scalar-shaped sub-space would turn `size` into a
            # float and produce an invalid Box shape.
            size += int(np.prod(shape))
        self.observation_space = gym.spaces.Box(-np.inf, np.inf, shape=(size,), dtype='float32')

    def observation(self, observation):
        """Concatenate the raveled values of the selected keys into one 1-D array."""
        assert isinstance(observation, dict)
        obs = [observation[key].ravel() for key in self.dict_keys]
        return np.concatenate(obs)

# --------------------------------------------------------------------------------
# /gym/wrappers/monitoring/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/wrappers/monitoring/__init__.py
# --------------------------------------------------------------------------------
# /gym/wrappers/monitoring/__pycache__/__init__.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/wrappers/monitoring/__pycache__/__init__.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/wrappers/monitoring/__pycache__/stats_recorder.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/wrappers/monitoring/__pycache__/stats_recorder.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/wrappers/monitoring/__pycache__/video_recorder.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/wrappers/monitoring/__pycache__/video_recorder.cpython-36.pyc
# --------------------------------------------------------------------------------
# /gym/wrappers/monitoring/tests/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/wrappers/monitoring/tests/__init__.py
# --------------------------------------------------------------------------------
# /gym/wrappers/monitoring/tests/helpers.py:
# --------------------------------------------------------------------------------
import contextlib
import shutil
import tempfile

@contextlib.contextmanager
def tempdir():
    """Context manager yielding the path of a fresh temporary directory.

    The directory and everything in it is removed when the ``with`` block
    exits, including when the block raises.
    """
    temp = tempfile.mkdtemp()
    try:
        yield temp
    finally:
        # Without the finally, an exception inside the with-body would skip
        # the cleanup and leak the directory.
        shutil.rmtree(temp)

# --------------------------------------------------------------------------------
# /gym/wrappers/tests/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/gym/wrappers/tests/__init__.py
# --------------------------------------------------------------------------------
# /queryoptimization/__pycache__/QueryGraph.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/queryoptimization/__pycache__/QueryGraph.cpython-36.pyc
# --------------------------------------------------------------------------------
# /queryoptimization/__pycache__/cm1_postgres_card.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/heitzjon/mt-join-query-optimization-with-drl/d38a13cb85b15222c8e184248b1638b737993327/queryoptimization/__pycache__/cm1_postgres_card.cpython-36.pyc
# --------------------------------------------------------------------------------
# /queryoptimization/reward_mapping.py:
# --------------------------------------------------------------------------------
from math import log, sqrt
import matplotlib.pyplot as plt
import numpy as np

################################################################
# Reward plot

# Cost bounds used to normalise the square-root reward.
cost = {'max': 1.e+13, 'min': 1.e+6}
y_array = np.arange(cost['min'] + 2, 1.e18, 1.e+12)


def sqt(y2):
    """Lazily map each cost in ``y2`` to its square-root-scaled reward.

    A cost equal to ``cost['max']`` maps to -10; larger costs map below
    -10 and are clipped by the caller.
    """
    span_root = sqrt(cost['max'] - cost['min'])
    return (sqrt(y - cost['min']) / span_root * -10 for y in y2)  # SQRT


# Clip rewards at -10 so costs above cost['max'] all get the floor value.
sq = [-10 if reward < -10 else reward for reward in sqt(y_array)]

plt.plot(sq, y_array)

plt.ylim(-0.5*1e11, 2*1e13)
#plt.xlim(-10,0)
#plt.xscale('log')
plt.ylabel("Cost Value")
plt.xlabel("Reward")

plt.show()
# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# absl-py==0.7.1
# astor==0.7.1
# atari-py==0.1.7
# atomicwrites==1.3.0
# attrs==19.1.0
# certifi==2019.3.9
# chardet==3.0.4
# Click==7.0
# colorama==0.4.1
# cycler==0.10.0
# filelock==3.0.10
# flatbuffers==1.10
# funcsigs==1.0.2
# future==0.17.1
# gast==0.2.2
# grpcio==1.19.0
# h5py==2.9.0
# idna==2.8
# Keras==2.2.4
# Keras-Applications==1.0.7
# Keras-Preprocessing==1.0.9
# kiwisolver==1.0.1
# lz4==2.1.6
# Markdown==3.1
# matplotlib==3.0.3
# mock==2.0.0
# more-itertools==7.0.0
# numpy==1.16.2
# opencv-contrib-python==4.1.0.25
# opencv-python==4.1.0.25
# opencv-python-headless==4.0.1.24
# pandas==0.24.2
# pbr==5.1.3
# Pillow==6.0.0
# pluggy==0.9.0
36 | protobuf==3.7.0 37 | psutil==5.6.2 38 | psycopg2==2.8.1 39 | py==1.8.0 40 | pyglet==1.3.2 41 | PyOpenGL==3.1.0 42 | pyparsing==2.3.1 43 | pytest==4.4.0 44 | python-dateutil==2.8.0 45 | pytz==2018.9 46 | PyYAML==5.1 47 | ray==0.6.5 48 | redis==3.2.1 49 | requests==2.21.0 50 | scipy==1.2.1 51 | six==1.12.0 52 | tensorboard==1.13.1 53 | tensorflow==1.13.1 54 | tensorflow-estimator==1.13.0 55 | termcolor==1.1.0 56 | typing==3.6.6 57 | urllib3==1.24.1 58 | Werkzeug==0.15.1 59 | -e . 60 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | # Testing 2 | pytest 3 | mock 4 | 5 | -e .[all] 6 | --------------------------------------------------------------------------------