├── scripts ├── run_eval_fm_vae.sh ├── run_train_critic.sh ├── run_eval_fm_ten.sh ├── run_plan.sh ├── run_train_value_fn.sh ├── run_train_prior.sh ├── run_eval_mpur_path.sh ├── submit_generate_data_i80.slurm ├── submit_generate_data_us101.slurm ├── submit_generate_data_lanker.slurm ├── submit_generate_data_peach.slurm ├── run_train_mb_il_tprop.sh ├── submit_generate_data.slurm ├── run_train_critic_ae.sh ├── submit_plan_policy_il.slurm ├── run_plan_bprop.sh ├── submit_plan_policy.slurm ├── submit_train_prior.slurm ├── submit_train_critic_joint.slurm ├── submit_train_policy_prior.slurm ├── submit_train_il.slurm ├── submit_plan.slurm ├── submit_plan_bprop.slurm ├── submit_train_mpur_config.slurm ├── submit_train_value_fn.slurm ├── run_generate_data.sh ├── submit_train_critic.slurm ├── submit_eval_mpur.slurm ├── submit_train_mb_il.slurm ├── submit_eval_mpur_path.slurm ├── run_train_il.sh ├── submit_train_mpur.slurm ├── submit_train_fm.slurm ├── submit_eval_fm.slurm ├── run_train_mb_il.sh ├── run_train_critic_vae.sh ├── run_plan_policy.sh ├── run_train_mpur.sh ├── run_train_fm.sh └── run_train_fm_det.sh ├── I-80 ├── cam1.png ├── cam2.png ├── cam3.png ├── cam4.png ├── cam5.png ├── cam6.png ├── cam7.png └── README.md ├── US-101 ├── cam1.png ├── cam2.png ├── cam3.png ├── cam4.png ├── cam5.png ├── cam6.png ├── cam7.png ├── cam8.png └── README.md ├── doc ├── planning.png ├── agent_train.png └── uncertainty.png ├── Lankershim ├── cam1.png ├── cam2.png ├── cam3.png ├── cam4.png ├── cam5.png ├── lanes_human.png ├── lanes_machine.png └── README.md ├── Peachtree ├── lanes_human.png ├── lanes_machine.png └── README.md ├── .idea ├── vcs.xml └── dictionaries │ └── atcold.xml ├── .gitignore ├── environment.yaml ├── get_data_idx.py ├── LICENSE ├── Notebooks ├── utils.py ├── Visualization │ ├── VisualizationLibrary.py │ ├── Visualization.ipynb │ ├── DimensionalityReduction.py │ └── Tabs.py ├── Training-curves.py ├── Proximity_cost_design.py ├── Loss-design.py ├── Target-lane.py ├── 
New-cost-design.py ├── Proximity-cost-profile.py ├── Nonconstant_vs_Constant_Slope.py ├── Plotting-success-rate.py └── Proximity-Cost_Slope_Adjustment.py ├── extract_car_size.py ├── traffic_gym_v2.py ├── custom_graphics.py ├── play_maps.py ├── map_i80_ctrl.py ├── binarise_trajectories_files.py ├── generate_trajectories.py ├── plots ├── plot_best_of_k.py └── plot_z_trajectories.py ├── train_IL.py ├── train_cost.py ├── map_lanker.py ├── map_us101.py ├── train_MPUR.py ├── map_peach.py ├── eval_fm.py ├── train_fm.py ├── README.md ├── train_MPER.py └── dataloader.py /scripts/run_eval_fm_vae.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | -------------------------------------------------------------------------------- /I-80/cam1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/I-80/cam1.png -------------------------------------------------------------------------------- /I-80/cam2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/I-80/cam2.png -------------------------------------------------------------------------------- /I-80/cam3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/I-80/cam3.png -------------------------------------------------------------------------------- /I-80/cam4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/I-80/cam4.png -------------------------------------------------------------------------------- /I-80/cam5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/I-80/cam5.png 
-------------------------------------------------------------------------------- /I-80/cam6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/I-80/cam6.png -------------------------------------------------------------------------------- /I-80/cam7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/I-80/cam7.png -------------------------------------------------------------------------------- /US-101/cam1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/US-101/cam1.png -------------------------------------------------------------------------------- /US-101/cam2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/US-101/cam2.png -------------------------------------------------------------------------------- /US-101/cam3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/US-101/cam3.png -------------------------------------------------------------------------------- /US-101/cam4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/US-101/cam4.png -------------------------------------------------------------------------------- /US-101/cam5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/US-101/cam5.png -------------------------------------------------------------------------------- /US-101/cam6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/US-101/cam6.png -------------------------------------------------------------------------------- /US-101/cam7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/US-101/cam7.png -------------------------------------------------------------------------------- /US-101/cam8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/US-101/cam8.png -------------------------------------------------------------------------------- /doc/planning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/doc/planning.png -------------------------------------------------------------------------------- /Lankershim/cam1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/Lankershim/cam1.png -------------------------------------------------------------------------------- /Lankershim/cam2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/Lankershim/cam2.png -------------------------------------------------------------------------------- /Lankershim/cam3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/Lankershim/cam3.png -------------------------------------------------------------------------------- /Lankershim/cam4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/Lankershim/cam4.png -------------------------------------------------------------------------------- /Lankershim/cam5.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/Lankershim/cam5.png -------------------------------------------------------------------------------- /doc/agent_train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/doc/agent_train.png -------------------------------------------------------------------------------- /doc/uncertainty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/doc/uncertainty.png -------------------------------------------------------------------------------- /Lankershim/lanes_human.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/Lankershim/lanes_human.png -------------------------------------------------------------------------------- /Peachtree/lanes_human.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/Peachtree/lanes_human.png -------------------------------------------------------------------------------- /Lankershim/lanes_machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/Lankershim/lanes_machine.png -------------------------------------------------------------------------------- /Peachtree/lanes_machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Atcold/pytorch-PPUU/HEAD/Peachtree/lanes_machine.png -------------------------------------------------------------------------------- /scripts/run_train_critic.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | 3 | for seed in 16 17 18 19 20; do 4 | sbatch submit_train_critic_joint.slurm $seed 5 | done 6 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *png 2 | *aux 3 | *bbl 4 | *blg 5 | *log 6 | *out 7 | *pdf 8 | *swp 9 | .idea/ 10 | scratch 11 | state 12 | *gif 13 | game.py 14 | data_i80 15 | .DS_Store 16 | __pycache__ 17 | *pth 18 | *err 19 | traffic-data 20 | *pkl 21 | Notebooks/.ipynb_checkpoints/ 22 | -------------------------------------------------------------------------------- /scripts/run_eval_fm_ten.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Allows named arguments 3 | set -k 4 | 5 | for mfile in /home/mbhenaff/scratch/models_v11/*vae*-dropout=*beta=1e-06*-seed=1.step200000.model; do 6 | sbatch submit_eval_fm.slurm mfile=$(basename $mfile) sampling='fp' 7 | done 8 | 9 | -------------------------------------------------------------------------------- /scripts/run_plan.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for npred in 20 50; do 4 | for n_rollouts in 10 20; do 5 | for lrt in 0.5 1.0; do 6 | for niter in 10; do 7 | sbatch submit_plan.slurm $n_rollouts $npred 0.0 $lrt $niter 8 | done 9 | done 10 | done 11 | done 12 | -------------------------------------------------------------------------------- /scripts/run_train_value_fn.sh: -------------------------------------------------------------------------------- 1 | for size in 32; do 2 | for npred in 50; do 3 | for dropout in 0.05 0.1; do 4 | for gamma in 0.97 0.99; do 5 | for nsync in 1; do 6 | sbatch submit_train_value_fn.slurm $npred 
$dropout $gamma $nsync $size 7 | done 8 | done 9 | done 10 | done 11 | done 12 | -------------------------------------------------------------------------------- /scripts/run_train_prior.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mfile=model=fwd-cnn-ten3-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-nhidden=128-fgeom=1-zeroact=0-zmult=0-dropout=0.05-nz=32-beta=0.0-zdropout=0.5-gclip=5.0-warmstart=1-seed=1.step200000.model 4 | for n_mixture in 1 10 50; do 5 | for nfeature in 256; do 6 | sbatch submit_train_prior.slurm $nfeature $n_mixture $mfile 7 | done 8 | done 9 | -------------------------------------------------------------------------------- /scripts/run_eval_mpur_path.sh: -------------------------------------------------------------------------------- 1 | # Allows named arguments 2 | set -k 3 | 4 | if [ $# -eq 0 ] 5 | then 6 | echo "Pass the directory where *.model files are stored" 7 | exit 8 | fi 9 | 10 | model_dir=$1 11 | 12 | for f in $model_dir/policy_networks/*0.model; do 13 | policy=$(basename $f); 14 | sbatch submit_eval_mpur_path.slurm \ 15 | policy=$policy \ 16 | model_dir=$model_dir 17 | done 18 | -------------------------------------------------------------------------------- /scripts/submit_generate_data_i80.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=i80-datagen 4 | #SBATCH --time=24:00:00 5 | #SBATCH --gres gpu:0 6 | #SBATCH --exclude="rose5,rose6,vine5,vine11,vine3" 7 | #SBATCH --qos=batch 8 | #SBATCH --nodes=1 9 | #SBATCH --mem=48000 10 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 11 | 12 | cd ../ 13 | srun python -u generate_trajectories.py -time_slot $time_slot -map i80 14 | -------------------------------------------------------------------------------- /scripts/submit_generate_data_us101.slurm: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=us101-datagen 4 | #SBATCH --time=24:00:00 5 | #SBATCH --gres gpu:0 6 | #SBATCH --exclude="rose5,rose6,vine5,vine11,vine3" 7 | #SBATCH --qos=batch 8 | #SBATCH --nodes=1 9 | #SBATCH --mem=48000 10 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 11 | 12 | cd ../ 13 | srun python -u generate_trajectories.py -time_slot $time_slot -map us101 14 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: PPUU 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - imageio 7 | - pandas 8 | - python=3.8 9 | - pytorch>1.2.0 10 | - scipy 11 | - matplotlib 12 | - pip 13 | - jupyter 14 | - scikit-learn 15 | - scikit-image 16 | - pip: 17 | - gym 18 | - pygame>=2.0.0.dev6 19 | - jupytext 20 | - ipdb 21 | - tb-nightly 22 | - moviepy 23 | - bqplot 24 | - astropy 25 | -------------------------------------------------------------------------------- /scripts/submit_generate_data_lanker.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=lanker-datagen 4 | #SBATCH --time=24:00:00 5 | #SBATCH --gres gpu:0 6 | #SBATCH --exclude="rose5,rose6,vine5,vine11,vine3" 7 | #SBATCH --qos=batch 8 | #SBATCH --nodes=1 9 | #SBATCH --mem=48000 10 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 11 | 12 | cd ../ 13 | srun python -u generate_trajectories.py -time_slot $time_slot -map lanker 14 | 15 | -------------------------------------------------------------------------------- /scripts/submit_generate_data_peach.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=peach-datagen 4 | #SBATCH --time=24:00:00 5 | #SBATCH --gres gpu:0 6 | #SBATCH 
--exclude="rose5,rose6,vine5,vine11,vine3" 7 | #SBATCH --qos=batch 8 | #SBATCH --nodes=1 9 | #SBATCH --mem=48000 10 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 11 | 12 | cd ../ 13 | srun python -u generate_trajectories.py -time_slot $time_slot -map peach 14 | 15 | -------------------------------------------------------------------------------- /scripts/run_train_mb_il_tprop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for targetprop in 1; do 4 | for size in 256; do 5 | for npred in 20 50; do 6 | for lambda_c in 0.0 0.2; do 7 | for gamma in 0.98; do 8 | for seed in 1; do 9 | sbatch submit_train_mb_il.slurm $size $size $seed $npred $lambda_c $targetprop $gamma 10 | done 11 | done 12 | done 13 | done 14 | done 15 | done 16 | 17 | -------------------------------------------------------------------------------- /scripts/submit_generate_data.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=datagen 4 | #SBATCH --output=datagen.out 5 | #SBATCH --error=datagen.err 6 | #SBATCH --time=24:00:00 7 | #SBATCH --gres gpu:0 8 | #SBATCH --exclude="rose5,rose6,vine5,vine11, vine3" 9 | #SBATCH --qos=batch 10 | #SBATCH --nodes=1 11 | #SBATCH --mem=48000 12 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 13 | 14 | source activate py35 15 | cd ../ 16 | srun python generate_trajectories.py -time_slot $time_slot -v 1 17 | 18 | -------------------------------------------------------------------------------- /scripts/run_train_critic_ae.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | mfile=model=fwd-cnn-ae-fp-bsize=16-ncond=10-npred=20-lrt=0.0001-nhidden=100-nfeature=128-decoder=0-combine=add-gclip=1-nz=32-beta=0.0-nmix=1-warmstart=1.model 5 | 6 | for sampling in pdf; do 7 | for density in 1.0; do 8 | for seed in 1 2 3; do 9 | for n_mixture 
in 5 10 20; do 10 | sbatch submit_train_critic.slurm $mfile $sampling $seed $density $n_mixture 11 | done 12 | done 13 | done 14 | done 15 | -------------------------------------------------------------------------------- /.idea/dictionaries/atcold.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | backprop 5 | cuda 6 | lankershim 7 | mkdir 8 | npred 9 | optim 10 | ortho 11 | peachtree 12 | pygame 13 | regressor 14 | unnormalize 15 | unsqueeze 16 | waypoint 17 | waypoints 18 | 19 | 20 | -------------------------------------------------------------------------------- /US-101/README.md: -------------------------------------------------------------------------------- 1 | # US-101 section 2 | 3 | Pictures extracted at time 100s.\ 4 | 6 lanes in total. 5 | 6 | - Camera 3, last interesting spot, rightmost 7 | - Camera 4, middle of extra lane 8 | - Camera 5, as above 9 | - Camera 6, merging lane 10 | - Camera 7, early merging lane 11 | 12 | Cameras 1, 2, and 8 show no interesting region. 
13 | 14 | ``` 15 | cam1.png 644x160 16 | cam2.png 628x160 17 | cam3.png 640x164 18 | cam4.png 532x164 19 | cam5.png 388x164 20 | cam6.png 472x164 21 | cam7.png 552x164 22 | cam8.png 656x160 23 | ``` -------------------------------------------------------------------------------- /scripts/submit_plan_policy_il.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=plan 4 | #SBATCH --output=logs/plan_%j.out 5 | #SBATCH --error=logs/plan_%j.err 6 | #SBATCH --time=48:00:00 7 | #SBATCH --gres gpu:1 8 | #SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11, vine6" 9 | #SBATCH --constraint="gpu_12gb&pascal" 10 | #SBATCH --qos=batch 11 | #SBATCH --nodes=1 12 | #SBATCH --mem=70000 13 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 14 | 15 | cd ../ 16 | srun python plan_with_uncertainty.py -method policy-il -npred 1 17 | 18 | -------------------------------------------------------------------------------- /scripts/run_plan_bprop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for method in bprop; do 4 | for niter in 5; do 5 | for lrt in 0.1; do 6 | for lane_cost in 0.1; do 7 | for u_reg in 0.0 0.1; do 8 | for u_hinge in 0.0; do 9 | for buffer in 1; do 10 | sbatch submit_plan_bprop.slurm $method $niter $lrt $u_reg $u_hinge $buffer $lane_cost 11 | done 12 | done 13 | done 14 | done 15 | done 16 | done 17 | done 18 | -------------------------------------------------------------------------------- /scripts/submit_plan_policy.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=plan_policy 4 | #SBATCH --output=logs/plan_%j.out 5 | #SBATCH --error=logs/plan_%j.err 6 | #SBATCH --time=48:00:00 7 | #SBATCH --gres gpu:1 8 | #SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11, vine6" 9 | #SBATCH 
--constraint="gpu_12gb&pascal" 10 | #SBATCH --qos=batch 11 | #SBATCH --nodes=1 12 | #SBATCH --mem=70000 13 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 14 | 15 | cd ../ 16 | srun python plan_with_uncertainty.py -method $1 -model_dir $2 -policy_model_tm $3 17 | 18 | -------------------------------------------------------------------------------- /scripts/submit_train_prior.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=train_prior 4 | #SBATCH --output=train_prior.out 5 | #SBATCH --error=train_prior.err 6 | #SBATCH --time=48:00:00 7 | #SBATCH --gres gpu:1 8 | #SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11" 9 | #SBATCH --constraint="gpu_12gb&pascal" 10 | #SBATCH --qos=batch 11 | #SBATCH --nodes=1 12 | #SBATCH --mem=50000 13 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 14 | 15 | module load python-3.6 16 | cd ../ 17 | srun python train_prior.py -nfeature $1 -n_mixture $2 -mfile $3 18 | 19 | -------------------------------------------------------------------------------- /scripts/submit_train_critic_joint.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=train_critic_joint 4 | #SBATCH --output=train_critic.out 5 | #SBATCH --error=train_critic.err 6 | #SBATCH --time=48:00:00 7 | #SBATCH --gres gpu:1 8 | #SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11, vine6" 9 | #SBATCH --constraint="gpu_12gb&pascal" 10 | #SBATCH --qos=batch 11 | #SBATCH --nodes=1 12 | #SBATCH --mem=70000 13 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 14 | 15 | module load python-3.6 16 | cd ../ 17 | srun python train_critic_joint.py -batch_size 2 -npred 200 -lrt 0.0001 -seed $1 18 | -------------------------------------------------------------------------------- /scripts/submit_train_policy_prior.slurm: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=train_policy_prior 4 | #SBATCH --output=logs/train_policy_prior_%j.out 5 | #SBATCH --error=logs/train_policy_prior_%j.err 6 | #SBATCH --time=48:00:00 7 | #SBATCH --gres gpu:1 8 | #SBATCH --constraint="gpu_12gb&pascal" 9 | #SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11" 10 | #SBATCH --qos=batch 11 | #SBATCH --nodes=1 12 | #SBATCH --mem=48000 13 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 14 | 15 | module load python-3.6 16 | cd ../ 17 | 18 | srun python train_policy_prior.py -mfile $1 19 | 20 | -------------------------------------------------------------------------------- /scripts/submit_train_il.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=train_il 4 | #SBATCH --output=train_il.out 5 | #SBATCH --error=train_il.err 6 | #SBATCH --time=24:00:00 7 | #SBATCH --gres gpu:1 8 | #SBATCH --constraint="gpu_12gb&pascal" 9 | #SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11" 10 | #SBATCH --qos=batch 11 | #SBATCH --nodes=1 12 | #SBATCH --mem=48000 13 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 14 | 15 | module load python-3.6 16 | cd ../ 17 | srun python train_il.py -model $1 -lrt $2 -n_hidden $3 -ncond $4 -npred $5 -beta $6 -nz $7 -n_mixture $8 -seed $9 18 | 19 | -------------------------------------------------------------------------------- /scripts/submit_plan.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=plan 4 | #SBATCH --output=logs/plan_%j.out 5 | #SBATCH --error=logs/plan_%j.err 6 | #SBATCH --time=48:00:00 7 | #SBATCH --gres gpu:1 8 | #SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11, vine6" 9 | #SBATCH --constraint="gpu_12gb&pascal" 10 | 
#SBATCH --qos=batch 11 | #SBATCH --nodes=1 12 | #SBATCH --mem=70000 13 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 14 | 15 | cd ../ 16 | srun python plan_with_uncertainty.py -mfile $1 -model_dir $2 -n_rollouts $3 -rollout_length $4 -bprop_lrt $5 -bprop_niter $6 -method $7 17 | 18 | -------------------------------------------------------------------------------- /scripts/submit_plan_bprop.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=plan 4 | #SBATCH --output=logs/plan_%j.out 5 | #SBATCH --error=logs/plan_%j.err 6 | #SBATCH --time=48:00:00 7 | #SBATCH --gres gpu:1 8 | #SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11, vine6" 9 | #SBATCH --constraint="gpu_12gb&pascal" 10 | #SBATCH --qos=batch 11 | #SBATCH --nodes=1 12 | #SBATCH --mem=70000 13 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 14 | 15 | cd ../ 16 | srun python plan_with_uncertainty.py -method $1 -bprop_niter $2 -bprop_lrt $3 -u_reg $4 -u_hinge $5 -bprop_buffer $6 -lambda_l $7 17 | 18 | -------------------------------------------------------------------------------- /scripts/submit_train_mpur_config.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=train_MPUR 4 | #SBATCH --output=logs/train_MPUR_%j.out 5 | #SBATCH --error=logs/train_MPUR_%j.err 6 | #SBATCH --time=48:00:00 7 | #SBATCH --gres gpu:1 8 | #SBATCH --constraint="gpu_12gb&pascal" 9 | #SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11" 10 | #SBATCH --qos=batch 11 | #SBATCH --nodes=1 12 | #SBATCH --mem=48000 13 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 14 | 15 | eval "$(conda shell.bash hook)" 16 | conda activate PPUU 17 | 18 | echo "$@" 19 | 20 | cd ../ 21 | srun python train_MPUR.py $@ 22 | 
-------------------------------------------------------------------------------- /scripts/submit_train_value_fn.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=train_value 4 | #SBATCH --output=train_value.out 5 | #SBATCH --error=train_value.err 6 | #SBATCH --time=24:00:00 7 | #SBATCH --gres gpu:1 8 | #SBATCH --constraint="gpu_12gb&pascal" 9 | #SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11" 10 | #SBATCH --qos=batch 11 | #SBATCH --nodes=1 12 | #SBATCH --mem=48000 13 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 14 | 15 | module load python-3.6 16 | cd ../ 17 | srun python train_value_function.py -npred $1 -dropout $2 -gamma $3 -nsync $4 -nfeature $5 -n_hidden $5 18 | 19 | -------------------------------------------------------------------------------- /scripts/run_generate_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Allows named arguments 4 | set -k 5 | 6 | # Pick the map you'd like to generate data for (comment the others) 7 | MAP="ai" 8 | MAP="i80" 9 | MAP="us101" 10 | MAP="lanker" 11 | MAP="peach" 12 | 13 | if $(echo "i80 us101" | grep -q $MAP); then T=2; fi 14 | if $(echo "lanker peach" | grep -q $MAP); then T=1; fi 15 | 16 | echo "Map: $MAP, time slots: $(eval echo {0..$T})" 17 | 18 | for t in $(eval echo {0..$T}); do # time slot 19 | sbatch \ 20 | --output ${MAP}_ts${t}.out \ 21 | --error ${MAP}_ts${t}.err \ 22 | submit_generate_data_${MAP}.slurm time_slot=$t 23 | done 24 | -------------------------------------------------------------------------------- /scripts/submit_train_critic.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=train_critic 4 | #SBATCH --output=train_critic.out 5 | #SBATCH --error=train_critic.err 6 | #SBATCH --time=48:00:00 7 | #SBATCH --gres gpu:1 8 | 
#!/bin/bash
#
#SBATCH --job-name=eval_MPUR
#SBATCH --output=logs/eval_MPUR_%j.out
#SBATCH --error=logs/eval_MPUR_%j.err
#SBATCH --time=48:00:00
#SBATCH --gres gpu:1
#SBATCH --constraint="gpu_12gb&pascal"
#SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11"
#SBATCH --cpus-per-task=7
#SBATCH --qos=batch
#SBATCH --nodes=1
#SBATCH --mem=48000
#SBATCH --mail-type=END,FAIL # notifications for job done & fail

# Evaluate an MPUR policy with eval_policy.py.
# The policy checkpoint is read from the `policy` variable, which has to be
# present in the job's environment.

eval "$(conda shell.bash hook)"
conda activate PPUU

# eval_policy.py lives one directory up; stop if we cannot get there
cd ../ || exit 1

# ${policy:?...} aborts with a clear message when the variable is missing,
# instead of letting -policy_model swallow the next flag as its value
srun python eval_policy.py \
    -method policy-MPUR \
    -policy_model "${policy:?policy is not set}" \
    -save_grad_vid
job done & fail 14 | 15 | module load python-3.6 16 | cd ../ 17 | 18 | srun python train_mb_il.py -nfeature $1 -n_hidden $2 -seed $3 -npred $4 -lambda_c $5 -targetprop $6 -gamma $7 -batch_size $8 -curriculum_length $9 -actions_subsample ${10} -lambda_h ${11} -context_dim ${12} 19 | 20 | -------------------------------------------------------------------------------- /I-80/README.md: -------------------------------------------------------------------------------- 1 | # I-80 section 2 | 3 | Pictures extracted at time 100s.\ 4 | 6 lanes in total. 5 | 6 | ``` 7 | cam1.png 828x240 8 | cam2.png 932x240 9 | cam3.png 340x216 10 | cam4.png 360x192 11 | cam5.png 388x196 12 | cam6.png 532x216 13 | cam7.png 1756x216 14 | ``` 15 | 16 | ```python 17 | names = ( 18 | 'Vehicle ID', 19 | 'Frame ID', 20 | 'Total Frames', 21 | 'Global Time', 22 | 'Local X', 23 | 'Local Y', 24 | 'Global X', 25 | 'Global Y', 26 | 'Vehicle Length', 27 | 'Vehicle Width', 28 | 'Vehicle Class', 29 | 'Vehicle Velocity', 30 | 'Vehicle Acceleration', 31 | 'Lane Identification', 32 | 'Preceding Vehicle', 33 | 'Following Vehicle', 34 | 'Spacing', 35 | 'Headway' 36 | ) 37 | ``` 38 | -------------------------------------------------------------------------------- /scripts/submit_eval_mpur_path.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #SBATCH --job-name=eval_MPUR 4 | #SBATCH --output=logs/eval_MPUR_%j.out 5 | #SBATCH --error=logs/eval_MPUR_%j.err 6 | #SBATCH --time=48:00:00 7 | #SBATCH --gres gpu:1 8 | #SBATCH --constraint="gpu_12gb&pascal" 9 | #SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11" 10 | #SBATCH --cpus-per-task=7 11 | #SBATCH --qos=batch 12 | #SBATCH --nodes=1 13 | #SBATCH --mem=48000 14 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 15 | 16 | eval "$(conda shell.bash hook)" 17 | conda activate PPUU 18 | 19 | cd ../ 20 | srun python eval_policy.py \ 21 | $@ \ 22 | -model_dir 
# For every data split, pickle a dictionary that maps each sample index to the
# (time slot, car id) pair parsed from the corresponding car path.

print('> Loading DataLoader')


class opt:
    # Bare options holder handed to DataLoader; only `debug` is defined here
    debug = 0


dataloader = DataLoader(None, opt, 'i80')

print('> Loading splits')
splits = torch.load('/home/atcold/vLecunGroup/nvidia-collab/traffic-data-atcold/data_i80_v0/splits.pth')


def _locate(sample_idx):
    # Resolve one sample index into its (time slot, car id) pair
    timeslot, car_id = utils.parse_car_path(dataloader.ids[sample_idx])
    return timeslot, car_id


for split in splits:
    print(f'> Building {split}')
    data_dict = {idx: _locate(idx) for idx in splits[split]}

    print(f'> Pickling {split}')
    with open(f'{split}.pkl', 'wb') as pickle_file:
        pickle.dump(data_dict, pickle_file)
#!/bin/bash
#
#SBATCH --job-name=train_MPUR
#SBATCH --output=logs/train_MPUR_%j.out
#SBATCH --error=logs/train_MPUR_%j.err
#SBATCH --time=48:00:00
#SBATCH --gres gpu:1
#SBATCH --constraint="gpu_12gb&pascal"
#SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11"
#SBATCH --qos=batch
#SBATCH --nodes=1
#SBATCH --mem=48000
#SBATCH --mail-type=END,FAIL # notifications for job done & fail

# Train a deterministic MPUR policy. Every hyper-parameter is read from a
# variable of the same name in the job's environment.
# NOTE(review): scripts/run_train_mpur.sh also sets infer_z, which is not
# forwarded to train_MPUR.py here; confirm whether that is intended.

eval "$(conda shell.bash hook)"
conda activate PPUU

# train_MPUR.py lives one directory up; stop if we cannot get there
cd ../ || exit 1

# ${name:?...} aborts with a clear message when a variable is missing, instead
# of letting a flag swallow the next flag as its value
srun python train_MPUR.py \
    -npred "${npred:?npred is not set}" \
    -u_reg "${u_reg:?u_reg is not set}" \
    -lrt_z "${lrt_z:?lrt_z is not set}" \
    -z_updates "${z_updates:?z_updates is not set}" \
    -batch_size "${batch_size:?batch_size is not set}" \
    -lambda_l "${lambda_l:?lambda_l is not set}" \
    -lambda_o "${lambda_o:?lambda_o is not set}" \
    -lambda_a "${lambda_a:?lambda_a is not set}" \
    -seed "${seed:?seed is not set}" \
    -policy 'policy-deterministic'
#!/bin/bash
#
#SBATCH --job-name=train_fm_det
#SBATCH --output=logs/train_fm_%j.out
#SBATCH --error=logs/train_fm_%j.err
#SBATCH --time=48:00:00
#SBATCH --gres gpu:1
#SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11, lion6, lion7"
#SBATCH --constraint="gpu_12gb&pascal"
#SBATCH --qos=batch
#SBATCH --nodes=1
#SBATCH --mem=50000
#SBATCH --mail-type=END,FAIL # notifications for job done & fail

# Train a forward model with train_fm.py. Every hyper-parameter is read from a
# variable of the same name in the job's environment.

eval "$(conda shell.bash hook)"
conda activate PPUU

# train_fm.py lives one directory up; stop if we cannot get there
cd ../ || exit 1

# ${name:?...} aborts with a clear message when a variable is missing, instead
# of letting a flag swallow the next flag as its value
srun python train_fm.py \
    -model "${model:?model is not set}" \
    -lrt "${lrt:?lrt is not set}" \
    -nfeature "${nfeature:?nfeature is not set}" \
    -warmstart "${warmstart:?warmstart is not set}" \
    -ncond "${ncond:?ncond is not set}" \
    -npred "${npred:?npred is not set}" \
    -beta "${beta:?beta is not set}" \
    -nz "${nz:?nz is not set}" \
    -z_dropout "${z_dropout:?z_dropout is not set}" \
    -layers "${layers:?layers is not set}" \
    -batch_size "${batch_size:?batch_size is not set}" \
    -seed "${seed:?seed is not set}" \
    -dropout "${dropout:?dropout is not set}"
#!/bin/bash
#
#SBATCH --job-name=eval_fm
#SBATCH --output=eval_fm.out
#SBATCH --error=eval_fm.err
#SBATCH --time=48:00:00
#SBATCH --gres gpu:1
#SBATCH --exclude="weaver1, weaver2, weaver3, weaver4, weaver5, vine5, vine11, vine6"
#SBATCH --constraint="gpu_12gb&pascal"
#SBATCH --qos=batch
#SBATCH --nodes=1
#SBATCH --mem=70000
#SBATCH --mail-type=END,FAIL # notifications for job done & fail

# Evaluate a forward model with eval_fm.py. The model file and the sampling
# mode are read from the `mfile` and `sampling` variables of the job's
# environment.

eval "$(conda shell.bash hook)"
conda activate PPUU

# eval_fm.py lives one directory up; stop if we cannot get there
cd ../ || exit 1

# ${name:?...} aborts with a clear message when a variable is missing, instead
# of letting a flag swallow the next flag as its value
srun python eval_fm.py -mfile "${mfile:?mfile is not set}" -batch_size 4 -npred 200 -sampling "${sampling:?sampling is not set}" -n_sample 10 -n_batches 10 -save_video 1
#srun python eval_fm.py -mfile $1 -batch_size 1 -npred 1000 -sampling $2 -n_sample 10 -n_batches 50 -save_video 1 -graph_density $3 -n_mixture 20
#srun python eval_fm.py -mfile $1 -batch_size 4 -npred 50 -sampling pdf -n_sample 200 -n_batches 100 -save_video 0 -n_mixture 1
#!/bin/bash

# Submit one critic-training job for each forward-model checkpoint listed
# below, all with the same sampling mode and seed.

mfiles=(
    "model=fwd-cnn-bsize=16-ncond=10-npred=20-lrt=0.0001-nhidden=100-nfeature=128-decoder=0-combine=add-nz=32-beta=0.0-warmstart=0.model"
    "model=fwd-cnn-vae-fp-bsize=16-ncond=10-npred=20-lrt=0.0001-nhidden=100-nfeature=128-decoder=0-combine=add-gclip=1.0-nz=32-beta=1e-05-warmstart=1.model"
    "model=fwd-cnn-vae-fp-bsize=16-ncond=10-npred=20-lrt=0.0001-nhidden=100-nfeature=128-decoder=0-combine=add-gclip=1.0-nz=32-beta=0.0001-warmstart=1.model"
)

sampling=fp
seed=4

for mfile in "${mfiles[@]}"; do
    # Positional arguments: model file, sampling mode, seed, and a constant 1
    sbatch submit_train_critic.slurm "$mfile" "$sampling" "$seed" 1
done
#!/bin/bash

# Submit one planning job for every policy checkpoint matching the pattern
# below.

model_dir=/home/mbhenaff/projects/pytorch-Traffic-Simulator/scratch/models_v11/

method=policy-tm

# Make a pattern without matches expand to nothing, so that no job is
# submitted for the literal, unexpanded pattern.
shopt -s nullglob

# Alternative checkpoint selections, kept for reference:
#for policy_model in ${model_dir}/policy_networks/*svg*vae*learnedcost=*.model; do
#for policy_model in ${model_dir}/policy_networks/*policy-il*lrt=0.0001*nmixture=1-*.model;
#for policy_model in ${model_dir}/policy_networks/*svg*zdropout=0.5*npred=30*ureg=0.05*inferz=0*learnedcost=1*seed=3*novalue.model; do
#for policy_model in ${model_dir}/policy_networks/*svg*deterministic*npred=40*novalue.model; do
#for policy_model in ${model_dir}/policy_networks/*svg*zdropout=0.5*npred=30*depeweg=1*novalue.model; do
#for policy_model in ${model_dir}/policy_networks/*svg*npred=*lambdaa=0*5*gamma*.model; do
#for policy_model in ${model_dir}/policy_networks/*svg*npred=3-*zdropout=0.0*seed=2*novalue.model; do
for seed in 2 3; do
    for policy_model in "${model_dir}"/policy_networks/*mbil*npred=5-*seed=${seed}*.model; do
        #for policy_model in ${model_dir}/policy_networks/*svg*deterministic*npred=3-*lambdaa=0.0-*seed=2*novalue.model; do
        policy_name=$(basename "$policy_model")
        echo "$policy_name"
        sbatch submit_plan_policy.slurm "$method" "$model_dir" "$policy_name"
    done
done
# One metrics line of a policy-training log: the step number followed by five
# train values and five test values.
pattern = re.compile(r'.*step (\d+) \| train: \[c: (\S+), l: (\S+), u: (\S+), a: (\S+), p: (\S+) \] \| test: \[c: (\S+), l:(\S+), u: (\S+), a: (\S+), p: (\S+)]')
# Data-frame column names, in the order of the groups captured by `pattern`
df_header = (
    'Step',
    'TrPrx', 'TrLan', 'TrUnc', 'TrAct', 'TrLss',
    'TePrx', 'TeLan', 'TeUnc', 'TeAct', 'TeLss',
)
# Marker of a header line; everything up to its last occurrence is skipped
job_header = 'job name'


def load_log_file(file_name):
    """Parse a policy-training log file into a data frame.

    Lines up to and including the last one containing ``job_header`` are
    skipped (the first line is always skipped, even without a header). Of the
    remaining lines, those matching ``pattern`` become one row each; any other
    line is ignored.

    :param file_name: path of the log file to read
    :return: ``pandas.DataFrame`` with the columns of ``df_header``; ``Step``
        holds integers, every other column floats
    """
    with open(file_name, 'r') as policy_log_file:
        lines = policy_log_file.readlines()

    # Locate the last header line
    last_header_line_nb = 0  # lines to be skipped
    for n, line in enumerate(lines):
        if job_header in line:
            last_header_line_nb = n

    # Read valid data
    policy_log_list = list()
    for line in lines[last_header_line_nb + 1:]:
        match = pattern.match(line)
        if match is None:
            continue  # not a metrics line (blank line, warning, ...)
        step, *metrics = match.groups()
        policy_log_list.append((int(step), *(float(m) for m in metrics)))

    # Create data frame
    return pd.DataFrame(data=policy_log_list, columns=df_header)
# Extract the (width, length) of every vehicle of a map, per time slot, and
# save the result next to the state-action-cost data.

parser = argparse.ArgumentParser()
parser.add_argument('-map', type=str, default='i80', choices=('ai', 'i80', 'us101', 'lanker', 'peach'))
opt = parser.parse_args()

path = './traffic-data/xy-trajectories/{}/'.format(opt.map)
trajectories_path = './traffic-data/state-action-cost/data_{}_v0'.format(opt.map)
# One time slot per directory found below trajectories_path
time_slots = [d[0].split("/")[-1] for d in os.walk(trajectories_path) if d[0] != trajectories_path]

# Column names of the whitespace-separated trajectory files
column_names = (
    'Vehicle ID',
    'Frame ID',
    'Total Frames',
    'Global Time',
    'Local X',
    'Local Y',
    'Global X',
    'Global Y',
    'Vehicle Length',
    'Vehicle Width',
    'Vehicle Class',
    'Vehicle Velocity',
    'Vehicle Acceleration',
    'Lane Identification',
    'Preceding Vehicle',
    'Following Vehicle',
    'Spacing',
    'Headway'
)

car_sizes = dict()
for ts in time_slots:
    d = pd.read_table(path + ts + '.txt', sep=r'\s+', header=None, names=column_names)

    # Keep the first record of every vehicle: one pass over the table instead
    # of filtering the whole table once per vehicle
    first_rows = d.drop_duplicates(subset='Vehicle ID')
    sizes = first_rows[['Vehicle Width', 'Vehicle Length']].values

    car_sizes[ts] = dict()
    for c, size in zip(first_rows['Vehicle ID'], sizes):
        car_sizes[ts][c] = tuple(size)  # (width, length)
        print(c)

torch.save(car_sizes, 'traffic-data/state-action-cost/data_{}_v0/car_sizes.pth'.format(opt.map))
class MergingMap(Simulator):
    """Simulator variant that uses PatchedCar and the lane drawing of I80."""

    # Car class instantiated by the environment
    EnvCar = PatchedCar

    # Expose the module-level scale and lane width on the class
    SCALE = SCALE
    LANE_W = LANE_W

    # Reuse the lane-drawing routine of the I80 map
    _draw_lanes = I80._draw_lanes

    def __init__(self, **kwargs):
        """Build the simulator with a fixed lane count and time step.

        Any ``nb_lanes`` or ``delta_t`` supplied by the caller is overridden.
        """
        kwargs.update(nb_lanes=6, delta_t=1/10)
        super().__init__(**kwargs)

        # One lane more than requested from the base class
        # (presumably the ramp handled by PatchedCar; confirm)
        self.nb_lanes = 7

        screen_width = 85 * self.LANE_W
        screen_height = self.nb_lanes * self.LANE_W + 5 * self.LANE_W
        self.screen_size = (screen_width, screen_height)

        # Only open a window when displaying is requested
        if self.display:
            self.screen = pygame.display.set_mode(self.screen_size)
class Point:
    """2D point / vector with the arithmetic needed for drawing."""

    # constructed using a normal tuple
    def __init__(self, point_t=(0, 0)):
        self.x = float(point_t[0])
        self.y = float(point_t[1])

    # define all useful operators
    def __add__(self, other):
        return Point((self.x + other.x, self.y + other.y))

    def __sub__(self, other):
        return Point((self.x - other.x, self.y - other.y))

    def __mul__(self, scalar):
        return Point((self.x * scalar, self.y * scalar))

    def __truediv__(self, scalar):
        return Point((self.x / scalar, self.y / scalar))

    def norm(self):
        """Return the Euclidean length of the vector."""
        return math.sqrt(self.x ** 2 + self.y ** 2)

    # get back values in original tuple format
    def get(self):
        return self.x, self.y


def draw_dashed_line(surf, color, start_pos, end_pos, width=1, dash_length=10):
    """Draw a dashed line on ``surf`` from ``start_pos`` to ``end_pos``.

    Segments of ``dash_length`` are drawn alternately, starting with a drawn
    one. Nothing is drawn when the two end points coincide.

    :param surf: surface passed on to ``pygame.draw.line``
    :param color: colour passed on to ``pygame.draw.line``
    :param start_pos: (x, y) of the first end point
    :param end_pos: (x, y) of the second end point
    :param width: line width passed on to ``pygame.draw.line``
    :param dash_length: length of every dash and of every gap
    """
    origin = Point(start_pos)
    target = Point(end_pos)
    displacement = target - origin
    length = displacement.norm()
    if length == 0:
        return  # no direction to draw along; avoids a division by zero below
    slope = displacement / length  # unit vector pointing from start to end

    # Every second segment is drawn, the others are the gaps
    for index in range(0, round(length / dash_length), 2):
        start = origin + (slope * index * dash_length)
        end = origin + (slope * (index + 1) * dash_length)
        pygame.draw.line(surf, color, start.get(), end.get(), width)


def draw_text(screen, text, xy, font_size=30, colour=(255, 255, 255), font=None):
    """Render ``text`` on ``screen`` with its top-left corner at ``xy``.

    :param screen: surface the text is blitted onto
    :param text: string to render
    :param xy: (left, top) position of the text rectangle
    :param font_size: size of the default system font; unused when ``font``
        is given
    :param colour: text colour
    :param font: font object to render with; a system font is created if None
    """
    if font is None:
        font = pygame.font.SysFont(None, font_size)
    text = font.render(text, True, colour)
    text_rect = text.get_rect()
    text_rect.left = xy[0]
    text_rect.top = xy[1]
    screen.blit(text, text_rect)


def draw_rect(screen, colour, rect, direction=(1, 0), thickness=0):
    """Draw a rotated rectangle and return what ``pygame.draw.polygon`` returns.

    :param screen: surface to draw on
    :param colour: colour of the polygon
    :param rect: ``(x, y, l, w)``; before rotation the rectangle spans
        ``x`` to ``x + l`` horizontally and ``y - w/2`` to ``y + w/2``
        vertically
    :param direction: ``(c, s)`` entries of the rotation matrix
        ``((c, -s), (s, c))`` applied about ``(x, y)``
    :param thickness: width argument passed on to ``pygame.draw.polygon``
    """
    x, y, l, w = rect
    xy = np.array(((x, y - w/2), (x, y + w/2), (x + l, y + w/2), (x + l, y - w/2)))
    c, s = direction
    rot = np.array(((c, -s), (s, c)))
    # Rotate the corners about (x, y)
    xy = (rot @ (xy - (x, y)).T).T + (x, y)
    return pygame.draw.polygon(screen, colour, xy, thickness)
plt.rc('figure', figsize=(20,5)) 27 | 28 | # %% 29 | policy_path = '/misc/vlgscratch4/LecunGroup/nvidia-collab/models_v12/policy_networks' 30 | 31 | deterministic_name = 'MPUR-policy-deterministic-model=vae-zdropout=0.5-nfeature=256-bsize=6-npred=30-ureg=0.05-lambdal=0.2-lambdaa=0.0-gamma=0.99-lrtz=0.0-updatez=0-inferz=0-learnedcost=1-seed={seed}-novalue.log' 32 | stochastic_name = 'MPUR-policy-gauss-model=vae-zdropout=0.5-policy-gauss-nfeature=256-bsize=6-npred=30-ureg=0.05-lambdal=0.2-lambdaa=0.0-gamma=0.99-lrtz=0.0-updatez=0-inferz=0-learnedcost=1-seed={seed}-novalue.log' 33 | 34 | dtr_policies = list(deterministic_name.format(seed=s) for s in range(1, 3 + 1)) 35 | stc_policies = list(stochastic_name.format(seed=s) for s in range(10)) 36 | 37 | # %% 38 | dtr_df = tuple(load_log_file(path.join(policy_path, policy)) for policy in dtr_policies) 39 | stc_df = tuple(load_log_file(path.join(policy_path, policy)) for policy in stc_policies) 40 | 41 | # %% 42 | print('Available columns:', *dtr_df[0].columns) 43 | 44 | # %% 45 | axis = None 46 | items = ['TrLss', 'TeLss'] 47 | items = ['TeLss'] 48 | 49 | # Plot deterministic policy 50 | for n, df in enumerate(dtr_df): 51 | labels = [f'Dtr{n}-{i}' for i in items] 52 | axis = df.plot(x='Step', y=items, label=labels, ax=axis) 53 | 54 | # Plot stochastic policy 55 | for n, df in enumerate(stc_df): 56 | labels = [f'Stc{n}-{i}' for i in items] 57 | axis = df.plot(x='Step', y=items, label=labels, ax=axis, ls='--') 58 | 59 | # Set some plotting config 60 | plt.xlim(left=80000) 61 | plt.ylim(top=.22) 62 | -------------------------------------------------------------------------------- /scripts/run_train_fm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Allows named arguments 3 | set -k 4 | 5 | rm *.err 6 | rm *.out 7 | 8 | for model in fwd-cnn-vae-fp; do 9 | for lrt in 0.0001; do 10 | for nfeature in 256; do 11 | for warmstart in 1; do 12 | for ncond in 20; do 13 | for 
# Replay a map in its gym environment, stepping it with zero actions.

# Map name -> (gym environment id, entry point)
environments = {
    'ai': ('Traffic-v0', 'traffic_gym:Simulator'),
    'i80': ('I-80-v0', 'map_i80:I80'),
    'us101': ('US-101-v0', 'map_us101:US101'),
    'lanker': ('Lankershim-v0', 'map_lanker:Lankershim'),
    'peach': ('Peachtree-v0', 'map_peach:Peachtree'),
}

parser = argparse.ArgumentParser()
parser.add_argument('-nb_conditions', type=int, default=10)
parser.add_argument('-display', type=int, default=1)
parser.add_argument('-map', type=str, default='i80', choices=tuple(environments))
parser.add_argument('-state_image', type=int, default=0)
parser.add_argument('-store', type=int, default=0)
parser.add_argument('-nb_episodes', type=int, default=1)
# argparse applies `type` to string defaults only, so the default has to be an
# int itself (1e3 would leave a float here)
parser.add_argument('-fps', type=int, default=1000)
parser.add_argument('-delta_t', type=float, default=0.1)

opt = parser.parse_args()

kwargs = {
    'fps': opt.fps,
    'nb_states': opt.nb_conditions,
    'display': opt.display,
    'state_image': opt.state_image,
    'store': opt.store,
    'delta_t': opt.delta_t,
}

# Register every environment with the same keyword arguments
for env_id, entry_point in environments.values():
    gym.envs.registration.register(
        id=env_id,
        entry_point=entry_point,
        kwargs=kwargs,
    )

env_names = {map_name: env_id for map_name, (env_id, _) in environments.items()}

print('Building the environment (loading data, if any)')
env = gym.make(env_names[opt.map])

for episode in range(opt.nb_episodes):
    # env.reset(frame=int(input('Frame: ')), time_slot=0)
    env.reset(frame=0, time_slot=0)

    done = False
    while not done:
        # Step with an all-zero action vector
        observation, reward, done, info = env.step(numpy.zeros((2,)))
        # print(observation, reward, done, info)
        env.render()

    print('Episode completed!')

print('Done')
class ControlledI80Car(I80Car):
    """I-80 car that can switch from replaying recorded data to being controlled.

    While `is_controlled` is False (or fewer than `buffer_size` state images
    have been collected) the lane comes from the parent class; afterwards it is
    derived from the car's current position and the `lanes` description.
    """

    # Import get_lane_set from PatchedCar
    get_lane_set = PatchedCar.get_lane_set

    def __init__(self, df, y_offset, look_ahead, screen_w, font=None, kernel=0, dt=1/10):
        """Forward all arguments to `I80Car` and initialise the control-related state."""
        super().__init__(df, y_offset, look_ahead, screen_w, font, kernel, dt)
        self.is_controlled = False  # flipped externally when the car is taken over
        self.buffer_size = 0  # number of state images required before control kicks in
        self.lanes = None  # sequence of dicts with 'min' / 'max' keys (see current_lane)
        self.arrived_to_dst = False  # arrived to destination
        self.frames = list()

    @property
    def current_lane(self):
        """Return the lane index of the car, updating the termination flags.

        NOTE: this property has side effects. Depending on the position it sets
        `self.off_screen` and `self.arrived_to_dst`; the order of the checks
        below matters because a later check may overwrite `arrived_to_dst`.
        """
        # If following the I-80 trajectories
        if not self.is_controlled or len(self._states_image) < self.buffer_size:
            return super().current_lane

        # Otherwise fetch x location
        x = self._position[0]
        # Far enough along x: flag the car as off screen and as arrived
        if x > self.screen_w - 1.75 * self.look_ahead:
            self.off_screen = True
            self.arrived_to_dst = True

        # Fetch the y location
        y = self._position[1]

        # If way too up
        if y < self.lanes[0]['min']:
            self.off_screen = True
            self.arrived_to_dst = False  # overrides the flag possibly set above
            return 0

        # Maybe within a sensible range?
        for lane_idx, lane in enumerate(self.lanes):
            if lane['min'] <= y <= lane['max']:
                return lane_idx

        # Or maybe on the ramp
        # (boundary below the last lane that shrinks linearly with x;
        # presumably 53 and 0.035 are pixel-space constants of the I-80 map — TODO confirm)
        bottom = self.lanes[-1]['max']
        if y <= bottom + 53 - x * 0.035:
            return 6

        # Actually, way too low
        self.off_screen = True
        self.arrived_to_dst = False
        return 6

    @property
    def is_autonomous(self):
        """True once the car is controlled and more than `buffer_size` state images exist."""
        # NOTE(review): this uses a strict `>` whereas current_lane falls back to the
        # parent while `<`; with exactly `buffer_size` images the lane is already
        # position-based but the car is not yet reported as autonomous — confirm intended.
        return self.is_controlled and len(self._states_image) > self.buffer_size


class ControlledI80(I80):
    """I-80 environment whose cars are `ControlledI80Car` instances."""

    # Environment's car class
    EnvCar = ControlledI80Car

    def __init__(self, **kwargs):
        """Pass every keyword argument straight to `I80`."""
        super().__init__(**kwargs)

    def reset(self, **kwargs):
        """Reset the environment and step it until an observation is available.

        Returns the first observation that is not None.
        """
        super().reset(**kwargs)
        observation = None
        # Keep stepping (with the default action) until step() yields an observation
        while observation is None:
            observation, reward, done, info = self.step()
        return observation
ylabel('cost') 36 | axs[0].set_xlabel('position') 37 | axs[0].legend(loc="upper left") 38 | 39 | ## second subplot 40 | x = 1.2 - abs(linspace(-1.2, 1.2, 81)); x[x > 1] = 1 41 | y = zeros(81); y[27] = 1.; y[67]=-1 42 | 43 | axs[1].plot(x, label='proximity mask') 44 | axs[1].stem(y, linefmt='g', markerfmt='og', label='neigh. car after applying filter') 45 | axs[1].set_title('neighbor cars after applying the proposed filter') 46 | # ylabel('cost') 47 | axs[1].set_xlabel('position') 48 | axs[1].legend(loc="upper left") 49 | 50 | ## cost_arr wrt the position of the ego car 51 | x_wide = np.concatenate((np.zeros(30),x,np.zeros(30))) 52 | 53 | cost_arr = zeros(51) 54 | for i in range(-20,31): 55 | x_crop = x_wide[30-i:30-i+81] 56 | cost = sum(x_crop * y) ** 2 57 | cost_arr[i+20] = cost 58 | 59 | ## third subplot 60 | # x = 1.2 - abs(linspace(-1.2, 1.2, 81)); x[x > 1] = 1 61 | # x = zeros(81); x[34:47] = 1 62 | # y = zeros(81); y[23] = 1.; y[67]=-1 63 | # y = 1.2 - abs(linspace(-1.2, 1.2, 15)); y[y > 1] = 1 64 | 65 | axs[2].plot(x, label='proximity mask') 66 | axs[2].plot(array(range(23,71)), cost_arr[3:], label='cost: sum(mask*green_channel) ** 2') 67 | axs[2].stem(y, linefmt='g', markerfmt='og', label='neigh. 
def x64tox16(dtype):
    """Map a 64-bit NumPy dtype to its 16-bit counterpart.

    :param dtype: `int64` / `float64`, or anything comparing equal to them
        (e.g. the `numpy.dtype` objects found in `DataFrame.dtypes`).
    :return: `int16` for `int64`, `float16` for `float64`.
    :raises ValueError: for any other dtype.
    """
    # `==` (not `is`) so that numpy.dtype('int64') matches the int64 scalar type
    if dtype == int64:
        return int16
    if dtype == float64:
        return float16
    raise ValueError(f'Unsupported dtype {dtype}: expected int64 or float64')


def binarise(time_slots_):
    """Convert whitespace-separated trajectory text files into compact pickles.

    For every time slot the file `traffic-data/xy-trajectories/<slot>.txt` is
    read, the unused columns are dropped, the remaining 64-bit columns are cast
    to 16 bits and the result is saved next to the source as `<slot>.pkl`.

    :param time_slots_: iterable of time-slot names (relative paths, no extension).
    """

    for time_slot in time_slots_:
        print(f' > Load time slot: {time_slot}')
        src_file_name = f'traffic-data/xy-trajectories/{time_slot}.txt'
        # Raw string: '\s' in a normal literal is an invalid escape sequence
        df = read_table(src_file_name, sep=r'\s+', header=None, names=(
            'Vehicle ID',
            'Frame ID',
            'Total Frames',
            'Global Time',
            'Local X',
            'Local Y',
            'Global X',
            'Global Y',
            'Vehicle Length',
            'Vehicle Width',
            'Vehicle Class',
            'Vehicle Velocity',
            'Vehicle Acceleration',
            'Lane Identification',
            'Preceding Vehicle',
            'Following Vehicle',
            'Spacing',
            'Headway'
        ))

        print(' > Drop unnecessary fields')
        df.drop(columns=[
            'Total Frames',
            'Global Time',
            'Global X',
            'Global Y',
            'Vehicle Class',
            'Vehicle Acceleration',
            'Preceding Vehicle',
            'Following Vehicle',
            'Spacing',
            'Headway',
        ], inplace=True)

        # 64-bit values take 8 bytes, 16-bit values take 2 bytes
        print(' > Cast {int,float}64 to {int,float}16, from 8 to 2 bytes per value')
        print(' Source data frame data types:', df.dtypes, sep='\n')
        src_columns_dtype = dict(df.dtypes)
        dst_columns_dtype = {k: x64tox16(v) for k, v in src_columns_dtype.items()}
        df = df.astype(dtype=dst_columns_dtype)
        print(' Destination data frame data types:', df.dtypes, sep='\n')

        print(' > Save binary (pickled) file')
        dst_file_name = f'traffic-data/xy-trajectories/{time_slot}.pkl'
        df.to_pickle(dst_file_name)

        # Show the size of both files for a quick before/after comparison
        print(' > Source and destination files')
        system(f'ls -lh {src_file_name}')
        system(f'ls -lh {dst_file_name}')
dict( 36 | display=opt.display, 37 | state_image=opt.state_image, 38 | store=opt.store, 39 | fps=opt.fps, 40 | nb_lanes=opt.lanes, 41 | traffic_rate=opt.traffic_rate, 42 | data_dir=opt.data_dir, 43 | delta_t=opt.delta_t, 44 | ) 45 | 46 | register( 47 | id='Traffic-v0', 48 | entry_point='traffic_gym:Simulator', 49 | kwargs=kwargs 50 | ) 51 | 52 | register( 53 | id='I-80-v0', 54 | entry_point='map_i80:I80', 55 | kwargs=kwargs 56 | ) 57 | 58 | gym.envs.registration.register( 59 | id='US-101-v0', 60 | entry_point='map_us101:US101', 61 | kwargs=kwargs, 62 | ) 63 | 64 | gym.envs.registration.register( 65 | id='Lankershim-v0', 66 | entry_point='map_lanker:Lankershim', 67 | kwargs=kwargs, 68 | ) 69 | 70 | gym.envs.registration.register( 71 | id='Peachtree-v0', 72 | entry_point='map_peach:Peachtree', 73 | kwargs=kwargs, 74 | ) 75 | 76 | env_names = { 77 | 'ai': 'Traffic-v0', 78 | 'i80': 'I-80-v0', 79 | 'us101': 'US-101-v0', 80 | 'lanker': 'Lankershim-v0', 81 | 'peach': 'Peachtree-v0', 82 | } 83 | 84 | print('Building the environment (loading data, if any)') 85 | env = gym.make(env_names[opt.map]) 86 | 87 | env.reset(frame=0, time_slot=opt.time_slot) 88 | done = False 89 | while not done: 90 | observation, reward, done, info = env.step(np.zeros((2,))) 91 | env.render() 92 | 93 | print(f'Data generation for <{opt.map}, time slot {opt.time_slot}> completed') 94 | -------------------------------------------------------------------------------- /Notebooks/Loss-design.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.2' 9 | # jupytext_version: 1.0.1 10 | # kernelspec: 11 | # display_name: Python 3 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% 17 | from matplotlib.pylab import * 18 | 19 | # %% 20 | style.use(['dark_background', 'bmh']) 21 | rc('axes', 
facecolor='none') 22 | rc('figure', figsize=(16, 4)) 23 | 24 | # %% 25 | x = r_[0:1.5:0.01] 26 | y = maximum(0, 1 - sqrt(x)) 27 | y2 = maximum(0, 1 - x) 28 | 29 | # %% 30 | axhline() 31 | axvline() 32 | 33 | plot(x, y) 34 | plot(x, y2) 35 | axis('equal') 36 | xticks(arange(-0.4, 2, 0.2)); 37 | 38 | # %% 39 | x = linspace(-2, 2, 1001) 40 | lane_width = 3.7 # m 41 | car_width = 1.8 # m 42 | alpha = 0.5 # m 43 | 44 | y = piecewise(x, [ 45 | x < (-car_width / 2 - alpha), 46 | (-car_width / 2 - alpha <= x) * (x < -car_width / 2), 47 | (-car_width / 2 <= x) * (x < car_width / 2), 48 | (car_width / 2 <= x) * (x < car_width / 2 + alpha), 49 | car_width / 2 + alpha <= x 50 | ], [ 51 | 0, 52 | lambda z: (z + car_width / 2 + alpha) / alpha, 53 | 1, 54 | lambda z: 1 - (z - car_width / 2) / alpha, 55 | 0 56 | ]) 57 | 58 | # %% 59 | axvline() 60 | axhline() 61 | axvline(-lane_width / 2, color='y') 62 | axvline(lane_width / 2, color='y') 63 | axvline(-car_width / 2, color='g') 64 | axvline(car_width / 2, color='g') 65 | 66 | plot(x, y) 67 | 68 | xlabel('y-axis [m]') 69 | ylabel('y-axis lane loss [ / ]') 70 | 71 | # %% 72 | loss = 1 - abs(linspace(-1, 1, 11)) 73 | stem(loss) 74 | 75 | # %% 76 | print(broadcast_to(loss.reshape(-1, 1), (11, 5))) 77 | 78 | # %% 79 | a = arange(1, 12 + 1).reshape(3, 4) 80 | print(a) 81 | 82 | # %% 83 | print(rot90(a)) 84 | 85 | # %% 86 | x = 1.2 - abs(linspace(-1.2, 1.2, 81)); x[x > 1] = 1 87 | y = 1.2 - abs(linspace(-1.2, 1.2, 15)); y[y > 1] = 1 88 | 89 | stem(x) 90 | stem(y, 'C1', markerfmt='oC1') 91 | 92 | # %% 93 | mask = y.reshape(-1, 1) @ x.reshape(1, -1) 94 | 95 | imshow(mask) 96 | axis('equal') 97 | 98 | # %% 99 | import torch 100 | 101 | # %% 102 | cost = torch.load('/Users/atcold/Traffic/cost.pth') 103 | 104 | # %% 105 | plot(cost[0].numpy()) 106 | plot(cost[3].numpy()) 107 | plot(cost[1].numpy()) 108 | legend(('Symbolic proximity', 'Pixel proximity', 'Lane')) 109 | title('Costs comparison') 110 | xlabel('Time steps [0.1s]') 111 | 112 | # %% 
def best_of_k(x):
    """Select, for every batch element, the sample with the highest mean score.

    :param x: array indexed as `x[b]` -> 2-D array (samples along axis 0,
        scores along axis 1); presumably shaped (batch, nsample, time).
    :return: stacked array holding one selected sample (row) per batch element.
    """
    best_samples = []
    for samples in x:
        mean = numpy.mean(samples, 1)
        # argsort is ascending, hence the last index points to the largest mean
        best_samples.append(samples[numpy.argsort(mean)[-1]])
    return numpy.stack(best_samples)


def plot_mean_and_CI(mean, lb, ub, color_mean=None, color_shading=None):
    """Plot a mean curve with a shaded band between `lb` and `ub`.

    The x axis is the 1-based time step index.

    :param mean: sequence of mean values, one per time step.
    :param lb: lower bound of the band, same length as `mean`.
    :param ub: upper bound of the band, same length as `mean`.
    :param color_mean: matplotlib format/colour for the mean line.
    :param color_shading: matplotlib colour for the shaded band.
    """
    time_steps = list(range(1, len(mean) + 1))
    # plot the shaded range of the confidence intervals
    plt.fill_between(time_steps, ub, lb, color=color_shading, alpha=0.2)
    # plot the mean on top
    plt.plot(time_steps, mean, color_mean)


def mean_confidence_interval(data, confidence=0.95):
    """Return the mean of `data` along axis 0 with its Student-t confidence interval.

    :param data: array whose first axis enumerates the observations.
    :param confidence: two-sided confidence level, 0.95 by default.
    :return: tuple `(mean, lower, upper)` with `lower = mean - h` and
        `upper = mean + h`, where `h` is the half-width of the interval.
    """
    n = data.shape[0]
    m, se = numpy.mean(data, 0), scipy.stats.sem(data, 0)
    # Public `ppf` instead of the private `_ppf`, which skips argument validation
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, m - h, m + h
torch.load(f'{opt.path}/model=fwd-cnn-vae3-fp-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-nhidden=128-fgeom=1-zeroact=0-zmult=0-dropout=0.0-nz=32-beta=1e-06-zdropout=0.5-gclip=5.0-warmstart=1-seed=1.step200000.model-nbatches=25-npred=200-nsample=200.eval/loss.pth') 54 | x = x[loss_type].view(100*4, nsample, -1)[:, :, :npred].numpy() 55 | x=-10*numpy.log(x) / numpy.log(10) 56 | best = best_of_k(x) 57 | mean, hi, low = mean_confidence_interval(best) 58 | plot_mean_and_CI(mean, low, hi, color_mean='b-', color_shading='b') 59 | 60 | x = torch.load(f'{opt.path}/model=fwd-cnn-vae3-fp-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-nhidden=128-fgeom=1-zeroact=0-zmult=0-dropout=0.0-nz=32-beta=1e-05-zdropout=0.5-gclip=5.0-warmstart=1-seed=1.step200000.model-nbatches=25-npred=200-nsample=200.eval/loss.pth') 61 | x = x[loss_type].view(100*4, nsample, -1)[:, :, :npred].numpy() 62 | x=-10*numpy.log(x) / numpy.log(10) 63 | best = best_of_k(x) 64 | mean, hi, low = mean_confidence_interval(best) 65 | plot_mean_and_CI(mean, low, hi, color_mean='magenta', color_shading='magenta') 66 | 67 | 68 | x = torch.load(f'{opt.path}/model=fwd-cnn-ten3-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-nhidden=128-fgeom=1-zeroact=0-zmult=0-dropout=0.0-nz=32-beta=0.0-zdropout=0.5-gclip=5.0-warmstart=1-seed=1.step200000.model-nbatches=25-npred=200-nsample=200-sampling=fp.eval/loss.pth') 69 | x = x[loss_type].view(100*4, nsample, -1)[:, :, :npred].numpy() 70 | x=-10*numpy.log(x) / numpy.log(10) 71 | best = best_of_k(x) 72 | mean, hi, low = mean_confidence_interval(best) 73 | plot_mean_and_CI(mean, low, hi, color_mean='red', color_shading='red') 74 | 75 | 76 | ''' 77 | x = torch.load(f'{opt.path}/model=fwd-cnn-vae3-fp-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-nhidden=128-fgeom=1-zeroact=0-zmult=0-dropout=0.0-nz=32-beta=0.0001-zdropout=0.5-gclip=5.0-warmstart=1-seed=1.step200000.model-nbatches=25-npred=200-nsample=200.eval/loss.pth') 78 | x = 
x[loss_type].view(100*4, nsample, -1)[:, :, :npred].numpy() 79 | x=-10*numpy.log(x) / numpy.log(10) 80 | best = best_of_k(x) 81 | mean, hi, low = mean_confidence_interval(best) 82 | plot_mean_and_CI(mean, low, hi, color_mean='cyan', color_shading='cyan') 83 | ''' 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | title = 'comparison_vae.pdf' 92 | plt.ylabel('Average PSNR', fontsize=18) 93 | plt.xlabel('Time Step', fontsize=18) 94 | plt.xticks([i+1 for i in range(npred)], fontsize=12) 95 | plt.legend(['VAE, beta=1e-06', 'VAE, beta=1e-05', 'TEN'], fontsize=16) 96 | plt.savefig(title) 97 | 98 | -------------------------------------------------------------------------------- /Notebooks/Target-lane.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.2' 9 | # jupytext_version: 1.2.1 10 | # kernelspec: 11 | # display_name: Python 3 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% 17 | from mpl_toolkits import mplot3d 18 | from matplotlib.pylab import * 19 | 20 | # %% 21 | style.use(['dark_background', 'bmh']) 22 | rc('axes', facecolor='k') 23 | rc('figure', facecolor='k') 24 | rc('figure', figsize=(10,5)) 25 | 26 | # %% 27 | # Lanes from -1 to 3 --> 0 is target lane 28 | y = r_[-1:3.1:.1] 29 | 30 | # 2 lane widths in front and in back 31 | x = r_[-2:2.1:.1] 32 | 33 | # %% 34 | # Target lane cost 35 | # target_lane_cost = y ** 2 / 4 36 | target_lane_cost = abs(y) * .5 37 | 38 | # %% 39 | # Color shorthands 40 | r, g, b, p = 'C1', 'C3', 'C0', 'C2' 41 | set_color = lambda c: dict(linefmt=c, basefmt=" ", markerfmt='o'+c) 42 | 43 | # %% 44 | # Target Lane 0, Ego Car Lane 2, Other Car Lane 1 45 | figure() 46 | y_proximity = maximum(1 - abs(1 - y), 0) 47 | stem(y, target_lane_cost + y_proximity, **set_color(p), label='Total Cost') 48 | stem(y, target_lane_cost, 
**set_color(b), label='Target Lane Cost') 49 | stem(y, y_proximity, **set_color(g), label='Y Proximity Cost') 50 | arrow_props = dict(width=1.5, facecolor='white') 51 | # annotate('Ego Car', (2.0, 0.0), (2, -0.25), arrowprops=arrow_props) 52 | annotate('Other Car', (1.0, 0.0), (1, -0.25), arrowprops=arrow_props) 53 | annotate('Target Lane', (0.0, 0.0), (0, -0.25), arrowprops=arrow_props) 54 | axis('equal') 55 | title('Target Lane Cost + Proximity Cost') 56 | legend() 57 | savefig('car_1_left.png') 58 | 59 | # Target Lane 0, Ego Car Lane 2, Other Car Lane 0 60 | figure() 61 | y_proximity = maximum(1 - abs(0 - y), 0) 62 | stem(y, target_lane_cost + y_proximity, **set_color(p), label='Total Cost') 63 | stem(y, target_lane_cost, **set_color(b), label='Target Lane Cost') 64 | stem(y, y_proximity, **set_color(g), label='Y Proximity Cost') 65 | # annotate('Ego Car', (2.0, 0.0), (2, -0.25), arrowprops=arrow_props) 66 | annotate('Other Car', (0.0, 0.0), (0.8, -0.25), arrowprops=arrow_props) 67 | annotate('Target Lane', (0.0, 0.0), (0, -0.25), arrowprops=arrow_props) 68 | axis('equal') 69 | title('Target Lane Cost + Proximity Cost') 70 | legend() 71 | savefig('car_2_left.png') 72 | 73 | # Target Lane 0, Ego Car Lane 1, Lane Cost accounted for 74 | figure() 75 | lane_cost = (maximum(0.5 - abs(0.5 - y), 0) + maximum(0.5 - abs(1.5 - y), 0)) * 0.4 76 | stem(y, target_lane_cost + lane_cost, **set_color(p), label='Total Cost') 77 | stem(y, target_lane_cost, **set_color(b), label='Target Lane Cost') 78 | stem(y, lane_cost, **set_color(r), label='Lane Cost') 79 | annotate('Ego Car', (1.0, 0.0), (1, -0.25), arrowprops=arrow_props) 80 | annotate('Target Lane', (0.0, 0.0), (0, -0.25), arrowprops=arrow_props) 81 | axis('equal') 82 | title('Target Lane Cost + Lane Cost') 83 | legend(); 84 | savefig('lane_change.png') 85 | 86 | # %% 87 | figure() 88 | plot(y, lane_cost) 89 | ylim(-0.05, 0.2) 90 | annotate('Ego Car', (1.0, 0.0), (1, -0.04), arrowprops=arrow_props) 91 | title('Lane Cost'); 
92 | 93 | # %% 94 | set_color_3d = lambda c: dict(color=c, marker='o', markeredgecolor=c, markevery=(0.1, 0.1)) 95 | x_proximity = maximum(0, 1 - abs(x)) 96 | figure() 97 | ax = axes(projection='3d'); 98 | ax.set_xlabel('x-direction') 99 | ax.set_ylabel('y-direction') 100 | ax.set_zlabel('Cost') 101 | for i in range(len(y)): 102 | # Target lane cost 103 | line = mplot3d.art3d.Line3D(*zip((0, y[i], 0), (0, y[i], target_lane_cost[i])), **set_color_3d(b)) 104 | ax.add_line(line) 105 | # Lane cost 106 | line = mplot3d.art3d.Line3D(*zip((0, y[i], 0), (0, y[i], lane_cost[i])), **set_color_3d(r)) 107 | ax.add_line(line) 108 | # X-Proximity cost 109 | line = mplot3d.art3d.Line3D(*zip((x[i], 1, 0), (x[i], 1, x_proximity[i])), **set_color_3d(g)) 110 | ax.add_line(line) 111 | ax.set_xlim3d(-2, 2) 112 | ax.set_ylim3d(-1, 3) 113 | ax.set_zlim3d(-0.25, 2); 114 | 115 | # %% 116 | -------------------------------------------------------------------------------- /Notebooks/New-cost-design.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --- 3 | # jupyter: 4 | # jupytext: 5 | # formats: ipynb,py:percent 6 | # text_representation: 7 | # extension: .py 8 | # format_name: percent 9 | # format_version: '1.3' 10 | # jupytext_version: 1.3.4 11 | # kernelspec: 12 | # display_name: Python 3 13 | # language: python 14 | # name: python3 15 | # --- 16 | 17 | # %% [markdown] 18 | # # Cost design wrt actions 19 | # 20 | # This notebook attempts to properly design the proximity cost function for the controlled agent. 21 | # 22 | # $$ 23 | # C_x(x) = a\big[x_s(v_0) - l/2 - x\big]^p 24 | # $$ 25 | # 26 | # where 27 | # 28 | # - $l$: vehicle length, *e.g.* equal to $4.8\,\text{m}$; 29 | # - $w$: vehicle width, *e.g.* equal to $1.8\,\text{m}$; 30 | # - $x_s(v_0)$: safety distance in the $x$ direction, $x_s(v_0) = \beta v_0 + {l \over 2} + 1\,\text{m}$, $\beta = 1.5$; 31 | # 32 | # Let's also enforce $[x] = \mathrm{m}$. 
33 | 34 | # %% 35 | from matplotlib.pylab import * 36 | 37 | # %% 38 | style.use(['dark_background', 'bmh']) 39 | rc('axes', facecolor='none') 40 | rc('figure', figsize=(12, 3)) 41 | rc('savefig', bbox='tight') 42 | 43 | # %% 44 | from astropy import units as u 45 | U = u.Unit 46 | 47 | # %% 48 | l = 4.8 * u.m 49 | w = 1.8 * u.m 50 | print(f'car length: {l}') 51 | print(f'car width: {w}') 52 | 53 | # %% 54 | lane = 3.7 * u.m 55 | print(f'US lane width: {lane}') 56 | 57 | # %% 58 | v0 = (30 * U('km/h')).si 59 | print(f"speed: {v0.to('km/h'):.1f}") 60 | 61 | β = 1.5 * u.s 62 | x_s = β * v0 + l/2 + 1 * u.m 63 | print(f'x safety distance: {x_s}') 64 | 65 | y_d = lane - 2 * (w / 2) 66 | print(f'y decay distance: {y_d:.1f}') 67 | 68 | # %% 69 | v_max = 130 * U('km/h') 70 | print(f'max speed: {v_max}') 71 | 72 | x_max = (v_max * u.s).si 73 | print(f'max lookahead: {x_max:.1f}') 74 | 75 | # %% 76 | y_max = 2 * lane 77 | print(f'max look sideways: {y_max}') 78 | 79 | # %% 80 | δ = 0.1 * u.m 81 | x = arange(-x_max.value, x_max.value, δ.value) * u.m 82 | y = arange(-y_max.value, y_max.value, δ.value) * u.m 83 | 84 | # %% [markdown] 85 | # Let's consider the poisitive (frontal) side only. 86 | # We want a line going through $(x_0, y_0)$, hence $y^+ - y_0 = m (x^+ - x_0)$. 87 | # Moreover, we want the slope to be $m = y_1 - y_0 \big/ x_1 - x_0$. 
88 | # Therefore, we end up with the following equality: 89 | # 90 | # $$ 91 | # y^+ - y_0 = {y_1 - y_0 \over x_1 - x_0}(x^+ - x_0) 92 | # $$ 93 | # 94 | # Finally, given that $(x_0, y_0) = (x_s, 0)$ and $(x_1, y_1) = \big({l\over2}, 1\big)$ we get that: 95 | # 96 | # $$ 97 | # \begin{aligned} 98 | # y^+ = {x^+ - x_s \over {l\over2} - x_s} &= - {x^+ - x_s \over x_s - {l\over2}} \\ 99 | # y^- &= + {x^- - x_s \over x_s - {l\over2}} 100 | # \end{aligned} 101 | # $$ 102 | # 103 | # Putting altogether, we get: 104 | # 105 | # $$ 106 | # y = - {\vert x \vert - x_s \over x_s - {l\over2}} 107 | # $$ 108 | 109 | # %% 110 | p = 2 # polynomial degree 111 | C_xx = minimum(maximum(-(abs(x) - x_s)/(x_s - l/2), 0), 1)**p 112 | r_y = minimum(maximum(-(abs(y) - y_d)/(y_d - w/2), 0), 1) 113 | 114 | 115 | plot(x, C_xx, c='C0', label='C_x(x)') 116 | plot(y, r_y, c='C2', label='y_ramp') 117 | xlabel('distance [m]') 118 | ylabel('cost [/]') 119 | legend(); 120 | for m in arange(-1.5, 2, 1): 121 | axvline(m * lane.value, lw=0.5, c=(1,0,0)) 122 | axvline(-x_s.value, lw=0.5, c=(0,1,0)) 123 | axvline(+x_s.value, lw=0.5, c=(0,1,0)) 124 | 125 | title(f'speed: {v0.to("km/h")}, safe distance: {x_s}, car: {l}×{w}, lane: {lane}') 126 | savefig('x-cost.png', dpi=300) 127 | 128 | # %% 129 | C_x = C_xx.reshape(-1, 1) @ r_y.reshape(1, -1) 130 | 131 | # %% 132 | f, a = subplots(figsize=(1.4, 7), dpi=100) 133 | a.matshow(C_x, origin='lower') 134 | axis('equal') 135 | title('C_x(x, y)') 136 | H, W = C_x.shape 137 | xticks(linspace(0, W-1, 3), [str(-lane), '0', str(lane)]) 138 | yticks(linspace(0, H-1, 11), [f'{n:.1f}' for n in linspace(-x_max, x_max, 11)]); 139 | # add some references 140 | for m in arange(-1.5, 2, 1): 141 | axvline(W/2 - 1 + m * lane / δ, lw=.5, c=(1,0,0)) 142 | for m in arange(-1, 2, 1): 143 | axvline(W/2 - 1 + m * lane / δ, lw=1, c='.5', ls=':') 144 | axhline(H/2 - 1 + x_s / δ, lw=.5, c=(0,1,0)) 145 | axhline(H/2 - 1 - x_s / δ, lw=.5, c=(0,1,0)) 146 | savefig('x-map.png', dpi=300) 
147 | -------------------------------------------------------------------------------- /plots/plot_z_trajectories.py: -------------------------------------------------------------------------------- 1 | import argparse, pdb, os, pickle, random, sys, numpy 2 | import gym 3 | import numpy as np 4 | import torch 5 | import torch.nn.functional as F 6 | import torch.nn as nn 7 | from gym.envs.registration import register 8 | import scipy.misc 9 | from dataloader import DataLoader 10 | import utils 11 | from sklearn import decomposition 12 | import sklearn.manifold as manifold 13 | import matplotlib.pyplot as plt 14 | from mpl_toolkits.mplot3d import Axes3D 15 | 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('-dataset', type=str, default='i80') 19 | parser.add_argument('-debug', action='store_true') 20 | parser.add_argument('-batch_size', type=int, default=1) 21 | parser.add_argument('-v', type=int, default=1) 22 | parser.add_argument('-display', type=int, default=0) 23 | parser.add_argument('-seed', type=int, default=9999) 24 | parser.add_argument('-lanes', type=int, default=8) 25 | parser.add_argument('-traffic_rate', type=int, default=15) 26 | parser.add_argument('-n_episodes', type=int, default=1) 27 | parser.add_argument('-ncond', type=int, default=10) 28 | parser.add_argument('-npred', type=int, default=400) 29 | parser.add_argument('-n_batches', type=int, default=200) 30 | parser.add_argument('-n_samples', type=int, default=10) 31 | parser.add_argument('-sampling', type=str, default='fp') 32 | parser.add_argument('-topz_sample', type=int, default=10) 33 | parser.add_argument('-model_dir', type=str, default='models/') 34 | parser.add_argument('-mfile', type=str, default='model=fwd-cnn-ae-fp-bsize=16-ncond=10-npred=20-lrt=0.0001-nhidden=100-nfeature=128-decoder=0-combine=add-gclip=1-nz=32-beta=0.0-nmix=1-warmstart=1.model') 35 | parser.add_argument('-cuda', type=int, default=1) 36 | parser.add_argument('-save_video', type=int, default=1) 37 | 
def compute_pz(nbatches):
    """Run the model over `nbatches` training batches to refill `model.p_z`.

    `model.p_z` is reset to an empty list first; the model is then called with
    `save_z=True` (presumably this makes it record the latent samples in
    `model.p_z` — confirm against the model class).

    :param nbatches: number of training batches to process.
    """
    model.p_z = []
    for batch_idx in range(nbatches):
        fraction_done = float(batch_idx) / nbatches
        print('[estimating z distribution: {:2.1%}]'.format(fraction_done), end="\r")
        inputs, actions, targets = dataloader.get_batch_fm(
            'train', opt.npred, (opt.cuda == 1)
        )
        # Outputs are not used here; the call is made for its effect on the model
        pred, loss_kl = model(inputs, actions, targets, save_z=True)
        # Drop the batch references before fetching the next one
        del inputs, actions, targets
def get_quad_func(minima=1, zero_at=1, one_at=0):
    """Build a parabola with its vertex at `minima`, scaled to hit 0 and 1.

    The returned function is `f(x) = scale * (x - minima)**2 + shift`, with
    `scale` and `shift` chosen so that `f(zero_at) == 0` and `f(one_at) == 1`.

    :param minima: abscissa of the vertex of the parabola.
    :param zero_at: abscissa where the function evaluates to 0.
    :param one_at: abscissa where the function evaluates to 1.
    :return: callable mapping `x` to the quadratic profile.
    """
    sq_dist_zero = (zero_at - minima)**2
    sq_dist_one = (one_at - minima)**2
    scale = 1 / (sq_dist_one - sq_dist_zero)
    shift = -scale*sq_dist_zero

    def quad(x):
        return scale * (x - minima)**2 + shift

    return quad
def softmax(v, β=1):
    """Return the smooth maximum (log-sum-exp) of `v` with sharpness `β`.

    Computes `log(sum(exp(β * v))) / β`, which tends to `max(v)` as `β` grows.
    Despite its name this is the scalar "soft maximum", not the normalised
    softmax distribution.

    The largest scaled value is subtracted before exponentiating, so large
    inputs no longer overflow to `inf`.

    :param v: array-like of values.
    :param β: sharpness; `β = 0` is degenerate (division by zero).
    :return: scalar smooth maximum of `v`.
    """
    import numpy as np

    scaled = β * np.asarray(v)
    if scaled.size == 0:
        # log(sum of nothing) = log(0) = -inf, as in the naive formula
        return -np.inf / β
    peak = scaled.max()
    if not np.isfinite(peak):
        # ±inf / nan input: shifting would produce nan, so return it unshifted
        return peak / β
    return (peak + np.log(np.sum(np.exp(scaled - peak)))) / β
safe_distance') 126 | ylabel('∂proximity_cost / ∂s') 127 | savefig('normalised_attention.png') 128 | 129 | # %% 130 | # Max, softmax, sum 131 | N = 100 132 | start = 30 133 | x = torch.linspace(0, 1, N) 134 | y = dict() 135 | c = 1 - x 136 | c[:start] = 0 137 | y['cost profile'] = 1 - x 138 | y['car'] = torch.zeros(N) 139 | y['car'][start:] = 1 140 | y['car'].requires_grad = True 141 | for e in range(-1, 5): 142 | β = 10 ** (e / 2) 143 | if e == -1: β = 0 144 | if e == 4: β = 1e3 145 | 146 | cost = torch.logsumexp(β * y['cost profile'] * y['car'], dim=0 ) / β 147 | cost.backward() 148 | y[f'log-sum-exp β={β:.1f}'] = y['car'].grad.clone() 149 | y['car'].grad.zero_() 150 | 151 | for k in y: 152 | if hasattr(y[k],'grad'): y[k] = y[k].detach().numpy() 153 | plot(x, y[k], '-', label=k) 154 | 155 | axis('equal') 156 | legend() 157 | xlabel('distance normalised by safe_distance') 158 | ylabel('∂proximity_cost / ∂s') 159 | savefig('normalised_attention.png') 160 | 161 | # %% 162 | # Max, softmax, sum 163 | N = 100 164 | start = 30 165 | x = torch.linspace(0, 1, N) 166 | y = dict() 167 | c = 1 - x 168 | c[:start] = 0 169 | y['cost profile'] = 1 - x 170 | y['car'] = torch.zeros(N) 171 | y['car'][start:] = 1 172 | y['car'].requires_grad = True 173 | for e in range(-1, 5): 174 | β = 10 ** (e / 2) 175 | if e == -1: β = 0 176 | if e == 4: β = 1e3 177 | logit = y['cost profile'] * y['car'] 178 | cost = torch.softmax(β *logit, dim=0)*logit 179 | cost.sum().backward() 180 | y[f'self-attention β={β:.1f}'] = y['car'].grad.clone() 181 | y['car'].grad.zero_() 182 | 183 | for k in y: 184 | if hasattr(y[k],'grad'): y[k] = y[k].detach().numpy() 185 | plot(x, y[k], '-', label=k) 186 | 187 | axis('equal') 188 | legend() 189 | xlabel('distance normalised by safe_distance') 190 | ylabel('∂proximity_cost / ∂s') 191 | savefig('normalised_attention.png') 192 | -------------------------------------------------------------------------------- /train_IL.py: 
-------------------------------------------------------------------------------- 1 | import torch, numpy, argparse, pdb, os, math 2 | import utils 3 | import models 4 | from dataloader import DataLoader 5 | import torch.nn.functional as F 6 | import torch.optim as optim 7 | 8 | 9 | ########################################### 10 | # Train an imitation learner model 11 | ########################################### 12 | 13 | parser = argparse.ArgumentParser() 14 | # data params 15 | parser.add_argument('-dataset', type=str, default='i80') 16 | parser.add_argument('-v', type=int, default=4) 17 | parser.add_argument('-model', type=str, default='policy-il-mdn') 18 | parser.add_argument('-layers', type=int, default=3) 19 | parser.add_argument('-fmap_geom', type=int, default=1) 20 | parser.add_argument('-model_dir', type=str, default='models/policy_networks/') 21 | parser.add_argument('-n_episodes', type=int, default=20) 22 | parser.add_argument('-ncond', type=int, default=10) 23 | parser.add_argument('-npred', type=int, default=20) 24 | parser.add_argument('-seed', type=int, default=1) 25 | parser.add_argument('-batch_size', type=int, default=64) 26 | parser.add_argument('-dropout', type=float, default=0.0) 27 | parser.add_argument('-nfeature', type=int, default=256) 28 | parser.add_argument('-n_hidden', type=int, default=256) 29 | parser.add_argument('-n_mixture', type=int, default=10) 30 | parser.add_argument('-nz', type=int, default=2) 31 | parser.add_argument('-beta', type=float, default=0.1) 32 | parser.add_argument('-lrt', type=float, default=0.0001) 33 | parser.add_argument('-warmstart', type=int, default=0) 34 | parser.add_argument('-epoch_size', type=int, default=1000) 35 | parser.add_argument('-combine', type=str, default='add') 36 | parser.add_argument('-grad_clip', type=float, default=50) 37 | parser.add_argument('-debug', action='store_true') 38 | parser.add_argument('-enable_tensorboard', action='store_true', 39 | help='Enables tensorboard logging.') 40 | 
parser.add_argument('-tensorboard_dir', type=str, default='models/policy_networks', 41 | help='path to the directory where to save tensorboard log. If passed empty path' \ 42 | ' no logs are saved.') 43 | opt = parser.parse_args() 44 | 45 | 46 | opt.n_actions = 2 47 | opt.n_inputs = opt.ncond 48 | opt.height = 117 49 | opt.width = 24 50 | opt.h_height = 14 51 | opt.h_width = 3 52 | opt.hidden_size = opt.nfeature*opt.h_height*opt.h_width 53 | 54 | 55 | 56 | os.system('mkdir -p ' + opt.model_dir) 57 | 58 | dataloader = DataLoader(None, opt, opt.dataset) 59 | 60 | opt.model_file = f'{opt.model_dir}/model={opt.model}-bsize={opt.batch_size}-ncond={opt.ncond}-npred={opt.npred}-lrt={opt.lrt}-nhidden={opt.n_hidden}-nfeature={opt.nfeature}-nmixture={opt.n_mixture}-gclip={opt.grad_clip}-seed={opt.seed}' 61 | 62 | if 'vae' in opt.model or '-ten-' in opt.model: 63 | opt.model_file += f'-nz={opt.nz}-beta={opt.beta}' 64 | 65 | print(f'[will save model as: {opt.model_file}]') 66 | 67 | if opt.warmstart == 0: 68 | prev_model = '' 69 | 70 | policy = models.PolicyMDN(opt, npred=opt.npred) 71 | policy.intype('gpu') 72 | 73 | optimizer = optim.Adam(policy.parameters(), opt.lrt, eps=1e-3) 74 | 75 | def train(nbatches): 76 | policy.train() 77 | total_loss, nb = 0, 0 78 | for i in range(nbatches): 79 | optimizer.zero_grad() 80 | inputs, actions, targets, _, _ = dataloader.get_batch_fm('train') 81 | pi, mu, sigma, _ = policy(inputs[0], inputs[1]) 82 | loss = utils.mdn_loss_fn(pi, sigma, mu, actions.view(opt.batch_size, -1)) 83 | if not math.isnan(loss.item()): 84 | loss.backward() 85 | if opt.grad_clip != -1: 86 | torch.nn.utils.clip_grad_norm_(policy.parameters(), opt.grad_clip) 87 | optimizer.step() 88 | total_loss += loss.item() 89 | nb += 1 90 | else: 91 | print('warning, NaN') 92 | return total_loss / nb 93 | 94 | def test(nbatches): 95 | policy.eval() 96 | total_loss, nb = 0, 0 97 | for i in range(nbatches): 98 | inputs, actions, targets, _, _ = dataloader.get_batch_fm('valid') 99 | 
pi, mu, sigma, _ = policy(inputs[0], inputs[1]) 100 | loss = utils.mdn_loss_fn(pi, sigma, mu, actions.view(opt.batch_size, -1)) 101 | if not math.isnan(loss.item()): 102 | total_loss += loss.item() 103 | nb += 1 104 | else: 105 | print('warning, NaN') 106 | return total_loss / nb 107 | 108 | 109 | 110 | writer = utils.create_tensorboard_writer(opt) 111 | 112 | print('[training]') 113 | best_valid_loss = 1e6 114 | for i in range(200): 115 | train_loss = train(opt.epoch_size) 116 | valid_loss = test(opt.epoch_size) 117 | if valid_loss < best_valid_loss: 118 | best_valid_loss = valid_loss 119 | policy.intype('cpu') 120 | torch.save(policy, opt.model_file + '.model') 121 | policy.intype('gpu') 122 | 123 | if writer is not None: 124 | writer.add_scalar('Loss/train', train_loss, i) 125 | writer.add_scalar('Loss/valid', valid_loss, i) 126 | 127 | log_string = f'iter {opt.epoch_size*i} | train loss: {train_loss:.5f}, valid: {valid_loss:.5f}, best valid loss: {best_valid_loss:.5f}' 128 | print(log_string) 129 | utils.log(opt.model_file + '.log', log_string) 130 | 131 | if writer is not None: 132 | writer.close() 133 | -------------------------------------------------------------------------------- /Notebooks/Nonconstant_vs_Constant_Slope.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.2' 9 | # jupytext_version: 1.2.3 10 | # kernelspec: 11 | # display_name: Python 3 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | import torch 20 | import pandas as pd 21 | import pickle 22 | from jupyterthemes import jtplot 23 | jtplot.style('oceans16') 24 | 25 | # %% [markdown] 26 | # # Position Change to the Nearest Car between Frames 27 | 28 | # %% 29 | t1 = 
torch.load('../traffic-data/state-action-cost/data_i80_v0/trajectories-0400-0415/all_data.pth') 30 | t2 = torch.load('../traffic-data/state-action-cost/data_i80_v0/trajectories-0500-0515/all_data.pth') 31 | t3 = torch.load('../traffic-data/state-action-cost/data_i80_v0/trajectories-0515-0530/all_data.pth') 32 | t_states_full = t1['states'] + t2['states'] + t3['states'] 33 | 34 | 35 | # %% 36 | def get_xy_diff_with_closest_car(episode): 37 | dists_t = torch.sqrt((episode[:, 0, :2][:, None, :] - episode[:, 1:, :2]).norm(2, dim=-1)) 38 | dists_t[dists_t<=1e-7] = 999999 # if there is no car there 39 | min_dists_t_idx = dists_t.argmin(dim=-1) 40 | dist_diff = (dists_t[1:] - dists_t[:-1]) 41 | return dist_diff.gather(dim=1, index=min_dists_t_idx[:-1].view(-1,1)).view(-1) 42 | 43 | 44 | # %% 45 | xy_diffs = torch.cat([ 46 | get_xy_diff_with_closest_car(k) for k in t_states_full 47 | ]) 48 | 49 | velocity = torch.cat( 50 | [k[:-1] for k in t_states_full] 51 | )[:, 0, 2:].norm(2, dim=-1) 52 | # velocity *= 4*3.7/24 * 3.6 53 | 54 | velocity_np = (velocity // 5 * 5).int().numpy() 55 | xy_diffs_np = xy_diffs.numpy() 56 | 57 | # %% 58 | df = pd.DataFrame({'velocity':velocity_np, 'position_diff': -xy_diffs_np}) 59 | 60 | # %% 61 | df_new = df[df['position_diff']>1e-7].copy() 62 | df_new['position_diff'] = np.log10(df_new['position_diff']) 63 | 64 | # %% 65 | df_new.boxplot(column='position_diff', by='velocity', figsize=(14,8),) 66 | plt.title("$\delta_{xy}(t, t+1)$ to the nearest car") 67 | plt.suptitle("") 68 | plt.ylabel('$\log_{10}(\delta)$') 69 | plt.xlabel('speed') 70 | 71 | # %% [markdown] 72 | # # Gradients wrt Actions 73 | 74 | # %% 75 | act_grads = torch.load('../actions_grads_orig.pkl') 76 | data = [np.array(k) for k in act_grads] 77 | data = np.concatenate(data, axis=0) 78 | xedges = [np.quantile(data[:,0], p) for p in np.linspace(0,1,21)] 79 | 80 | df = pd.DataFrame(data) 81 | df.columns = ['speed', 'grad_proximity', 'grad_lane'] 82 | df.speed = (df.speed//5 * 
5).astype(int) 83 | 84 | # %% [markdown] 85 | # ## Non-Constant Slope 86 | 87 | # %% 88 | df_new = df.copy() 89 | 90 | # %% 91 | df_new.boxplot(column='grad_proximity', by='speed', figsize=(14,8),) 92 | plt.title("Version non-constant slope, proximity cost: $\partial c_{t+1} / \partial a_t$ ") 93 | plt.suptitle("") 94 | plt.ylabel('$\partial c_{t+1} / \partial a_t$') 95 | plt.xlabel('speed (km/h)') 96 | 97 | # %% 98 | df_new.boxplot(column='grad_lane', by='speed', figsize=(14,8)) 99 | plt.title("Version non-constant slope, lane cost: $\partial c_{t+1} / \partial a_t$ ") 100 | plt.suptitle("") 101 | plt.ylabel('$\partial c_{t+1} / \partial a_t$') 102 | plt.xlabel('speed (km/h)') 103 | 104 | # %% 105 | df_new[['grad_proximity', 'grad_lane']] = np.log10(df_new[['grad_proximity', 'grad_lane']]) 106 | 107 | # %% 108 | df_new.boxplot(column='grad_proximity', by='speed', figsize=(14,8)) 109 | plt.title("Version non-constant slope, proximity cost: $\partial c_{t+1} / \partial a_t$ ") 110 | plt.suptitle("") 111 | plt.ylabel('$\log_{10}(\partial c_{t+1} / \partial a_t)$') 112 | plt.xlabel('speed (km/h)') 113 | plt.ylim(-5, 1.2) 114 | 115 | # %% 116 | df_new.boxplot(column='grad_lane', by='speed', figsize=(14,8)) 117 | plt.title("Version non-constant slope, lane cost: $\partial c_{t+1} / \partial a_t$ ") 118 | plt.suptitle("") 119 | plt.ylabel('$\log_{10}(\partial c_{t+1} / \partial a_t)$') 120 | plt.xlabel('speed (km/h)') 121 | 122 | # %% [markdown] 123 | # ## Constant Slope 124 | 125 | # %% 126 | act_grads = torch.load('../actions_grads.pkl') 127 | data = [np.array(k) for k in act_grads] 128 | data = np.concatenate(data, axis=0) 129 | xedges = [np.quantile(data[:,0], p) for p in np.linspace(0,1,21)] 130 | 131 | df = pd.DataFrame(data) 132 | df.columns = ['speed', 'grad_proximity', 'grad_lane'] 133 | df.speed = (df.speed//5 * 5).astype(int) 134 | 135 | # %% 136 | df_new = df.copy() 137 | 138 | # %% 139 | df_new.boxplot(column='grad_proximity', by='speed', figsize=(14,8),) 
140 | plt.title("Version constant slope, proximity cost: $\partial c_{t+1} / \partial a_t$ ") 141 | plt.suptitle("") 142 | plt.ylabel('$\partial c_{t+1} / \partial a_t$') 143 | plt.xlabel('speed (km/h)') 144 | 145 | # %% 146 | df_new.boxplot(column='grad_lane', by='speed', figsize=(14,8)) 147 | plt.title("Version constant slope, lane cost: $\partial c_{t+1} / \partial a_t$ ") 148 | plt.suptitle("") 149 | plt.ylabel('$\partial c_{t+1} / \partial a_t$') 150 | plt.xlabel('speed (km/h)') 151 | 152 | # %% 153 | df_new[['grad_proximity', 'grad_lane']] = np.log10(df_new[['grad_proximity', 'grad_lane']]) 154 | 155 | # %% 156 | df_new.boxplot(column='grad_proximity', by='speed', figsize=(14,8)) 157 | plt.title("Version constant slope, proximity cost: $\partial c_{t+1} / \partial a_t$ ") 158 | plt.suptitle("") 159 | plt.ylabel('$\log_{10}(\partial c_{t+1} / \partial a_t)$') 160 | plt.xlabel('speed (km/h)') 161 | plt.ylim(-5, 1.2) 162 | 163 | # %% 164 | df_new.boxplot(column='grad_lane', by='speed', figsize=(14,8)) 165 | plt.title("Version constant slope, lane cost: $\partial c_{t+1} / \partial a_t$ ") 166 | plt.suptitle("") 167 | plt.ylabel('$\log_{10}(\partial c_{t+1} / \partial a_t)$') 168 | plt.xlabel('speed (km/h)') 169 | -------------------------------------------------------------------------------- /train_cost.py: -------------------------------------------------------------------------------- 1 | import torch, numpy, argparse, pdb, os, time, math, random 2 | import utils 3 | from dataloader import DataLoader 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | import importlib 7 | import models 8 | import torch.nn as nn 9 | import utils 10 | 11 | ################################################# 12 | # Train an action-conditional forward model 13 | ################################################# 14 | 15 | parser = argparse.ArgumentParser() 16 | # data params 17 | parser.add_argument('-seed', type=int, default=1) 18 | parser.add_argument('-v', 
type=int, default=4) 19 | parser.add_argument('-dataset', type=str, default='i80') 20 | parser.add_argument('-data_dir', type=str, default='traffic-data/state-action-cost/data_i80_v0/') 21 | parser.add_argument('-model_dir', type=str, default='models/') 22 | parser.add_argument('-ncond', type=int, default=20, help='number of conditioning frames') 23 | parser.add_argument('-npred', type=int, default=20, help='number of predictions to make with unrolled fwd model') 24 | parser.add_argument('-batch_size', type=int, default=64) 25 | parser.add_argument('-layers', type=int, default=3) 26 | parser.add_argument('-nfeature', type=int, default=256) 27 | parser.add_argument('-n_hidden', type=int, default=256) 28 | parser.add_argument('-dropout', type=float, default=0.0, help='regular dropout') 29 | parser.add_argument('-lrt', type=float, default=0.0001) 30 | parser.add_argument('-grad_clip', type=float, default=5.0) 31 | parser.add_argument('-epoch_size', type=int, default=1000) 32 | parser.add_argument('-mfile', type=str, default='model=fwd-cnn-vae-fp-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-dropout=0.1-nz=32-beta=1e-06-zdropout=0.5-gclip=5.0-warmstart=1-seed=1.step200000.model') 33 | #parser.add_argument('-mfile', type=str, default='model=fwd-cnn-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-dropout=0.1-gclip=5.0-warmstart=0-seed=1.step200000.model') 34 | parser.add_argument('-debug', action='store_true') 35 | parser.add_argument('-enable_tensorboard', action='store_true', 36 | help='Enables tensorboard logging.') 37 | parser.add_argument('-tensorboard_dir', type=str, default='models', 38 | help='path to the directory where to save tensorboard log. 
If passed empty path' \ 39 | ' no logs are saved.') 40 | opt = parser.parse_args() 41 | 42 | os.system('mkdir -p ' + opt.model_dir) 43 | 44 | random.seed(opt.seed) 45 | numpy.random.seed(opt.seed) 46 | torch.manual_seed(opt.seed) 47 | torch.cuda.manual_seed(opt.seed) 48 | dataloader = DataLoader(None, opt, opt.dataset) 49 | 50 | 51 | 52 | 53 | # specific to the I-80 dataset 54 | opt.n_inputs = 4 55 | opt.n_actions = 2 56 | opt.height = 117 57 | opt.width = 24 58 | if opt.layers == 3: 59 | opt.h_height = 14 60 | opt.h_width = 3 61 | elif opt.layers == 4: 62 | opt.h_height = 7 63 | opt.h_width = 1 64 | opt.hidden_size = opt.nfeature*opt.h_height*opt.h_width 65 | 66 | model = torch.load(opt.model_dir + opt.mfile) 67 | cost = models.CostPredictor(opt).cuda() 68 | model.intype('gpu') 69 | optimizer = optim.Adam(cost.parameters(), opt.lrt) 70 | opt.model_file = opt.model_dir + opt.mfile + '.cost' 71 | print(f'[will save as: {opt.model_file}]') 72 | 73 | 74 | def train(nbatches, npred): 75 | model.train() 76 | total_loss = 0 77 | for i in range(nbatches): 78 | optimizer.zero_grad() 79 | inputs, actions, targets, _, _ = dataloader.get_batch_fm('train', npred) 80 | pred, _ = model(inputs, actions, targets, z_dropout=0) 81 | pred_cost = cost(pred[0].view(opt.batch_size*opt.npred, 1, 3, opt.height, opt.width), pred[1].view(opt.batch_size*opt.npred, 1, 4)) 82 | loss = F.mse_loss(pred_cost.view(opt.batch_size, opt.npred, 2), targets[2]) 83 | if not math.isnan(loss.item()): 84 | loss.backward(retain_graph=False) 85 | if not math.isnan(utils.grad_norm(model).item()): 86 | torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) 87 | optimizer.step() 88 | total_loss += loss.item() 89 | del inputs, actions, targets 90 | 91 | total_loss /= nbatches 92 | return total_loss 93 | 94 | def test(nbatches, npred): 95 | model.train() 96 | total_loss = 0 97 | for i in range(nbatches): 98 | inputs, actions, targets, _, _ = dataloader.get_batch_fm('valid', npred) 99 | pred, _ = 
model(inputs, actions, targets, z_dropout=0) 100 | pred_cost = cost(pred[0].view(opt.batch_size*opt.npred, 1, 3, opt.height, opt.width), pred[1].view(opt.batch_size*opt.npred, 1, 4)) 101 | loss = F.mse_loss(pred_cost.view(opt.batch_size, opt.npred, 2), targets[2]) 102 | if not math.isnan(loss.item()): 103 | total_loss += loss.item() 104 | del inputs, actions, targets 105 | 106 | total_loss /= nbatches 107 | return total_loss 108 | 109 | writer = utils.create_tensorboard_writer(opt) 110 | 111 | 112 | print('[training]') 113 | n_iter = 0 114 | for i in range(200): 115 | t0 = time.time() 116 | train_loss = train(opt.epoch_size, opt.npred) 117 | valid_loss = test(int(opt.epoch_size / 2), opt.npred) 118 | n_iter += opt.epoch_size 119 | model.intype('cpu') 120 | torch.save({'model': cost, 121 | 'optimizer': optimizer.state_dict(), 122 | 'n_iter': n_iter}, opt.model_file + '.model') 123 | if (n_iter/opt.epoch_size) % 10 == 0: 124 | torch.save({'model': cost, 125 | 'optimizer': optimizer.state_dict(), 126 | 'n_iter': n_iter}, opt.model_file + f'.step{n_iter}.model') 127 | torch.save(model, opt.model_file + f'.step{n_iter}.model') 128 | model.intype('gpu') 129 | if writer is not None: 130 | writer.add_scalar('Loss/train', train_loss, i) 131 | writer.add_scalar('Loss/valid', valid_loss, i) 132 | log_string = f'step {n_iter} | train: {train_loss} | valid: {valid_loss}' 133 | print(log_string) 134 | utils.log(opt.model_file + '.log', log_string) 135 | 136 | if writer is not None: 137 | writer.close() 138 | -------------------------------------------------------------------------------- /map_lanker.py: -------------------------------------------------------------------------------- 1 | from random import choice, randrange 2 | 3 | from custom_graphics import draw_dashed_line 4 | from map_i80 import I80, I80Car, colours 5 | from traffic_gym import Simulator 6 | import pygame 7 | import pandas as pd 8 | import numpy as np 9 | import pdb, random 10 | import bisect 11 | import pdb, 
pickle, os 12 | 13 | # Conversion of LANE_W from real world to pixels 14 | # A US highway lane width is 3.7 metres, here 24 pixels 15 | LANE_W = 24 # pixels / 3.7 m, lane width 16 | SCALE = LANE_W / 3.7 # pixels per metre 17 | FOOT = 0.3048 # metres per foot 18 | X_OFFSET = -35 # horizontal offset (camera 2 leftmost view) 19 | MAX_SPEED = 130 20 | 21 | 22 | class LankerCar(I80Car): 23 | # Global constants 24 | SCALE = SCALE 25 | LANE_W = LANE_W 26 | X_OFFSET = X_OFFSET 27 | max_b = 0.05 # set a looser max turning limitation 28 | 29 | @property 30 | def current_lane(self): 31 | # 1: left-most, 5: right-most, 6: auxiliary lane, 7: on-ramp, 8: off-ramp 32 | return 0 33 | 34 | 35 | class Lankershim(I80): 36 | # Environment's car class 37 | EnvCar = LankerCar 38 | 39 | # Global constants 40 | SCALE = SCALE 41 | LANE_W = LANE_W 42 | X_OFFSET = X_OFFSET 43 | DUMP_NAME = 'data_lanker_v0' 44 | 45 | def __init__(self, **kwargs): 46 | kwargs['nb_lanes'] = 1 47 | kwargs['delta_t'] = 1/10 48 | super().__init__(**kwargs) 49 | 50 | self.screen_size = (560 + 760 + 648 + 912 + 328, 20 * self.LANE_W) 51 | # self.photos = ( 52 | # pygame.image.load('Lankershim/cam1.png'), 53 | # pygame.image.load('Lankershim/cam2.png'), 54 | # pygame.image.load('Lankershim/cam3.png'), 55 | # pygame.image.load('Lankershim/cam4.png'), 56 | # pygame.image.load('Lankershim/cam5.png'), 57 | # ) 58 | # self.photos_rect = ( 59 | # self.photos[0].get_rect().move([0, 20]), 60 | # self.photos[1].get_rect().move([560, 20]), 61 | # self.photos[2].get_rect().move([560 + 760, 20]), 62 | # self.photos[3].get_rect().move([560 + 760 + 648, 20]), 63 | # self.photos[4].get_rect().move([560 + 760 + 648 + 912, 20]), 64 | # ) 65 | if self.display: # if display is required 66 | self.screen = pygame.display.set_mode(self.screen_size) # set screen size 67 | # self.delta_t = 1 / 10 # simulation timing interval 68 | self._time_slots = ( 69 | 'lanker/trajectories-0830am-0845am', 70 | 'lanker/trajectories-0845am-0900am', 71 | ) 72 
| self._t_slot = None 73 | self._black_list = { 74 | self._time_slots[0]: 75 | {128, 65, 995, 1124, 377, 810, 1003, 172, 335, 591, # off track (OT) 76 | 560, 1173, 1399, 1437, 153, 890, 1308, 1405, 413, 639, # OT 77 | 66, 112, 111, 94, 115, 122, 130, 170, 149, 152, 160, 210, 292, 261, 291, 339, # crash 78 | 300, 312, 306, 320, 391, 415, 434, 436, 472, 345, 432, 468, 397, 329, 528, 567, # crash 79 | 549, 468, 530, 585, 624, 737, 711, 716, 690, 753, 716, 762, 818, 904, 930, 887, # crash 80 | 964, 906, 931, 1005, 982, 989, 1000, 1433, 1037, 1189, 1155, 1221, 1260, 1258, # crash 81 | 1249, 1277, 1285, 1386, 1372, 1366, 1007, 1001}, # crash 82 | self._time_slots[1]: 83 | {1539, 772, 517, 267, 396, 1164, 1421, 1549, 530, 664, 1570, 1059, 804, 169, 812, 1453, 48, 53, # OT 84 | 1469, 1600, 1472, 1474, 451, 580, 1478, 584, 212, 1492, 1114, 228, 233, 625, 1394, 1268, 1023, # OT 85 | 58, 36, 129, 131, 74, 163, 122, 160, 296, 321, 330, 369, 395, 358, 322, 274, 481, 492, # crash 86 | 443, 490, 524, 437, 545, 600, 487, 730, 740, 628, 810, 753, 844, 716, 903, 672, 915, 936, # crash 87 | 809, 872, 967, 1075, 1069, 1109, 1098, 1075, 982, 986, 1069, 1109, 1180, 1155, 1103, 1232, # crash 88 | 1238, 1260, 1132, 1308, 1353, 1306, 1392, 1409, 1301, 1456, 1422, 1475, 1542, 1552, 1524, # crash 89 | 348, 521, 824, 911, 985, 1178} 90 | } 91 | self.df = None 92 | self.vehicles_history = None 93 | self.lane_occupancy = None 94 | # self._lane_surfaces = dict() 95 | # self.nb_lanes = 1 96 | self.smoothing_window = 15 97 | self.offset = 195 98 | 99 | @staticmethod 100 | def _get_data_frame(time_slot, x_max, x_offset): 101 | # TODO: need caching! 
See I-80 102 | file_name = f'traffic-data/xy-trajectories/{time_slot}.txt' 103 | print(f'Loading trajectories from {file_name}') 104 | df = pd.read_csv(file_name, sep=r'\s+', header=None, names=( 105 | 'Vehicle ID', 106 | 'Frame ID', 107 | 'Total Frames', 108 | 'Global Time', 109 | 'Local X', 110 | 'Local Y', 111 | 'Global X', 112 | 'Global Y', 113 | 'Vehicle Length', 114 | 'Vehicle Width', 115 | 'Vehicle Class', 116 | 'Vehicle Velocity', 117 | 'Vehicle Acceleration', 118 | 'Lane Identification', 119 | 'Origin Zone', 120 | 'Destination Zone', 121 | 'Intersection', 122 | 'Section', 123 | 'Direction', 124 | 'Movement', 125 | 'Preceding Vehicle', 126 | 'Following Vehicle', 127 | 'Spacing', 128 | 'Headway' 129 | )) 130 | 131 | # Get valid x coordinate rows 132 | valid_x = (df['Local Y'] * FOOT * SCALE - x_offset).between(0, x_max).values 133 | 134 | # Restrict data frame to valid x coordinates 135 | return df[valid_x] 136 | 137 | def _draw_lanes(self, surface, mode='human', offset=0): 138 | 139 | if mode == 'human': 140 | 141 | # load lanes, if not already done so 142 | if mode not in self._lane_surfaces: 143 | self._lane_surfaces[mode] = pygame.image.load('Lankershim/lanes_human.png') 144 | 145 | surface.blit(self._lane_surfaces[mode], (0, 0)) 146 | 147 | if mode == 'machine': 148 | 149 | # load lanes 150 | lanes_surface = pygame.image.load('Lankershim/lanes_machine.png') 151 | surface.blit(lanes_surface, (offset, offset)) 152 | 153 | # save for later 154 | self._lane_surfaces[mode] = surface.copy() 155 | -------------------------------------------------------------------------------- /Notebooks/Plotting-success-rate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --- 3 | # jupyter: 4 | # jupytext: 5 | # formats: ipynb,py:percent 6 | # text_representation: 7 | # extension: .py 8 | # format_name: percent 9 | # format_version: '1.2' 10 | # jupytext_version: 1.1.7 11 | # kernelspec: 12 | # 
display_name: Python 3 13 | # language: python 14 | # name: python3 15 | # --- 16 | 17 | # %% 18 | import os 19 | from matplotlib import pylab as plt 20 | from os import path 21 | from numpy import array, maximum, minimum, median 22 | import numpy 23 | 24 | # %% 25 | # %matplotlib inline 26 | plt.style.use(['dark_background', 'bmh']) 27 | plt.rc('axes', facecolor='k') 28 | plt.rc('figure', facecolor='k') 29 | plt.rc('figure', figsize=(20,5)) 30 | 31 | # %% 32 | N = 20 33 | seeds = [i for i in range(10)] 34 | success = list(list() for seed in seeds) 35 | performance_path = '/misc/vlgscratch4/LecunGroup/nvidia-collab/models_v12/planning_results/' 36 | name = 'MPUR-policy-gauss-model=vae-zdropout=0.5-policy-gauss-nfeature=256-bsize=6-npred=30-ureg=0.05' + \ 37 | '-lambdal=0.2-lambdaa=0.0-gamma=0.99-lrtz=0.0-updatez=0-inferz=0-learnedcost=1-seed={seed}' + \ 38 | '-novaluestep{step}.model.log' 39 | steps = [(i + 1) * 5000 for i in range(N)] 40 | 41 | for seed in seeds: 42 | for step in steps: 43 | file_name = path.join(performance_path, name.format(seed=seed, step=step)) 44 | if os.path.isfile(file_name): 45 | with open(file_name) as f: 46 | success[seed - 1].append(float(f.readlines()[-1].split()[-1])) 47 | else: 48 | success[seed - 1].append(None) 49 | 50 | # %% 51 | if success[0][0] is None: 52 | for seed in seeds: 53 | plt.plot(steps[1::2], success[seed - 1][1::2], label=f'seed: {seed}') 54 | else: 55 | for seed in seeds: 56 | plt.plot(steps, success[seed - 1], label=f'seed: {seed}') 57 | plt.grid(True) 58 | plt.xlabel('steps') 59 | plt.ylabel('success rate') 60 | plt.legend() 61 | plt.ylim([0.40, 0.80]) 62 | plt.ylim([0.50, 0.85]) 63 | plt.xlim([5_000, 105_000]) 64 | 65 | # %% 66 | N = 20 67 | performance_path = '/misc/vlgscratch4/LecunGroup/nvidia-collab/models_v12/planning_results/' 68 | n = dict(); s = 'stc'; d = 'dtr' 69 | n[s] = 'MPUR-policy-gauss-model=vae-zdropout=0.5-policy-gauss-nfeature=256-bsize=6-npred=30-ureg=0.05' + \ 70 | 
'-lambdal=0.2-lambdaa=0.0-gamma=0.99-lrtz=0.0-updatez=0-inferz=0-learnedcost=1-seed={seed}' + \ 71 | '-novaluestep{step}.model.log' 72 | n[d] = 'MPUR-policy-deterministic-model=vae-zdropout=0.5-nfeature=256-bsize=6-npred=30-ureg=0.05-lambdal=0.2' + \ 73 | '-lambdaa=0.0-gamma=0.99-lrtz=0.0-updatez=0-inferz=0-learnedcost=1-seed={seed}-novaluestep{step}.model.log' 74 | name = n 75 | steps = [(i + 1) * 5000 for i in range(N)] 76 | 77 | seeds = dict( 78 | stc=[i for i in range(10)], 79 | dtr=[i + 1 for i in range(3)], 80 | ) 81 | success = {k: list(list() for seed in seeds[k]) for k in seeds} 82 | 83 | for k in seeds: 84 | for seed in seeds[k]: 85 | for step in steps: 86 | file_name = path.join(performance_path, name[k].format(seed=seed, step=step)) 87 | with open(file_name) as f: 88 | success[k][seed - 1].append(float(f.readlines()[-1].split()[-1])) 89 | 90 | # %% 91 | for k in seeds: 92 | for seed in seeds[k]: 93 | plt.plot( 94 | array(steps) / 1e3, success[k][seed - 1], 95 | label=f'{k}-seed: {seed}', 96 | alpha=.4 if k is 'stc' else 1, 97 | linewidth=4 if k is 'dtr' else 1 98 | ) 99 | plt.grid(True) 100 | plt.xlabel('steps [k]') 101 | plt.ylabel('success rate') 102 | plt.legend(ncol=7) 103 | plt.ylim([0.50, 0.85]) 104 | plt.xlim([5, 105]) 105 | plt.title('Stochastic vs. 
deterministic policy success rate') 106 | plt.xticks(range(10, 100 + 10, 10)); 107 | 108 | # plt.savefig('Stc-vs-dtr-success_rate.png', bbox_inches = 'tight') 109 | 110 | # %% 111 | performance_path = '/misc/vlgscratch4/LecunGroup/nvidia-collab/models_v{version}/planning_results/' 112 | n = dict(); r = 'rgr'; h = 'hrd' # regressed / hardwired 113 | v = {r: 12, h: 13} 114 | N = {r: 20, h: 24} 115 | n[r] = 'MPUR-policy-deterministic-model=vae-zdropout=0.5-nfeature=256-bsize=6-npred=30-ureg=0.05-lambdal=0.2' + \ 116 | '-lambdaa=0.0-gamma=0.99-lrtz=0.0-updatez=0-inferz=0-learnedcost=1-seed={seed}-novaluestep{step}.model.log' 117 | n[h] = 'MPUR-policy-deterministic-model=vae-zdropout=0.5-nfeature=256-bsize=6-npred=30-ureg=0.05-lambdal=0.2' + \ 118 | '-lambdaa=0.0-gamma=0.99-lrtz=0.0-updatez=0-inferz=0-learnedcost=False-seed={seed}-novaluestep{step}.model.log' 119 | names = n 120 | steps = {k: [(i + 1) * 5000 for i in range(N[k])] for k in names} 121 | 122 | seeds = [i + 1 for i in range(3)] 123 | success = {k: list(list() for seed in seeds) for k in names} 124 | 125 | for k in names: 126 | for seed in seeds: 127 | for step in steps[k]: 128 | file_name = path.join(performance_path.format(version=v[k]), names[k].format(seed=seed, step=step)) 129 | with open(file_name) as f: 130 | success[k][seed - 1].append(float(f.readlines()[-1].split()[-1])) 131 | 132 | # %% 133 | for k in names: 134 | for seed in seeds: 135 | plt.plot( 136 | array(steps[k]) / 1e3, success[k][seed - 1], 137 | label=f'{k}-seed: {seed}', 138 | linewidth=4 if k is h else 1 139 | ) 140 | plt.grid(True) 141 | plt.xlabel('steps [k–]') 142 | plt.ylabel('success rate') 143 | plt.legend(ncol=7) 144 | plt.ylim([0.50, 0.85]) 145 | plt.xlim([5, 105]) 146 | plt.title('Regressed vs. 
def proximity_cost(images, states, car_size=(6.4, 14.3), green_channel=1, unnormalize=False, s_mean=None, s_std=None):
    """Compute a speed-dependent proximity cost together with the mask that produces it.

    A separable (row profile x column profile) mask is built around the image
    centre.  The row profile ramps from 0 to 1 over ``safe_distance`` pixels,
    which grows linearly with the ego speed; the column profile ramps over the
    car width.  The cost is the maximum of ``mask * green channel``.

    Compared with the previous notebook version this implementation
    * never calls ``.cuda()``, so it also runs on CPU-only machines,
    * supports ``bsize * npred > 1`` (the safe distance is now broadcast per
      sample instead of along the image width),
    * keeps a single floating dtype (the dtype of ``states``) throughout.

    Args:
        images: tensor of shape (bsize, npred, nchannels, crop_h, crop_w).
        states: tensor reshapeable to (bsize * npred, 4); columns 2 and 3 are
            used as the velocity components.
        car_size: (width, length) of the ego car; the inline comments of the
            original mark these as feet.
        green_channel: index of the image channel multiplied with the mask.
        unnormalize: if true, ``states`` is multiplied by ``s_std`` and shifted
            by ``s_mean`` before use.
        s_mean, s_std: 4-element tensors, required when ``unnormalize`` is set.

    Returns:
        ``(costs, proximity_mask, vertical_diff, horizontal_diff)`` as CPU
        tensors: costs is (bsize, npred), the mask is
        (bsize, npred, crop_h, crop_w) and the two diffs are the finite
        differences of the mask along rows and columns respectively.

    Raises:
        ValueError: if ``unnormalize`` is set without ``s_mean`` / ``s_std``.
    """
    SCALE = 0.25
    safe_factor = 1.5
    pixels_per_metre = SCALE * (24 / 3.7)

    bsize, npred, _, crop_h, crop_w = images.shape
    n = bsize * npred
    # Work on a private CPU copy: the results were always returned on the CPU.
    states = states.reshape(n, 4).clone().cpu()

    if unnormalize:
        if s_mean is None or s_std is None:
            raise ValueError('unnormalize=True requires both s_mean and s_std')
        states = states * (1e-8 + s_std.reshape(1, 4).to(states))
        states = states + s_mean.reshape(1, 4).to(states)

    speed = states[:, 2:].norm(2, 1) * SCALE  # pixel/s
    width = float(car_size[0]) * SCALE * (0.3048 * 24 / 3.7)  # pixels
    length = float(car_size[1]) * SCALE * (0.3048 * 24 / 3.7)  # pixels

    # One safe distance per (batch, step) sample: speed term plus one metre.
    safe_distance = torch.abs(speed) * safe_factor + pixels_per_metre

    # Account for 1 metre overlap (low data accuracy)
    alpha = pixels_per_metre

    # Plateau limits of the two profiles; they depend only on the crop and the
    # car size, so plain Python scalars are enough.
    max_x = float(np.ceil((crop_h - max(length - alpha, 0)) / 2))
    max_y = float(np.ceil((crop_w - max(width - alpha, 0)) / 2))
    min_y = float(np.ceil(crop_w / 2 - width))  # assumes other._width / 2 = self._width / 2
    min_x = torch.clamp(max_x - safe_distance, min=0).view(n, 1)

    # Row profile: triangular ramp, clipped to [min_x, max_x], rescaled to [0, 1].
    ramp_x = (1 - torch.abs(torch.linspace(-1, 1, crop_h, dtype=states.dtype))) * crop_h / 2
    x_filter = torch.max(torch.clamp(ramp_x, max=max_x).view(1, crop_h), min_x)
    x_filter = (x_filter - min_x) / (max_x - min_x)

    # Column profile: same construction, identical for every sample.
    ramp_y = (1 - torch.abs(torch.linspace(-1, 1, crop_w, dtype=states.dtype))) * crop_w / 2
    y_filter = torch.clamp(torch.clamp(ramp_y, max=max_y), min=min_y)
    y_filter = (y_filter - min_y) / (max_y - min_y)

    # Outer product of the two profiles, scaled per sample by its safe distance.
    proximity_mask = x_filter.view(n, crop_h, 1) * y_filter.view(1, 1, crop_w)
    proximity_mask = proximity_mask.view(bsize, npred, crop_h, crop_w)
    proximity_mask = proximity_mask * safe_distance.view(bsize, npred, 1, 1)

    green = images[:, :, green_channel].float().cpu()
    costs = torch.max((proximity_mask * green).reshape(bsize, npred, -1), 2)[0]

    vertical_diff = proximity_mask[:, :, 1:, :] - proximity_mask[:, :, :-1, :]
    horizontal_diff = proximity_mask[:, :, :, 1:] - proximity_mask[:, :, :, :-1]
    return costs, proximity_mask, vertical_diff, horizontal_diff
def plot_test(image, green_channel=1, speed=40):
    """Run ``test`` on ``image`` and show the mask, the input and the mask derivatives.

    Five panels are drawn on a 1x6 grid: the proximity mask, the input image,
    the mask multiplied with the resampled image, and the vertical and
    horizontal finite differences of the mask.  Summary maxima are printed.

    Returns:
        ``(vertical_diff, horizontal_diff)`` as produced by ``test``.
    """
    future_context, mask, _trans_grad, vertical_diff, horizontal_diff = test(image, green_channel, speed)

    # Mask applied to the (detached) resampled image; reused for the printout.
    overlay = mask[0] * future_context.detach()[0]
    panels = (
        mask[0][0],
        image[0][0].permute((1, 2, 0)) / 255.,
        overlay.permute((1, 2, 0)).numpy().astype(int),
        vertical_diff[0][0],
        horizontal_diff[0][0],
    )

    plt.figure(figsize=(15, 5))
    for position, panel in enumerate(panels, start=1):
        plt.subplot(1, 6, position)
        plt.imshow(panel)

    print(f'vertical diff max: {vertical_diff.max().item():.5}')
    print(f'horizontal diff max: {horizontal_diff.max().item():.5}')
    print(f'mask max: {mask.max().item():.5}')
    print(f'cost (max value): {overlay.max().item():.5}')
    return vertical_diff, horizontal_diff
class US101(I80):
    """US-101 variant of the ``I80`` environment.

    Only the map-specific pieces are overridden: screen geometry, the
    trajectory time slots, the per-slot set of black-listed vehicle ids and
    the lane drawing.
    """

    # Environment's car class
    EnvCar = US101Car

    # Module-level constants re-exported as class attributes
    SCALE = SCALE
    LANE_W = LANE_W
    X_OFFSET = X_OFFSET
    DUMP_NAME = 'data_us101_v0'

    def __init__(self, **kwargs):
        """Force 5 lanes and a 0.1 s time step, then set the US-101 specifics."""
        kwargs.update(nb_lanes=5, delta_t=1 / 10)
        super().__init__(**kwargs)

        # Screen height leaves room for four extra lane widths below the lanes.
        self.screen_size = (125 * self.LANE_W, (self.nb_lanes + 4) * self.LANE_W)
        if self.display:  # if display is required
            self.screen = pygame.display.set_mode(self.screen_size)  # set screen size

        self._time_slots = (
            'us101/trajectories-0750am-0805am',
            'us101/trajectories-0805am-0820am',
            'us101/trajectories-0820am-0835am',
        )
        self._t_slot = None
        # Vehicle ids excluded from each time slot.
        self._black_list = dict(zip(self._time_slots, (
            {2691, 2809, 2820, 2871, 2851, 2873},
            {649, 806, 1690, 1725, 1734, 1773, 1949, 1877},
            {183, 329, 791, 804, 1187, 1183, 1107, 1247, 1202, 1371, 1346, 1435, 1390, 1912},
        )))
        self.df = None
        self.vehicles_history = None
        self.lane_occupancy = None
        self.nb_lanes = 6  # overrides the value used for the screen size above
        self.smoothing_window = 15

    def _draw_lanes(self, surface, mode='human', offset=0):
        """Draw the lane markings on ``surface`` for the 'human' or 'machine' view."""
        slope = 0.07
        lanes = self.lanes
        line = pygame.draw.line
        screen_w = self.screen_size[0]

        def draw_ramps(colour, bottom, thickness):
            # Four tilted segments; ``sign`` selects upward (-1) or downward (+1) tilt.
            for x0, x1, y, sign in (
                (0 * LANE_W, 20 * LANE_W, bottom + 35, -1),
                (15 * LANE_W, 22 * LANE_W, bottom + 35, -1),
                (92 * LANE_W, 112 * LANE_W, bottom, 1),
                (90 * LANE_W, 97 * LANE_W, bottom + LANE_W, 1),
            ):
                line(surface, colour, (x0, y), (x1, y + sign * slope * (x1 - x0)), thickness)

        if mode == 'human':
            white = colours['w']
            grey = (128, 128, 128)
            yellow = (255, 255, 0)

            # Thin grey separator at the top edge of every lane
            for lane in lanes:
                line(surface, grey, (0, lane['min']), (screen_w, lane['min']), 1)

            top = lanes[0]['min']
            line(surface, white, (0, top), (screen_w, top), 3)
            bottom = lanes[-1]['max']
            line(surface, white, (0, bottom), (28 * LANE_W, bottom), 3)
            line(surface, grey, (28 * LANE_W, bottom), (86 * LANE_W, bottom), 1)
            line(surface, white, (86 * LANE_W, bottom), (screen_w, bottom), 3)
            line(surface, white, (22 * LANE_W, bottom + LANE_W), (90 * LANE_W, bottom + LANE_W), 3)

            # Tilted lanes and lines
            draw_ramps(white, bottom, 3)

            # Vertical yellow markers placed relative to the look-ahead distance
            look_ahead = MAX_SPEED * 1000 / 3600 * self.SCALE
            top_y = self.offset
            line(surface, yellow, (look_ahead, top_y), (look_ahead, 9.4 * LANE_W))
            line(surface, yellow, (screen_w - 1.75 * look_ahead, top_y), (screen_w - 1.75 * look_ahead, bottom))
            line(surface, yellow, (screen_w - 0.75 * look_ahead, top_y), (screen_w - 0.75 * look_ahead, bottom), 5)

        if mode == 'machine':
            red = colours['r']
            margin = offset

            for lane in lanes:
                line(surface, red, (0, lane['min'] + margin), (screen_w + 2 * margin, lane['min'] + margin), 1)

            bottom = lanes[-1]['max'] + margin
            line(surface, red, (0, bottom), (screen_w + 2 * margin, bottom), 1)
            line(surface, red, (0, bottom), (28 * LANE_W, bottom), 1)
            line(surface, red, (28 * LANE_W, bottom), (86 * LANE_W, bottom), 1)
            line(surface, red, (86 * LANE_W, bottom), (screen_w, bottom), 1)
            line(surface, red, (22 * LANE_W, bottom + LANE_W), (90 * LANE_W, bottom + LANE_W), 1)

            # Tilted lanes and lines
            draw_ramps(red, bottom, 1)

            # NOTE(review): the flattened source lost its indentation; caching the
            # surface is assumed to belong to the 'machine' branch -- confirm.
            self._lane_surfaces[mode] = surface.copy()
def start(what, nbatches, npred):
    """Run ``nbatches`` batches of the ``what`` split and return the averaged losses.

    Args:
        what: split name passed to ``dataloader.get_batch_fm``; the policy
            network is only updated when it equals ``'train'``.
        nbatches: number of batches to process.
        npred: number of prediction steps requested from the data loader.

    Returns:
        dict mapping each loss name (proximity, uncertainty, lane, offroad,
        action, policy) to its mean over the batches whose policy loss was not
        NaN.  All entries are 0 if no batch was usable.
    """
    # Compare by value: identity (`is`) on a string literal is an
    # implementation detail and raises a SyntaxWarning on recent Pythons.
    train = what == 'train'
    # NOTE(review): train mode is switched on for every split, validation included.
    model.train()
    model.policy_net.train()
    n_updates, grad_norm = 0, 0
    total_losses = dict(
        proximity=0,
        uncertainty=0,
        lane=0,
        offroad=0,
        action=0,
        policy=0,
    )
    for j in range(nbatches):
        inputs, actions, targets, ids, car_sizes = dataloader.get_batch_fm(what, npred)
        pred, actions = planning.train_policy_net_mpur(
            model, inputs, targets, car_sizes, n_models=10, lrt_z=opt.lrt_z,
            n_updates_z=opt.z_updates, infer_z=opt.infer_z
        )
        # Weighted sum of the individual cost terms
        pred['policy'] = pred['proximity'] + \
                         opt.u_reg * pred['uncertainty'] + \
                         opt.lambda_l * pred['lane'] + \
                         opt.lambda_a * pred['action'] + \
                         opt.lambda_o * pred['offroad']

        if not math.isnan(pred['policy'].item()):
            if train:
                optimizer.zero_grad()
                pred['policy'].backward()  # back-propagation through time!
                grad_norm += utils.grad_norm(model.policy_net).item()
                torch.nn.utils.clip_grad_norm_(model.policy_net.parameters(), opt.grad_clip)
                optimizer.step()
            for loss in total_losses:
                total_losses[loss] += pred[loss].item()
            n_updates += 1
        else:
            print('warning, NaN')  # the policy loss diverged; drop into the debugger
            ipdb.set_trace()

        if j == 0 and opt.save_movies and train:
            # save videos of normal and adversarial scenarios
            for b in range(opt.batch_size):
                state_img = pred['state_img'][b]
                state_vct = pred['state_vct'][b]
                utils.save_movie(opt.model_file + f'.mov/sampled/mov{b}', state_img, state_vct, None, actions[b])

        del inputs, actions, targets, pred

    # Guard against a ZeroDivisionError when no batch produced a finite loss
    # (or when nbatches is 0).
    denominator = max(n_updates, 1)
    for loss in total_losses:
        total_losses[loss] /= denominator
    if train:
        print(f'[avg grad norm: {grad_norm / denominator:.4f}]')
    return total_losses
class DimensionalityReduction:
    """Dimensionality reduction and clustering class"""

    # Path for saving dimensionality reduction class state for faster loading
    SAVE_DIMENSIONALITY_REDUCTION_PATH = './dimensionality_reduction.pickle'
    # Path for saving clustering class state for faster loading
    SAVE_KM_PATH = './km.pickle'

    def __init__(self):
        """Lazy initialization, in order to not waste time every time we open the notebook"""
        self.initialized = False

    def initialize(self):
        """Load the cached estimators, or fit and cache them when a cache file is missing.

        Fitting loads the features of all failing episodes and may take time.
        Calling this method again after a successful run is a no-op.
        """
        if self.initialized:
            return
        cache_paths = (
            DimensionalityReduction.SAVE_DIMENSIONALITY_REDUCTION_PATH,
            DimensionalityReduction.SAVE_KM_PATH,
        )
        # Both files are needed; checking only the first one used to crash
        # with FileNotFoundError when the k-means cache was absent.
        if all(os.path.exists(p) for p in cache_paths):
            # NOTE: pickle.load can execute arbitrary code; these files are
            # expected to be local caches written by this very method.
            with open(cache_paths[0], 'rb') as f:
                self.dimensionality_reduction = pickle.load(f)
            with open(cache_paths[1], 'rb') as f:
                self.km = pickle.load(f)
            print('loaded from pickle')
        else:
            print('creating dimensionality_reduction')
            features = DimensionalityReduction.get_dimensionality_reduction_data()
            self.dimensionality_reduction = sklearn.manifold.TSNE(
                n_components=2)
            self.dimensionality_reduction.fit(features)
            self.km = sklearn.cluster.KMeans(n_clusters=5).fit(features)
            with open(cache_paths[0], 'wb') as f:
                pickle.dump(self.dimensionality_reduction, f)
            with open(cache_paths[1], 'wb') as f:
                pickle.dump(self.km, f)
        self.initialized = True

    def transform(self, value):
        """ Performs dimensionality reduction on given values

        Uses ``fit_transform``, i.e. the reducer is re-fitted on ``value``
        at every call.
        """
        self.initialize()
        return self.dimensionality_reduction.fit_transform(value)

    def cluster(self, value):
        """ Performs clustering on given values"""
        self.initialize()
        return self.km.predict(value)

    @staticmethod
    def get_episode_features(experiment, seed, checkpoint, episode):
        """ Get features for one episode
        This is used for dimensionality reduction, which is later used for
        scatter plotting.

        The last ``history_size`` speeds and the last ``history_size`` values
        of three cost columns are stacked and flattened into one vector.
        """
        history_size = 10
        costs = DataReader.get_episode_costs(
            experiment, seed, checkpoint, episode)
        features = [DataReader.get_episode_speeds(
            experiment, seed, checkpoint, episode)[-history_size:]]
        for column in ('proximity_cost', 'lane_cost', 'pixel_proximity_cost'):
            features.append(costs[column].to_numpy()[-history_size:])
        return np.stack(features).flatten()

    @staticmethod
    def get_model_failing_features(experiment, seed, checkpoint):
        """ Get features for one model
        This is used for dimensionality reduction, which is later used for
        scatter plotting.

        One row is built per episode returned by
        ``DataReader.get_episodes_with_outcome(..., 0)``; the rows are stacked
        and standardised with ``sklearn.preprocessing.scale``.
        """
        speeds = DataReader.get_model_speeds(experiment, seed, checkpoint)
        costs = DataReader.get_model_costs(experiment, seed, checkpoint)
        states = DataReader.get_model_states(experiment, seed, checkpoint)
        failing = DataReader.get_episodes_with_outcome(
            experiment, seed, checkpoint, 0)

        history_size = 10
        columns_to_save = ['lane_cost', 'pixel_proximity_cost']

        data = []
        for fail in failing:
            episode = fail - 1  # the episode ids are used 1-based here
            series = [speeds[episode][-history_size:]]
            series.extend(costs[episode][c][-history_size:] for c in columns_to_save)
            # Left-pad short histories with zeros so every series has the same length.
            series = [
                np.pad(s, (history_size - s.shape[0], 0), 'constant')
                for s in series
            ]
            features = np.stack(series).flatten()

            # Append scalars taken at the last recorded frame of the episode.
            last = costs[episode]['collisions_per_frame'].shape[0] - 1
            features = np.append(
                features, [costs[episode]['collisions_per_frame'][last]])
            features = np.append(
                features, [costs[episode]['arrived_to_dst'][last]])
            features = np.append(features, [states[episode][last][0]])
            features = np.append(features, [states[episode][last][1]])
            features = np.append(features, DataReader.get_last_gradient(
                experiment, seed, checkpoint, fail))
            data.append(features)

        data = np.stack(data)
        data = sklearn.preprocessing.scale(data)

        return data

    @staticmethod
    def get_dimensionality_reduction_data():
        """ Get features for all models
        This is used for dimensionality reduction, which is later used for
        scatter plotting.

        Raises:
            ValueError: if no checkpoint yielded any features.
        """
        # we don't have costs for all values, should do that.
        experiment = 'Deterministic policy, regressed cost'
        # Collect per-checkpoint arrays in a list and concatenate once.  The
        # former ``data == []`` test compared an ndarray with a list once the
        # first chunk had been stored.
        chunks = []
        for seed in DataReader.find_option_values('seed', experiment):
            checkpoints = DataReader.find_option_values(
                'checkpoint', experiment, seed)
            for checkpoint in checkpoints:
                try:
                    chunks.append(DimensionalityReduction.get_model_failing_features(
                        experiment, seed, checkpoint))
                except Exception as e:
                    # Best effort: a checkpoint without usable data is reported and skipped.
                    print(checkpoint, 'failed', e)
                    traceback.print_exc()
        if not chunks:
            raise ValueError(f'no failing-episode features found for {experiment!r}')
        data = np.concatenate(chunks)
        data = sklearn.preprocessing.scale(data)
        return data
class Peachtree(I80):
    """Peachtree variant of the ``I80`` environment.

    Overrides the screen geometry, the trajectory time slots with their
    black-listed vehicle ids, the trajectory loading and the lane drawing
    (lanes are blitted from pre-rendered images).
    """

    # Environment's car class
    EnvCar = PeachCar

    # Module-level constants re-exported as class attributes
    SCALE = SCALE
    LANE_W = LANE_W
    X_OFFSET = X_OFFSET
    DUMP_NAME = 'data_peach_v0'

    def __init__(self, **kwargs):
        """Force a single lane and a 0.1 s time step, then set the Peachtree specifics."""
        kwargs.update(nb_lanes=1, delta_t=1 / 10)
        super().__init__(**kwargs)

        camera_widths = (640, 624, 472, 688, 456, 472, 752, 280)
        self.screen_size = (sum(camera_widths[-8:]) - 270, 315)
        if self.display:  # if display is required
            self.screen = pygame.display.set_mode(self.screen_size)  # set screen size

        self._time_slots = (
            'peach/trajectories-0400pm-0415pm',
            'peach/trajectories-1245pm-0100pm',
        )
        self._t_slot = None

        afternoon_ids = {
            256, 137, 11, 1293, 399, 1551, 794, 556, 942, 562, 307, 1077, 694, 188, 63, 705, 451, 579, 1098, 605,
            606, 95, 225, 611, 997, 107, 1643, 366, 624, 245, 255, 738, 1755,  # off track
            114, 15, 70, 142, 28, 94, 1672, 93, 194, 75, 59, 249, 75, 239, 253, 240, 254, 269, 266, 275, 336, 1709,
            290, 247, 262, 1689, 352, 380, 415, 468, 405, 449, 419, 475, 396, 559, 597, 703, 404, 642, 682, 593,
            567, 703, 650, 815, 760, 763, 945, 951, 973, 975, 1005, 961, 978, 972, 1103, 1144, 984, 970, 1122, 978,
            1179, 1090, 1147, 1145, 1201, 1314, 1134, 1201, 1200, 1431, 1397, 1507, 1640, 1552, 1392, 1530, 1561,
            1564,
        }
        midday_ids = {
            391, 1037, 399, 404, 1459, 948, 1206, 440, 314, 1339, 829, 577, 962, 67, 219, 861, 863, 991, 358, 998,
            246, 1022, 127,  # off track
            197, 131, 211, 228, 125, 218, 112, 299, 217, 406, 402, 297, 376, 409, 444, 569, 436, 426, 620, 706,
            701, 787, 783, 723, 1498, 734, 715, 757, 706, 798, 805, 787, 842, 847, 783, 901, 850, 994, 882, 1092,
            1055, 966, 1008, 1092, 1079, 1026, 1142, 1273, 1164, 1183, 1320, 1324, 1192, 1129, 1320, 1372, 1326,
            1406, 1372, 1358, 1298, 1336, 1480,
        }
        # Vehicle ids excluded from each time slot.
        self._black_list = {
            self._time_slots[0]: afternoon_ids,
            self._time_slots[1]: midday_ids,
        }

        self.df = None
        self.vehicles_history = None
        self.lane_occupancy = None
        self.smoothing_window = 15
        self.offset = None  # the data is misaligned here; set by the custom reset method

    def reset(self, frame=None, time_slot=None):
        """Reset the parent environment, then pick the offset for ``time_slot``."""
        super().reset(frame, time_slot)
        if time_slot == 0:
            self.offset = -180
        else:
            self.offset = -15

    def _get_data_frame(self, time_slot, x_max, x_offset):
        """Read the trajectory file of ``time_slot`` and return the filtered data frame.

        Rows outside ``[0, x_max]`` (after unit conversion and ``x_offset``)
        are dropped, both local coordinates are mirrored, and vehicles seen
        for fewer than 50 frames are added to the slot's black list.
        """
        # TODO: should use Lanker, not re-implement this!
        file_name = f'traffic-data/xy-trajectories/{time_slot}.txt'
        print(f'Loading trajectories from {file_name}')
        column_names = (
            'Vehicle ID',
            'Frame ID',
            'Total Frames',
            'Global Time',
            'Local X',
            'Local Y',
            'Global X',
            'Global Y',
            'Vehicle Length',
            'Vehicle Width',
            'Vehicle Class',
            'Vehicle Velocity',
            'Vehicle Acceleration',
            'Lane Identification',
            'Origin Zone',
            'Destination Zone',
            'Intersection',
            'Section',
            'Direction',
            'Movement',
            'Preceding Vehicle',
            'Following Vehicle',
            'Spacing',
            'Headway'
        )
        df = pd.read_csv(file_name, sep=r'\s+', header=None, names=column_names)

        # Keep only rows whose converted 'Local Y' lies on the screen
        on_screen = (df['Local Y'] * FOOT * SCALE - x_offset).between(0, x_max).values
        df = df[on_screen]

        # Mirror both coordinates
        largest_local_y = df['Local Y'].max()
        largest_local_x = df['Local X'].max()
        # NOTE(review): time_slot is also used as a file name and as a key of
        # the black list, so it is presumably a string and this test would
        # then never select 30 -- confirm the intent before changing.
        extra_offset = 30 if time_slot == 0 else 17
        df['Local Y'] = largest_local_y + extra_offset - df['Local Y']
        df['Local X'] = largest_local_x - df['Local X']

        # Dropping cars with fewer than 50 total frames
        short_lived = df[df['Total Frames'] < 50]
        baby_cars = set(short_lived['Vehicle ID'])
        print(f'Removing {len(baby_cars)} baby vehicles from the database')
        self._black_list[time_slot] |= baby_cars

        return df

    def _draw_lanes(self, surface, mode='human', offset=0):
        """Blit the pre-rendered lane image for ``mode`` onto ``surface``."""
        if mode == 'human':
            # load lanes, if not already done so
            if mode not in self._lane_surfaces:
                self._lane_surfaces[mode] = pygame.image.load('Peachtree/lanes_human.png')
            surface.blit(self._lane_surfaces[mode], (0, 0))

        if mode == 'machine':
            # load lanes and draw them shifted by ``offset`` in both directions
            machine_lanes = pygame.image.load('Peachtree/lanes_machine.png')
            surface.blit(machine_lanes, (offset, offset))
            # save for later
            self._lane_surfaces[mode] = surface.copy()
'model=fwd-cnn-vae-fp-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-dropout=0.1-nz=32-' + \ 35 | 'beta=1e-06-zdropout=0.0-gclip=5.0-warmstart=1-seed=1.step200000.model' 36 | M3 = 'model=fwd-cnn-vae-fp-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-dropout=0.1-nz=32-' + \ 37 | 'beta=1e-06-zdropout=0.5-gclip=5.0-warmstart=1-seed=1.step200000.model' 38 | M4 = 'model=fwd-cnn-ten3-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-nhidden=128-fgeom=1-' + \ 39 | 'zeroact=0-zmult=0-dropout=0.1-nz=32-beta=0.0-zdropout=0.5-gclip=5.0-warmstart=1-seed=1.step200000.model' 40 | parser.add_argument('-mfile', type=str, default=M3) 41 | parser.add_argument('-cuda', type=int, default=1) 42 | parser.add_argument('-save_video', type=int, default=1) 43 | opt = parser.parse_args() 44 | 45 | if 'zeroact=1' in opt.mfile: 46 | opt.zeroact = 1 47 | else: 48 | opt.zeroact = 0 49 | 50 | random.seed(opt.seed) 51 | np.random.seed(opt.seed) 52 | torch.manual_seed(opt.seed) 53 | 54 | opt.save_video = (opt.save_video == 1) 55 | opt.eval_dir = opt.model_dir + f'eval/' 56 | 57 | 58 | print(f'[loading {opt.model_dir + opt.mfile}]') 59 | model = torch.load(opt.model_dir + opt.mfile) 60 | if type(model) is dict: model = model['model'] 61 | model = model.cuda() 62 | model.eval() 63 | # if opt.cuda == 1: 64 | # model.intype('gpu') 65 | 66 | dataloader = DataLoader(None, opt, opt.dataset) 67 | # model.opt.npred = opt.npred # instruct the model about how many predictions we want it to produce 68 | model.opt.alpha = 0 69 | 70 | dirname = f'{opt.eval_dir}{opt.mfile}-nbatches={opt.n_batches}-npred={opt.npred}-nsample={opt.n_samples}' 71 | if '-ten' in opt.mfile: 72 | dirname += f'-sampling={opt.sampling}' 73 | if opt.sampling == 'knn': 74 | dirname += f'-density={opt.graph_density}' 75 | elif opt.sampling == 'pdf': 76 | dirname += f'-nmixture={opt.n_mixture}' 77 | mfile_prior = f'{opt.model_dir}/{opt.mfile}-nfeature=128-lrt=0.0001-nmixture={opt.n_mixture}.prior' 78 | 
def compute_loss(targets, predictions, r=True):
    """Return the image, state and cost MSE between predictions and targets.

    Fixes over the previous version:

    * the cost loss is computed from the costs carried by ``predictions``;
      it used to read the module-level ``pred_costs`` result buffer, which
      has nothing to do with the current prediction;
    * the deprecated ``reduce=`` keyword is replaced by ``reduction=``;
    * tensors are no longer forced onto CUDA: the predicted costs are moved
      to the device of the target costs instead.

    :param targets: 3-item sequence ``(images, states, costs)`` of tensors
    :param predictions: 3-item sequence ``(images, states, costs)`` of
        tensors, each matching the corresponding target
    :param r: if true, every loss is averaged into a scalar; otherwise the
        element-wise squared errors are returned
    :return: tuple ``(loss_i, loss_s, loss_c)``
    """
    pred_images, pred_states, pred_costs = predictions
    target_images, target_states, target_costs = targets

    # Legacy `reduce=True` (with the default size_average) meant "mean",
    # `reduce=False` meant no reduction at all.
    reduction = 'mean' if r else 'none'

    loss_i = fun.mse_loss(pred_images, target_images, reduction=reduction)
    loss_s = fun.mse_loss(pred_states, target_states, reduction=reduction)
    loss_c = fun.mse_loss(
        pred_costs.to(target_costs.device), target_costs, reduction=reduction)
    return loss_i, loss_s, loss_c
torch.cuda.empty_cache() 125 | inputs, actions, targets, _, _ = dataloader.get_batch_fm('test', opt.npred) 126 | 127 | # save ground truth for the first 10 x batch_size samples 128 | if i < 10 and opt.save_video: 129 | for b in range(opt.batch_size): 130 | dirname_movie = f'{dirname}/videos/x{i * opt.batch_size + b:d}/y/' 131 | print(f'[saving ground truth video: {dirname_movie}]') 132 | utils.save_movie(dirname_movie, targets[0][b], targets[1][b], targets[2][b]) 133 | 134 | for s in range(opt.n_samples): 135 | print(f'[batch {i}, sample {s}]', end="\r") 136 | 137 | if opt.zeroact == 1: 138 | actions.data.zero_() 139 | 140 | pred, _ = model(inputs, actions, targets, sampling=opt.sampling) # return as many predictions as actions 141 | pred_states[i, :, s].copy_(pred[1]) 142 | true_states[i].copy_(targets[1]) 143 | 144 | if i < 10 and s < 20 and opt.save_video: 145 | for b in range(opt.batch_size): 146 | dirname_movie = f'{dirname}/videos/sampled_z/true_actions/x{i * opt.batch_size + b:d}/z{s:d}/' 147 | print(f'[saving video: {dirname_movie}]', end="\r") 148 | utils.save_movie(dirname_movie, pred[0][b], pred[1][b]) # , pred_[2][b]) 149 | # ^ images ^ position and velocity 150 | 151 | # rotate actions across the batch: a_{t} -> a_{t + 1} 152 | actions_rot = actions[(torch.arange(opt.batch_size) - 1) % opt.batch_size] 153 | 154 | # also generate videos with different action sequences 155 | pred_rot, _ = model(inputs, actions_rot, targets, sampling=opt.sampling) 156 | if i < 10 and s < 20 and opt.save_video: 157 | for b in range(opt.batch_size): 158 | dirname_movie = f'{dirname}/videos/sampled_z/rot_actions/x{i * opt.batch_size + b:d}/z{s:d}/' 159 | print('[saving video: {}]'.format(dirname_movie), end="\r") 160 | utils.save_movie(dirname_movie, pred_rot[0][b], pred_rot[1][b]) # , pred_perm[2][b]) 161 | 162 | # also generate videos with true z vectors 163 | if s == 0: 164 | pred_true_z, _ = model(inputs, actions, targets) 165 | for b in range(opt.batch_size): 166 | 
dirname_movie = f'{dirname}/videos/true_z/true_actions/x{i * opt.batch_size + b:d}/z{s:d}/' 167 | print('[saving video: {}]'.format(dirname_movie), end="\r") 168 | utils.save_movie(dirname_movie, pred_true_z[0][b], pred_true_z[1][b]) # , pred_true_z[2][b]) 169 | 170 | pred_true_z_rot, _ = model(inputs, actions_rot, targets) 171 | for b in range(opt.batch_size): 172 | dirname_movie = f'{dirname}/videos/true_z/rot_actions/x{i * opt.batch_size + b:d}/z{s:d}/' 173 | print('[saving video: {}]'.format(dirname_movie), end="\r") 174 | utils.save_movie(dirname_movie, pred_true_z_rot[0][b], pred_true_z_rot[1][b]) 175 | # , pred_true_z_perm[2][b]) 176 | 177 | # del inputs, actions, targets, pred 178 | 179 | torch.save({'loss_i': loss_i, 180 | 'loss_s': loss_s, 181 | 'loss_c': loss_c, 182 | 'true_costs': true_costs, 183 | 'pred_costs': pred_costs, 184 | 'true_states': true_states, 185 | 'pred_states': pred_states}, 186 | f'{dirname}/loss.pth') 187 | 188 | os.system(f'tar -cvf {dirname}.tgz {dirname}') 189 | -------------------------------------------------------------------------------- /train_fm.py: -------------------------------------------------------------------------------- 1 | import torch, numpy, argparse, pdb, os, time, math, random 2 | import utils 3 | from dataloader import DataLoader 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | import importlib 7 | import models 8 | import torch.nn as nn 9 | 10 | 11 | torch.backends.cudnn.deterministic = True 12 | torch.backends.cudnn.benchmark = False 13 | 14 | ################################################# 15 | # Train an action-conditional forward model 16 | ################################################# 17 | 18 | parser = argparse.ArgumentParser() 19 | # data params 20 | parser.add_argument('-seed', type=int, default=1) 21 | parser.add_argument('-v', type=int, default=4) 22 | parser.add_argument('-dataset', type=str, default='i80') 23 | parser.add_argument('-model', type=str, default='fwd-cnn') 
24 | parser.add_argument('-layers', type=int, default=3, help='layers in frame encoder/decoders') 25 | parser.add_argument('-data_dir', type=str, default='traffic-data/state-action-cost/data_i80_v0/') 26 | parser.add_argument('-model_dir', type=str, default='models') 27 | parser.add_argument('-ncond', type=int, default=20, help='number of conditioning frames') 28 | parser.add_argument('-npred', type=int, default=20, help='number of predictions to make with unrolled fwd model') 29 | parser.add_argument('-batch_size', type=int, default=8) 30 | parser.add_argument('-nfeature', type=int, default=256) 31 | parser.add_argument('-beta', type=float, default=0.0, help='coefficient for KL term in VAE') 32 | parser.add_argument('-ploss', type=str, default='hinge') 33 | parser.add_argument('-z_dropout', type=float, default=0.0, help='set z=0 with this probability') 34 | parser.add_argument('-dropout', type=float, default=0.0, help='regular dropout') 35 | parser.add_argument('-nz', type=int, default=32) 36 | parser.add_argument('-lrt', type=float, default=0.0001) 37 | parser.add_argument('-grad_clip', type=float, default=5.0) 38 | parser.add_argument('-epoch_size', type=int, default=2000) 39 | parser.add_argument('-warmstart', type=int, default=0, help='initialize with pretrained model') 40 | parser.add_argument('-debug', action='store_true') 41 | parser.add_argument('-enable_tensorboard', action='store_true', 42 | help='Enables tensorboard logging.') 43 | parser.add_argument('-tensorboard_dir', type=str, default='models', 44 | help='path to the directory where to save tensorboard log. 
If passed empty path' \ 45 | ' no logs are saved.') 46 | opt = parser.parse_args() 47 | 48 | os.system('mkdir -p ' + opt.model_dir) 49 | 50 | random.seed(opt.seed) 51 | numpy.random.seed(opt.seed) 52 | torch.manual_seed(opt.seed) 53 | torch.cuda.manual_seed(opt.seed) 54 | dataloader = DataLoader(None, opt, opt.dataset) 55 | 56 | 57 | # define model file name 58 | opt.model_file = f'{opt.model_dir}/model={opt.model}-layers={opt.layers}-bsize={opt.batch_size}-ncond={opt.ncond}-npred={opt.npred}-lrt={opt.lrt}-nfeature={opt.nfeature}-dropout={opt.dropout}' 59 | 60 | if 'vae' in opt.model: 61 | opt.model_file += f'-nz={opt.nz}' 62 | opt.model_file += f'-beta={opt.beta}' 63 | opt.model_file += f'-zdropout={opt.z_dropout}' 64 | 65 | if opt.grad_clip != -1: 66 | opt.model_file += f'-gclip={opt.grad_clip}' 67 | 68 | opt.model_file += f'-warmstart={opt.warmstart}' 69 | opt.model_file += f'-seed={opt.seed}' 70 | print(f'[will save model as: {opt.model_file}]') 71 | 72 | 73 | # parameters specific to the I-80 dataset 74 | opt.n_inputs = 4 75 | opt.n_actions = 2 76 | opt.height = 117 77 | opt.width = 24 78 | if opt.layers == 3: 79 | opt.h_height = 14 80 | opt.h_width = 3 81 | elif opt.layers == 4: 82 | opt.h_height = 7 83 | opt.h_width = 1 84 | opt.hidden_size = opt.nfeature*opt.h_height*opt.h_width 85 | 86 | mfile = opt.model_file + '.model' 87 | 88 | # load previous checkpoint or create new model 89 | if os.path.isfile(mfile): 90 | print(f'[loading previous checkpoint: {mfile}]') 91 | checkpoint = torch.load(mfile) 92 | model = checkpoint['model'] 93 | model.cuda() 94 | optimizer = optim.Adam(model.parameters(), opt.lrt) 95 | optimizer.load_state_dict(checkpoint['optimizer']) 96 | n_iter = checkpoint['n_iter'] 97 | utils.log(opt.model_file + '.log', '[resuming from checkpoint]') 98 | else: 99 | # specify deterministic model we use to initialize parameters with 100 | if opt.warmstart == 1: 101 | prev_model = 
def compute_loss(targets, predictions, reduction='mean'):
    """Return the image and state MSE between predictions and targets.

    :param targets: sequence whose first two items are the target images and
        the target states (further items are ignored)
    :param predictions: 3-item sequence ``(images, states, _)``; the third
        item is ignored
    :param reduction: forwarded to ``F.mse_loss``
    :return: tuple ``(loss_i, loss_s)``
    """
    target_images, target_states = targets[0], targets[1]
    pred_images, pred_states, _ = predictions
    loss_i = F.mse_loss(pred_images, target_images, reduction=reduction)
    loss_s = F.mse_loss(pred_states, target_states, reduction=reduction)
    return loss_i, loss_s


def expand(x, actions, nrep):
    """Replicate a batch ``nrep`` times along the batch dimension.

    The replicas are laid out replica-major: the result holds the whole
    batch, then the whole batch again, and so on.

    :param x: sequence whose first two items are images
        ``(bsize, nsteps, 3, opt.height, opt.width)`` and states
        ``(bsize, nsteps, opt.n_inputs)``
    :param actions: ``(bsize, nsteps, opt.n_actions)`` tensor, or ``None``
    :param nrep: number of replicas
    :return: ``([images, states, None], actions)`` when ``actions`` is given,
        otherwise the 2-item list ``[images, states]``
    """
    images, states = x[0], x[1]
    bsize = images.size(0)
    nsteps = images.size(1)
    images_ = images.unsqueeze(0).expand(nrep, bsize, nsteps, 3, opt.height, opt.width)
    images_ = images_.contiguous().view(nrep*bsize, nsteps, 3, opt.height, opt.width)
    states_ = states.unsqueeze(0).expand(nrep, bsize, nsteps, opt.n_inputs)
    states_ = states_.contiguous().view(nrep*bsize, nsteps, opt.n_inputs)
    if actions is None:
        return [images_, states_]
    # NOTE(review): the actions are expanded with the step count of the
    # images, so both are assumed to have the same number of steps.
    actions_ = actions.unsqueeze(0).expand(nrep, bsize, nsteps, opt.n_actions)
    actions_ = actions_.contiguous().view(nrep*bsize, nsteps, opt.n_actions).contiguous()
    return [images_, states_, None], actions_


def train(nbatches, npred):
    """Run ``nbatches`` optimisation steps and return the mean losses.

    :param nbatches: number of training batches to draw
    :param npred: number of prediction steps requested from the data loader
    :return: tuple ``(loss_i, loss_s, loss_p)`` averaged over the batches
    """
    model.train()
    total_loss_i, total_loss_s, total_loss_p = 0, 0, 0
    for _ in range(nbatches):
        optimizer.zero_grad()
        inputs, actions, targets, _, _ = dataloader.get_batch_fm('train', npred)
        pred, loss_p = model(inputs[: -1], actions, targets, z_dropout=opt.z_dropout)
        loss_p = loss_p[0]
        loss_i, loss_s = compute_loss(targets, pred)
        loss = loss_i + loss_s + opt.beta*loss_p

        # VAEs get NaN loss sometimes, so check for it
        if not math.isnan(loss.item()):
            loss.backward(retain_graph=False)
            if not math.isnan(utils.grad_norm(model).item()):
                # -grad_clip -1 means "no clipping" (the model file name is
                # built on the same convention). Passing a negative max norm
                # to clip_grad_norm_ would scale the gradients by a negative
                # factor and turn the update into gradient ascent.
                if opt.grad_clip != -1:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)
                optimizer.step()

        total_loss_i += loss_i.item()
        total_loss_s += loss_s.item()
        total_loss_p += loss_p.item()
        del inputs, actions, targets

    # Guard against nbatches == 0 instead of raising ZeroDivisionError.
    denom = max(nbatches, 1)
    return total_loss_i / denom, total_loss_s / denom, total_loss_p / denom


def test(nbatches):
    """Evaluate on ``nbatches`` validation batches and return the mean losses.

    :param nbatches: number of validation batches to draw
    :return: tuple ``(loss_i, loss_s, loss_p)`` averaged over the batches
    """
    model.eval()
    total_loss_i, total_loss_s, total_loss_p = 0, 0, 0
    # No parameter is updated here, so autograd bookkeeping is switched off.
    with torch.no_grad():
        for _ in range(nbatches):
            inputs, actions, targets, _, _ = dataloader.get_batch_fm('valid')

            pred, loss_p = model(inputs[: -1], actions, targets, z_dropout=opt.z_dropout)
            loss_p = loss_p[0]
            loss_i, loss_s = compute_loss(targets, pred)

            total_loss_i += loss_i.item()
            total_loss_s += loss_s.item()
            total_loss_p += loss_p.item()
            del inputs, actions, targets

    # Guard against nbatches == 0 (e.g. -epoch_size 1 gives int(1 / 2) == 0).
    denom = max(nbatches, 1)
    return total_loss_i / denom, total_loss_s / denom, total_loss_p / denom
210 | writer.add_scalar('Loss/train_state_img', train_losses[0], i) 211 | writer.add_scalar('Loss/train_state_vct', train_losses[1], i) 212 | writer.add_scalar('Loss/train_relative_entropy', train_losses[2], i) 213 | 214 | writer.add_scalar('Loss/validation_state_img', valid_losses[0], i) 215 | writer.add_scalar('Loss/validation_state_vct', valid_losses[1], i) 216 | writer.add_scalar('Loss/validation_relative_entropy', valid_losses[2], i) 217 | 218 | n_iter += opt.epoch_size 219 | model.cpu() 220 | torch.save({'model': model, 221 | 'optimizer': optimizer.state_dict(), 222 | 'n_iter': n_iter}, opt.model_file + '.model') 223 | if (n_iter/opt.epoch_size) % 10 == 0: 224 | torch.save(model, opt.model_file + f'.step{n_iter}.model') 225 | model.cuda() 226 | log_string = f'step {n_iter} | ' 227 | log_string += utils.format_losses(*train_losses, split='train') 228 | log_string += utils.format_losses(*valid_losses, split='valid') 229 | print(log_string) 230 | utils.log(opt.model_file + '.log', log_string) 231 | 232 | if writer is not None: 233 | writer.close() 234 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Prediction and Policy-learning Under Uncertainty (PPUU) 2 | [Gitter chatroom](http://gitter.im/PPUU), [video summary](http://youtu.be/X2s7gy3wIYw), [slides](http://bit.ly/PPUU-slides), [poster](http://bit.ly/PPUU-poster), [website](http://bit.ly/PPUU-web). 3 | Implementing [Model-Predictive Policy Learning with Uncertainty Regularization for Driving in Dense Traffic](http://bit.ly/PPUU-article) in [PyTorch](https://pytorch.org). 4 | 5 | ![planning](doc/planning.png) 6 | 7 | The objective is to train an *agent* (pink brain drawing) who's going to plan its own trajectory in a densely (stochastic) traffic highway. 8 | To do so, it minimises a few costs over trajectories unrolled while interacting with a *world model* (blue world drawing). 
9 | We need to start, then, by training the *world model* with observational data from the real world (Earth's photo), which needs to be downloaded from the Internet. 10 | 11 | ## Getting the real data 12 | 13 | To get started, you need to fetch the real world data. 14 | Go to this [address](http://bit.ly/PPUU-data), and download the TGZ file (330 MB) on your machine. 15 | Open a terminal, go to the location where you've downloaded the file, and type: 16 | 17 | ```bash 18 | tar xf xy-trajectories.tgz 19 | ``` 20 | 21 | This will expand the NGSIM (Next Generation Simulation) data set compressed archive, consisting of all cars trajectories for the 4 available maps (now 1.6 GB). 22 | Its content is the following: 23 | 24 | ```bash 25 | xy-trajectories 26 | ├── i80 27 | │   ├── trajectories-0400-0415.txt 28 | │   ├── trajectories-0500-0515.txt 29 | │   ├── trajectories-0515-0530.txt 30 | │   └── trajectory-data-dictionary.htm 31 | ├── lanker 32 | │   ├── trajectories-0830am-0845am.txt 33 | │   ├── trajectories-0845am-0900am.txt 34 | │   └── trajectory-data-dictionary.htm 35 | ├── peach 36 | │   ├── trajectories-0400pm-0415pm.txt 37 | │   ├── trajectories-1245pm-0100pm.txt 38 | │   └── trajectory-data-dictionary.htm 39 | └── us101 40 | ├── trajectories-0750am-0805am.txt 41 | ├── trajectories-0805am-0820am.txt 42 | ├── trajectories-0820am-0835am.txt 43 | └── trajectory-data-dictionary.htm 44 | 45 | 4 directories, 14 files 46 | ``` 47 | 48 | Finally, move the `xy-trajectories` directory inside a folder named `traffic-data`. 49 | 50 | ## Setting up the environment 51 | 52 | In this section we will fetch the repo, install the dependencies, and view the data we just downloaded, so that we can see if everything runs fine. 
53 | So, open up your terminal, and type: 54 | 55 | ```bash 56 | git clone git@github.com:Atcold/pytorch-PPUU.git 57 | # or with the https protocol 58 | # git clone https://github.com/Atcold/pytorch-PPUU 59 | ``` 60 | 61 | Now move (or symlink) the `traffic-data` folder inside the repo: 62 | 63 | ```bash 64 | cd pytorch-PPUU 65 | mv . 66 | # or 67 | # ln -s 68 | ``` 69 | 70 | Now install the `PPUU` environment (this expects you have `conda` on your system, go [here](https://conda.io/docs/user-guide/install/) if this is not the case): 71 | 72 | ```bash 73 | conda env create -f environment.yaml 74 | # 75 | # To activate this environment, use: 76 | # > source activate PPUU 77 | # 78 | # To deactivate an active environment, use: 79 | # > source deactivate 80 | # 81 | ``` 82 | 83 | As prescribed, activate it by typing: 84 | 85 | ```bash 86 | source activate PPUU # or 87 | conda activate PPUU 88 | ``` 89 | 90 | Finally, have a look at the four maps available in the NGSIM data set, namely: *I-80*, *US-101*, *Lankershim*, and *Peachtree*. 91 | There is a "bonus" map, called *AI*, where I've hard coded a policy for the vehicles, which are using a PID controller. 92 | Type the following command: 93 | 94 | ```bash 95 | python play_maps.py -map 96 | # where can be one of {i80,us101,peach,lanker,ai} 97 | # add -h to see the full list of options available 98 | ``` 99 | 100 | The frame rate should be greater than 20 Hz. 101 | Often it will be larger than 60 Hz. 102 | To be noted, here the vehicles are performing the actions extracted from the trajectories, and not simply following the original spatial coordinates. 103 | 104 | ## Dumping the "state, action, cost" triple 105 | 106 | In order to train both the *world* and *agent models*, we need to create the observations, starting from the NGSIM trajectories and the simulator. 
107 | This can be done with the following command: 108 | 109 | ```bash 110 | for t in 0 1 2; do python generate_trajectories.py -map i80 -time_slot $t; done 111 | # to dump the triple for the i80 map, otherwise replace i80 with the map you want 112 | ``` 113 | 114 | Upon the script termination, we will find a folder named `state-action-cost` within our `traffic-data`. 115 | The content of the latter is now the following: 116 | 117 | ```bash 118 | traffic-data/ 119 | ├── state-action-cost 120 | │   └── data_i80_v0 121 | │   ├── trajectories-0400-0415 122 | │   │ ├── car1.pkl 123 | │   │ └── ... 124 | │    ├── trajectories-0500-0515 125 | │   │ └── ... 126 | │    └── trajectories-0515-0530 127 | │   └── ... 128 | └── xy-trajectories 129 | └── ... 130 | ``` 131 | 132 | > ### Additional info 133 | > Each pickled vehicle observation is stored as `car{idx}.pkl`. 134 | > Its content is a `dict` which includes the items and corresponding sizes (shapes): 135 | > 136 | > ```python 137 | > images (309, 3, 117, 24) 138 | > actions (309, 2) 139 | > lane_cost (309,) 140 | > pixel_proximity_cost (309,) 141 | > states (309, 7, 4) 142 | > frames (309,) 143 | > ``` 144 | > For example, this vehicle was alive for 309 frames (time steps). 145 | > The `images` represent the occupancy grid, which is as large as 4 lanes width (24 pixels, here). 146 | > 147 | > - The *R* channel represents the lane markings. 148 | > - The *G* channel encodes the position and shape of the neighbouring vehicles. 149 | > - The *B* channel depits our own vehicle. 150 | > 151 | > The `actions` is a collection of 2D vectors, encoding the positive and negative acceleration in both *x* and *y* directions. 152 | > The `lane_cost` and `pixel_proximity_cost` are the task specific costs (see [slides](http://bit.ly/alf-PPUU) for details). 153 | > The `states` encode position and velocity of the current vehicle and the most closest 6 ones: left/current/right lanes, front/back. 
154 | > Finally, `frames` tells us the snapshot time stamp, so that we can go back to the simulator, and inspect strange situations present in the observations. 155 | 156 | Finally (this will likely be automated soon, and made available for every map), extract the car sizes for the *I-80* map with: 157 | 158 | ```bash 159 | python extract_car_size.py 160 | ``` 161 | 162 | ## Training the world model 163 | 164 | As we have stated above, we need to start by learning how the real world evolves. 165 | To do so, we train a neural net, which tries to predict what happens next, given that we start in a given *state*, and a specific *action* is performed. 166 | More precisely, we are going to train an *action conditional variational predictive net*, which closely resembles a variational autoencoder (VAE) that has three inputs (concatenated sequence of `states`, `images`, `action`) and its output is set to be the next item in the sequence (`states`, `images`). 167 | 168 | In the code, the world model is shortened as `fm`, which stands for *forward dynamics model*. 169 | So, let's train the forward dynamics model (`fm`) on the observational dataset. 170 | This can be done by running: 171 | 172 | ```bash 173 | python train_fm.py -model_dir 174 | ``` 175 | 176 | ## Training the cost model 177 | Along with the dynamics model, we have a separate model to predict the costs of state and action pairs, which can be trained by running: 178 | 179 | ```bash 180 | python train_cost.py 181 | ``` 182 | 183 | ## Training the agent 184 | 185 | ![agent training](doc/agent_train.png) 186 | 187 | ![uncertainty computation](doc/uncertainty.png) 188 | 189 | Once the dynamics model is trained, it can be used to train the policy network, using *MPUR*, *MPER*, or *IL*. 
190 | These correspond to: 191 | 192 | - *MPUR*: Model-based Policy learning with Uncertainty Regularisation (shown in the figure above) 193 | - *MPER*: Model-based Policy learning with Expert Regularisation (model-based IL) 194 | - *IL*: Imitation Learning (copying the expert actions given the past observations) 195 | 196 | This is done by running: 197 | 198 | ```bash 199 | python train_{MPUR,MPER,IL}.py -model_dir -mfile 200 | ``` 201 | 202 | ## Evaluating the agent 203 | 204 | To evaluate a trained policy, run the script `eval_policy.py` in one of the three following modes. 205 | Type `-h` to see other options and details. 206 | 207 | ```bash 208 | python eval_policy.py -model_dir -policy_model -method policy-{MPUR,MPER,IL} 209 | ``` 210 | 211 | You can also specify `-method bprop` to perform "brute force" planning, which will be computationally expensive. 212 | 213 | ### Parallel evaluation 214 | Evaluation happens in parallel. By default, the evaluator script uses min(10, #cores_available) processes. It doesn't go above 10 because then it hits GPU memory limits. 215 | To change the number of processes, you can pass the `-num-processes` argument to the `eval_policy.py` script. Also, for this to work, you need to request CPU cores using the `--cpus-per-task=X` argument for Slurm. 216 | Slurm limits CPU usage to 64 cores per user, and GPUs to 18 per user, therefore 3 is a reasonable limit to enable us to use all the GPUs without hitting the GPU limit when running multiple evaluations. The CPU limit can be extended, but you need to email the IT helpdesk. 217 | 218 | ## Pre-trained models 219 | 220 | [Here](https://drive.google.com/file/d/1XahspfgFlBVF6ne479LCJgBr0luZGQt7/) you can download the predictive model and the policy we've trained on our servers (they are bundled together in the `model` field of this *Python* dictionary). The agent achieves a success rate of 82.0%. 
"""Contains the tabs for different visualizations.
Each tab is a standalone visualization."""
from DataReader import DataReader
import ipywidgets as widgets
import json
import traitlets

from Widgets import (
    Picker,
    EpisodeReview,
    DimensionalityReductionPlot,
    PiePlot,
    HeatMap,
    HeatMapComparison,
    PolicyComparison,
    LearningCurve,
    ExperimentEntryView,
)


class EpisodeReviewTab(widgets.VBox):
    """A tab for visualizing a model's performance on an episode.
    The model is picked with a dropdown picker."""

    def __init__(self):
        self.episode_review = EpisodeReview()
        self.picker = Picker(
            Picker.EPISODE_LEVEL, widget=self.episode_review)
        super(EpisodeReviewTab, self).__init__(
            [self.picker, self.episode_review])


class PiePlotTab(widgets.VBox):
    """A tab for visualizing a model's success rate with a pie chart.
    The model is picked with a dropdown picker."""

    def __init__(self):
        self.pie_plot = PiePlot()
        self.picker = Picker(Picker.MODEL_LEVEL,
                             widget=self.pie_plot)
        super(PiePlotTab, self).__init__([self.picker, self.pie_plot])


class DimensionalityReductionPlotTab(widgets.VBox):
    """A tab for visualizing episodes with a scatter plot and
    dimensionality reduction."""

    def __init__(self):
        self.episode_review = EpisodeReview()
        self.dimensionality_reduction_plot = DimensionalityReductionPlot(
            widget=self.episode_review)
        self.picker = Picker(Picker.MODEL_LEVEL,
                             widget=self.dimensionality_reduction_plot)
        super(DimensionalityReductionPlotTab, self).__init__(
            [self.picker, self.dimensionality_reduction_plot, self.episode_review])


class HeatMapTab(widgets.VBox):
    """A tab showing episodes' 'difficulty' for a given model.
    Each cell in the heatmap represents how hard an episode is."""

    def __init__(self):
        self.heat_map = HeatMap()
        self.picker = Picker(
            Picker.EXPERIMENT_LEVEL, widget=self.heat_map)

        super(HeatMapTab, self).__init__([self.picker, self.heat_map])


class HeatMapComparisonTab(widgets.VBox):
    """Compares two models in their performance for different
    episodes, enabling us to see which episodes were failing or successful
    for two models.
    Color coding:
        orange - both models failed.
        red - first model succeeded, second failed.
        green - first model failed, second succeeded,
        blue - both models succeeded.
    """

    def __init__(self):

        self.heat_map = HeatMapComparison()
        self.picker0 = Picker(
            Picker.MODEL_LEVEL, callback=self.get_callback(0))
        self.picker1 = Picker(
            Picker.MODEL_LEVEL, callback=self.get_callback(1))

        self.picked_values = [None, None]

        self.pickers_hbox = widgets.HBox([self.picker0, self.picker1])

        super(HeatMapComparisonTab, self).__init__(
            [self.pickers_hbox, self.heat_map])

    def get_callback(self, index):
        """Return the picker callback that records the choice for ``index``.

        The heat map is refreshed only once both pickers have a value.
        """
        def callback(episode, seed, step):
            self.picked_values[index] = (episode, seed, step)
            if self.picked_values[0] is not None and \
                    self.picked_values[1] is not None:
                self.heat_map.update(
                    self.picked_values[0], self.picked_values[1])

        return callback


class PolicyComparisonTab(widgets.VBox):
    """Tab for comparing success rates across checkpoints of different
    experiments.

    Experiments are chosen using a multiselect widget.
    """

    def __init__(self):
        self.experiment_multiselect = widgets.SelectMultiple(
            options=list(DataReader.get_experiments_mapping().keys()),
            description='Experiments:',
            disabled=False,
            value=[],
        )

        self.policy_comparison = PolicyComparison()

        def experiment_multiselect_change_callback(change):
            if change.name == 'value' and change.new is not None:
                self.policy_comparison.update(change.new)

        self.experiment_multiselect.observe(
            experiment_multiselect_change_callback, type='change')
        super(PolicyComparisonTab, self).__init__(
            [self.experiment_multiselect, self.policy_comparison])


class LearningCurvesTab(widgets.VBox):
    """Tab for comparing learning curves for experiments.

    Experiments are chosen using a multiselect widget.
    """

    def __init__(self):
        self.experiment_multiselect = widgets.SelectMultiple(
            options=list(DataReader.get_experiments_mapping().keys()),
            description='Experiments:',
            disabled=False,
            value=[],
        )

        self.learning_curve = LearningCurve()

        def experiment_multiselect_change_callback(change):
            if change.name == 'value' and change.new is not None:
                self.learning_curve.update(change.new)

        self.experiment_multiselect.observe(
            experiment_multiselect_change_callback, type='change')
        super(LearningCurvesTab, self).__init__(
            [self.experiment_multiselect, self.learning_curve])


class ExperimentsDirectoryTab(widgets.HBox):
    """A tab that allows editing, deleting, and adding values to experiments
    directory.
    Contains a select widget, buttons and ExperimentEntryView.
    """

    def __init__(self):
        # Set while update_selector rebuilds the options, so that the
        # selection callback does not react to the intermediate changes.
        self.ignore_update = False

        def select_experiment_change_callback(change):
            if self.ignore_update:
                return
            if change.name == 'value' and change.new is not None:
                self.edit_experiment.children = [
                    self.widget_mapping[change.new]]

        def name_update_callback(_):
            self.update_selector()

        def save_callback(_):
            result_dict = {}
            for name, view in self.widget_mapping.items():
                result_dict[name] = [
                    view.experiment_root.value, view.model_name.value]

            with open(DataReader.EXPERIMENTS_MAPPING_FILE, 'w') as f:
                json.dump(result_dict, f)

        def delete_callback(_):
            selected = self.select_experiment.value
            # Nothing selected (e.g. empty directory): nothing to delete.
            if selected not in self.widget_mapping:
                return
            del self.widget_mapping[selected]
            self.update_selector()
            # The selector rebuild is silent, so refresh the edit pane
            # explicitly; otherwise it keeps showing the deleted entry.
            self._show_selected()

        def add_callback(_):
            # Pick a free placeholder name so that a second "Add" does not
            # overwrite an entry that has not been renamed yet.
            name = 'new'
            suffix = 1
            while name in self.widget_mapping:
                name = 'new_{}'.format(suffix)
                suffix += 1

            view = ExperimentEntryView(name, '', '')
            # New entries need the same rename observer as the loaded ones,
            # otherwise renaming them never reaches the selector and the
            # entry is saved under its placeholder key.
            view.experiment_name.observe(name_update_callback)
            self.widget_mapping[name] = view
            self.update_selector()
            self.select_experiment.value = name
            self._show_selected()

        with open(DataReader.EXPERIMENTS_MAPPING_FILE, 'r') as f:
            self.mapping = json.load(f)

        self.select_experiment = widgets.Select(
            options=self.mapping.keys(),
            disabled=False
        )
        self.save_button = widgets.Button(
            description='Save',
            disabled=False,
            layout=widgets.Layout(width='auto'),
        )
        self.delete_button = widgets.Button(
            description='Delete',
            disabled=False,
        )
        self.add_button = widgets.Button(
            description='Add',
            disabled=False,
        )
        self.buttons_hbox = widgets.HBox([self.add_button, self.delete_button])

        # Maps experiment name -> ExperimentEntryView; each stored value is a
        # two-item list, read below as (experiment root, model name).
        self.widget_mapping = {}
        for key in self.mapping:
            self.widget_mapping[key] = ExperimentEntryView(
                key,
                self.mapping[key][0],
                self.mapping[key][1]
            )
            self.widget_mapping[key].experiment_name.observe(
                name_update_callback)

        self.left_column = widgets.VBox(
            [
                self.select_experiment,
                self.buttons_hbox,
                self.save_button,
            ])
        self.edit_experiment = widgets.Box([])
        self.edit_experiment.layout.width = 'auto'

        self.select_experiment.observe(
            select_experiment_change_callback, type='change')

        self.add_button.on_click(add_callback)
        self.save_button.on_click(save_callback)
        self.delete_button.on_click(delete_callback)

        super(ExperimentsDirectoryTab, self).__init__(
            [self.left_column, self.edit_experiment],
            layout=widgets.Layout(width='100%', align_items='stretch'))

    def _show_selected(self):
        """Show the entry view of the current selection, or clear the pane."""
        selected = self.select_experiment.value
        if selected in self.widget_mapping:
            self.edit_experiment.children = [self.widget_mapping[selected]]
        else:
            self.edit_experiment.children = []

    def update_selector(self):
        """
        This function serves to rebuild the selector with the
        actual values. Used when we add a new value, delete a value,
        and each time we change the name
        of a given experiment. It updates the values in the selector,
        preserving the selection.
        """
        self.ignore_update = True
        try:
            new_widget_mapping = {}
            options = []
            old_index = self.select_experiment.index
            for x in self.widget_mapping:
                name = self.widget_mapping[x].experiment_name.value
                new_widget_mapping[name] = self.widget_mapping[x]
                options.append(name)
            self.widget_mapping = new_widget_mapping
            self.select_experiment.options = options
            # Restore the selection only when there was one and something is
            # left to select: min(None, ...) raises, and an empty list would
            # otherwise yield the invalid index -1.
            if options and old_index is not None:
                self.select_experiment.index = min(old_index, len(options) - 1)
        finally:
            # Never leave the selection callback muted if the rebuild fails.
            self.ignore_update = False
parser.add_argument('-beta', type=float, default=0.0, help='weight coefficient of prior loss') 31 | parser.add_argument('-p_dropout', type=float, default=0.0, help='set z=0 with this probability') 32 | parser.add_argument('-dropout', type=float, default=0.0, help='regular dropout') 33 | parser.add_argument('-nz', type=int, default=2) 34 | parser.add_argument('-n_mixture', type=int, default=10) 35 | parser.add_argument('-context_dim', type=int, default=2) 36 | parser.add_argument('-actions_subsample', type=int, default=4) 37 | parser.add_argument('-lrt', type=float, default=0.0001) 38 | parser.add_argument('-grad_clip', type=float, default=1.0) 39 | parser.add_argument('-epoch_size', type=int, default=500) 40 | parser.add_argument('-curriculum_length', type=int, default=16) 41 | parser.add_argument('-zeroact', type=int, default=0) 42 | parser.add_argument('-warmstart', type=int, default=0) 43 | parser.add_argument('-targetprop', type=int, default=0) 44 | parser.add_argument('-loss_c', type=int, default=0) 45 | parser.add_argument('-lambda_c', type=float, default=0.0) 46 | parser.add_argument('-lambda_h', type=float, default=0.0) 47 | parser.add_argument('-lambda_lane', type=float, default=0.1) 48 | parser.add_argument('-lrt_traj', type=float, default=0.5) 49 | parser.add_argument('-niter_traj', type=int, default=20) 50 | parser.add_argument('-gamma', type=float, default=1.0) 51 | #parser.add_argument('-mfile', type=str, default='model=fwd-cnn-vae-fp-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-dropout=0.1-nz=32-beta=1e-06-zdropout=0.5-gclip=5.0-warmstart=1-seed=1.step200000.model') 52 | parser.add_argument('-mfile', type=str, default='model=fwd-cnn-layers=3-bsize=64-ncond=20-npred=20-lrt=0.0001-nfeature=256-dropout=0.1-gclip=5.0-warmstart=0-seed=1.step200000.model') 53 | parser.add_argument('-load_model_file', type=str, default='') 54 | parser.add_argument('-combine', type=str, default='add') 55 | parser.add_argument('-debug', action='store_true') 56 
| parser.add_argument('-test_only', type=int, default=0) 57 | parser.add_argument('-enable_tensorboard', action='store_true', 58 | help='Enables tensorboard logging.') 59 | parser.add_argument('-tensorboard_dir', type=str, default='models/policy_networks', 60 | help='path to the directory where to save tensorboard log. If passed empty path' \ 61 | ' no logs are saved.') 62 | opt = parser.parse_args() 63 | 64 | opt.n_inputs = 4 65 | opt.n_actions = 2 66 | opt.height = 117 67 | opt.width = 24 68 | opt.h_height = 14 69 | opt.h_width = 3 70 | opt.hidden_size = opt.nfeature*opt.h_height*opt.h_width 71 | 72 | 73 | os.system('mkdir -p ' + opt.model_dir + '/policy_networks/') 74 | 75 | random.seed(opt.seed) 76 | numpy.random.seed(opt.seed) 77 | torch.manual_seed(opt.seed) 78 | torch.cuda.manual_seed(opt.seed) 79 | 80 | 81 | 82 | opt.model_file = f'{opt.model_dir}/policy_networks/' 83 | 84 | opt.model_file += f'mbil-{opt.policy}-nfeature={opt.nfeature}-npred={opt.npred}-lambdac={opt.lambda_c}-gamma={opt.gamma}-seed={opt.seed}' 85 | if 'vae' in opt.mfile: 86 | opt.model_file += f'-model=vae' 87 | model_type = 'vae' 88 | elif 'ten' in opt.mfile: 89 | opt.model_file += f'-model=ten' 90 | model_type = 'ten' 91 | elif 'model=fwd-cnn-layers' in opt.mfile: 92 | model_type = 'det' 93 | opt.model_file += '-deterministic' 94 | if 'zdropout=0.5' in opt.mfile: 95 | opt.model_file += '-zdropout=0.5' 96 | elif 'zdropout=0.0' in opt.mfile: 97 | opt.model_file += '-zdropout=0.0' 98 | 99 | 100 | print(f'[will save as: {opt.model_file}]') 101 | 102 | if os.path.isfile(opt.model_file + '.model') and False: 103 | print('[found previous checkpoint, loading]') 104 | checkpoint = torch.load(opt.model_file + '.model') 105 | model = checkpoint['model'] 106 | optimizer = optim.Adam(model.policy_net.parameters(), opt.lrt) 107 | optimizer.load_state_dict(checkpoint['optimizer']) 108 | n_iter = checkpoint['n_iter'] 109 | if opt.test_only == 0: 110 | utils.log(opt.model_file + '.log', '[resuming from 
checkpoint]') 111 | else: 112 | # load the model 113 | model = torch.load(opt.model_dir + opt.mfile) 114 | if type(model) is dict: model = model['model'] 115 | model.create_policy_net(opt) 116 | model.opt.actions_subsample = opt.actions_subsample 117 | optimizer = optim.Adam(model.policy_net.parameters(), opt.lrt) 118 | n_iter = 0 119 | # stats = torch.load('/misc/vlgscratch4/LecunGroup/nvidia-collab/traffic-data-atcold/data_i80_v0/data_stats.pth') 120 | # model.stats=stats 121 | if 'ten' in opt.mfile: 122 | pzfile = opt.model_dir + opt.mfile + '.pz' 123 | p_z = torch.load(pzfile) 124 | model.p_z = p_z 125 | 126 | 127 | if opt.actions_subsample == -1: 128 | opt.context_dim = 0 129 | 130 | model.intype('gpu') 131 | model.cuda() 132 | 133 | 134 | print('[loading data]') 135 | dataloader = DataLoader(None, opt, opt.dataset) 136 | 137 | 138 | # training and testing functions. We will compute several losses: 139 | # loss_i: images 140 | # loss_s: states 141 | # loss_c: costs 142 | # loss_p: prior (optional) 143 | 144 | def compute_loss(targets, predictions, gamma=1.0, r=True): 145 | target_images, target_states, target_costs = targets 146 | pred_images, pred_states, pred_costs, loss_p = predictions 147 | loss_i = F.mse_loss(pred_images, target_images, reduce=False).mean(4).mean(3).mean(2) 148 | loss_s = F.mse_loss(pred_states, target_states, reduce=False).mean(2) 149 | # loss_c = F.mse_loss(pred_costs, target_costs, reduce=False).mean(2) 150 | if gamma < 1.0: 151 | loss_i *= gamma_mask 152 | loss_s *= gamma_mask 153 | loss_c *= gamma_mask 154 | return loss_i.mean(), loss_s.mean(), torch.zeros(1), loss_p.mean() 155 | 156 | def train(nbatches, npred): 157 | gamma_mask = torch.Tensor([opt.gamma**t for t in range(npred)]).view(1, -1).cuda() 158 | model.eval() 159 | model.policy_net.train() 160 | total_loss_i, total_loss_s, total_loss_c, total_loss_policy, total_loss_p, n_updates = 0, 0, 0, 0, 0, 0 161 | for i in range(nbatches): 162 | optimizer.zero_grad() 163 | inputs, 
actions, targets, _, _ = dataloader.get_batch_fm('train', npred) 164 | pred, _ = planning.train_policy_net_mper(model, inputs, targets, dropout=opt.p_dropout, model_type=model_type) 165 | loss_i, loss_s, loss_c_, loss_p = compute_loss(targets, pred) 166 | # proximity_cost, lane_cost = pred[2][:, :, 0], pred[2][:, :, 1] 167 | # proximity_cost = proximity_cost * gamma_mask 168 | # lane_cost = lane_cost * gamma_mask 169 | # loss_c = proximity_cost.mean() + opt.lambda_lane * lane_cost.mean() 170 | loss_policy = loss_i + loss_s + opt.lambda_h*loss_p 171 | if opt.loss_c == 1: 172 | loss_policy += loss_c_ 173 | if not math.isnan(loss_policy.item()): 174 | loss_policy.backward() 175 | torch.nn.utils.clip_grad_norm(model.policy_net.parameters(), opt.grad_clip) 176 | optimizer.step() 177 | total_loss_i += loss_i.item() 178 | total_loss_s += loss_s.item() 179 | total_loss_p += loss_p.item() 180 | total_loss_policy += loss_policy.item() 181 | n_updates += 1 182 | else: 183 | print('warning, NaN') 184 | 185 | del inputs, actions, targets, pred 186 | 187 | total_loss_i /= n_updates 188 | total_loss_s /= n_updates 189 | total_loss_c /= n_updates 190 | total_loss_policy /= n_updates 191 | total_loss_p /= n_updates 192 | return total_loss_i, total_loss_s, total_loss_c, total_loss_policy, total_loss_p 193 | 194 | 195 | def test(nbatches, npred): 196 | gamma_mask = torch.Tensor([opt.gamma**t for t in range(npred)]).view(1, -1).cuda() 197 | model.eval() 198 | total_loss_i, total_loss_s, total_loss_c, total_loss_policy, total_loss_p, n_updates = 0, 0, 0, 0, 0, 0 199 | for i in range(nbatches): 200 | inputs, actions, targets, _, _ = dataloader.get_batch_fm('test', npred) 201 | pred, pred_actions = planning.train_policy_net_mper(model, inputs, targets, targetprop = opt.targetprop, dropout=0.0, model_type = model_type) 202 | loss_i, loss_s, loss_c_, loss_p = compute_loss(targets, pred) 203 | loss_policy = loss_i + loss_s 204 | if opt.loss_c == 1: 205 | loss_policy += loss_c_ 206 | if not 
math.isnan(loss_policy.item()): 207 | total_loss_i += loss_i.item() 208 | total_loss_s += loss_s.item() 209 | total_loss_p += loss_p.item() 210 | total_loss_policy += loss_policy.item() 211 | n_updates += 1 212 | del inputs, actions, targets, pred 213 | 214 | total_loss_i /= n_updates 215 | total_loss_s /= n_updates 216 | total_loss_c /= n_updates 217 | total_loss_policy /= n_updates 218 | total_loss_p /= n_updates 219 | return total_loss_i, total_loss_s, total_loss_c, total_loss_policy, total_loss_p 220 | 221 | 222 | # set by hand to fit on 12gb GPU 223 | def get_batch_size(npred): 224 | if npred <= 15: 225 | return 64 226 | elif npred <= 50: 227 | return 32 228 | elif npred <= 100: 229 | return 16 230 | elif npred <= 200: 231 | return 8 232 | elif npred <= 400: 233 | return 4 234 | elif npred <= 800: 235 | return 2 236 | else: 237 | return 1 238 | 239 | 240 | if opt.test_only == 1: 241 | print('[testing]') 242 | valid_losses = test(10, 200) 243 | else: 244 | 245 | writer = utils.create_tensorboard_writer(opt) 246 | 247 | print('[training]') 248 | utils.log(opt.model_file + '.log', f'[job name: {opt.model_file}]') 249 | npred = opt.npred if opt.npred != -1 else 16 250 | 251 | for i in range(500): 252 | bsize = get_batch_size(npred) 253 | dataloader.opt.batch_size = bsize 254 | train_losses = train(opt.epoch_size, npred) 255 | valid_losses = test(int(opt.epoch_size / 2), npred) 256 | n_iter += opt.epoch_size 257 | model.intype('cpu') 258 | torch.save({'model': model, 259 | 'optimizer': optimizer.state_dict(), 260 | 'opt': opt, 261 | 'npred': npred, 262 | 'n_iter': n_iter}, 263 | opt.model_file + '.model') 264 | model.intype('gpu') 265 | 266 | if writer is not None: 267 | writer.add_scalar('Loss/train_state_img', train_losses[0], i) 268 | writer.add_scalar('Loss/train_state_vct', train_losses[1], i) 269 | writer.add_scalar('Loss/train_costs', train_losses[2], i) 270 | writer.add_scalar('Loss/train_policy', train_losses[3], i) 271 | 
class DataLoader:
    """In-memory loader of driving episodes for forward-model training.

    On construction every data shard below the data directory is loaded
    (building and caching a combined ``all_data.pth`` per shard when it is
    missing), the episode indices are split into train/valid/test, and the
    action/state normalisation statistics and car sizes are loaded (or
    computed and cached). ``get_batch_fm`` then samples random windows.
    """

    # Keys of a shard dictionary, paired with the attribute they extend.
    _SHARD_FIELDS = (
        ('images', 'images'),
        ('actions', 'actions'),
        ('costs', 'costs'),
        ('states', 'states'),
        ('ids', 'ids'),
        ('ego_car', 'ego_car_images'),
    )

    def __init__(self, fname, opt, dataset='simulator', single_shard=False):
        """Load all data for ``dataset``.

        Args:
            fname: unused.
            opt: options object; ``debug``, ``ncond``, ``npred`` and
                ``batch_size`` are read by this class.
            dataset: ``'i80'`` / ``'us101'`` select the bundled data
                directory, any other value is used as the data directory.
            single_shard: load only one shard (forced when ``opt.debug``).
        """
        if opt.debug:
            single_shard = True
        self.opt = opt
        self.random = random.Random()
        self.random.seed(12345)  # use this so that the same batches will always be picked

        if dataset in ('i80', 'us101'):
            data_dir = f'traffic-data/state-action-cost/data_{dataset}_v0'
        else:
            data_dir = dataset

        shard_dirs = next(os.walk(data_dir))[1]
        if single_shard:
            # quick load for debugging
            data_files = [f'{shard_dirs[0]}.txt/']
        else:
            data_files = shard_dirs

        self.images = []
        self.actions = []
        self.costs = []
        self.states = []
        self.ids = []
        self.ego_car_images = []
        for df in data_files:
            shard = self._load_shard(data_dir, df)
            for key, attribute in self._SHARD_FIELDS:
                getattr(self, attribute).extend(shard[key])

        self.n_episodes = len(self.images)
        print(f'Number of episodes: {self.n_episodes}')

        self._init_splits(data_dir)
        self._init_stats(data_dir)

        car_sizes_path = data_dir + '/car_sizes.pth'
        print(f'[loading car sizes: {car_sizes_path}]')
        self.car_sizes = torch.load(car_sizes_path)

    @staticmethod
    def _load_shard(data_dir, df):
        """Return the episode lists of one shard, caching them if needed."""
        combined_data_path = f'{data_dir}/{df}/all_data.pth'
        if os.path.isfile(combined_data_path):
            print(f'[loading data shard: {combined_data_path}]')
            return torch.load(combined_data_path)

        print(data_dir)
        images = []
        actions = []
        costs = []
        states = []
        ids = glob.glob(f'{data_dir}/{df}/car*.pkl')
        ids.sort()
        ego_car_images = []
        for f in ids:
            print(f'[loading {f}]')
            # Context manager so the handle is closed after every episode.
            with open(f, 'rb') as handle:
                fd = pickle.load(handle)
            Ta = fd['actions'].size(0)
            # TODO Check why there are more costs than actions; until then
            # the costs are truncated to the number of actions.
            images.append(fd['images'])
            actions.append(fd['actions'])
            costs.append(torch.cat((
                fd.get('pixel_proximity_cost')[:Ta].view(-1, 1),
                fd.get('lane_cost')[:Ta].view(-1, 1),
            ), 1),)
            states.append(fd['states'])
            ego_car_images.append(fd['ego_car'])

        print(f'Saving {combined_data_path} to disk')
        shard = {
            'images': images,
            'actions': actions,
            'costs': costs,
            'states': states,
            'ids': ids,
            'ego_car': ego_car_images,
        }
        torch.save(shard, combined_data_path)
        return shard

    def _init_splits(self, data_dir):
        """Load the train/valid/test episode indices, or create and save them."""
        splits_path = data_dir + '/splits.pth'
        if os.path.exists(splits_path):
            print(f'[loading data splits: {splits_path}]')
            self.splits = torch.load(splits_path)
            self.train_indx = self.splits.get('train_indx')
            self.valid_indx = self.splits.get('valid_indx')
            self.test_indx = self.splits.get('test_indx')
        else:
            print('[generating data splits]')
            # Fixed seed: the 80/10/10 split is reproducible.
            rgn = numpy.random.RandomState(0)
            perm = rgn.permutation(self.n_episodes)
            n_train = int(math.floor(self.n_episodes * 0.8))
            n_valid = int(math.floor(self.n_episodes * 0.1))
            self.train_indx = perm[0: n_train]
            self.valid_indx = perm[n_train: n_train + n_valid]
            self.test_indx = perm[n_train + n_valid:]
            torch.save(dict(
                train_indx=self.train_indx,
                valid_indx=self.valid_indx,
                test_indx=self.test_indx,
            ), splits_path)

    def _init_stats(self, data_dir):
        """Load the action/state mean and std, or compute them on the train split."""
        stats_path = data_dir + '/data_stats.pth'
        if os.path.isfile(stats_path):
            print(f'[loading data stats: {stats_path}]')
            stats = torch.load(stats_path)
            self.a_mean = stats.get('a_mean')
            self.a_std = stats.get('a_std')
            self.s_mean = stats.get('s_mean')
            self.s_std = stats.get('s_std')
        else:
            print('[computing action stats]')
            all_actions = torch.cat([self.actions[i] for i in self.train_indx], 0)
            self.a_mean = torch.mean(all_actions, 0)
            self.a_std = torch.std(all_actions, 0)
            print('[computing state stats]')
            # Index 0 along dim 1 is the state that get_batch_fm keeps.
            all_states = torch.cat([self.states[i][:, 0] for i in self.train_indx], 0)
            self.s_mean = torch.mean(all_states, 0)
            self.s_std = torch.std(all_states, 0)
            torch.save({'a_mean': self.a_mean,
                        'a_std': self.a_std,
                        's_mean': self.s_mean,
                        's_std': self.s_std}, stats_path)

    def get_batch_fm(self, split, npred=-1, cuda=True):
        """Sample a batch for forward modelling.

        Each sample is a random window of ``ncond + npred`` steps taken from
        a random episode of ``split``:

            |-----ncond-----||------------npred------------|
            0                t0                            t1

        Args:
            split: ``'train'``, ``'valid'`` or ``'test'``.
            npred: number of steps to predict; ``-1`` uses ``opt.npred``.
            cuda: put the tensors on the CUDA device instead of the CPU.

        Returns:
            ``[input_images, input_states, ego_cars]``: the first ``ncond``
                steps plus the ego-car image of each sample,
            ``actions``: the ``npred`` actions from step ``t0 - 1`` to
                ``t1 - 2``,
            ``[target_images, target_states, target_costs]``: the last
                ``npred`` steps,
            ``ids``: list with the episode file path of each sample,
            ``sizes``: tensor with the two car-size values of each sample.

        Raises:
            ValueError: if ``split`` is not one of the known split names.
        """
        # Choose the correct device
        device = torch.device('cuda') if cuda else torch.device('cpu')

        if split == 'train':
            indx = self.train_indx
        elif split == 'valid':
            indx = self.valid_indx
        elif split == 'test':
            indx = self.test_indx
        else:
            raise ValueError(
                f"unknown split {split!r}, expected 'train', 'valid' or 'test'")

        if npred == -1:
            npred = self.opt.npred

        images, states, actions, costs, ids, sizes, ego_cars = [], [], [], [], [], [], []
        nb = 0
        T = self.opt.ncond + npred
        # NOTE(review): this loop does not terminate if no episode of the
        # split has at least T steps.
        while nb < self.opt.batch_size:
            s = self.random.choice(indx)
            # min is important since sometimes numbers do not align causing issues in stack operation below
            episode_length = min(self.images[s].size(0), self.states[s].size(0))
            if episode_length >= T:
                t = self.random.randint(0, episode_length - T)
                images.append(self.images[s][t: t + T].to(device))
                actions.append(self.actions[s][t: t + T].to(device))
                states.append(self.states[s][t: t + T, 0].to(device))  # discard 6 neighbouring cars
                costs.append(self.costs[s][t: t + T].to(device))
                ids.append(self.ids[s])
                ego_cars.append(self.ego_car_images[s].to(device))
                # The id is the episode path '<...>/<time_slot>/car<N>.pkl'.
                splits = self.ids[s].split('/')
                time_slot = splits[-2]
                car_id = int(re.findall(r'car(\d+).pkl', splits[-1])[0])
                size = self.car_sizes[time_slot][car_id]
                sizes.append([size[0], size[1]])
                nb += 1

        # Pile up stuff
        images = torch.stack(images)
        states = torch.stack(states)
        actions = torch.stack(actions)
        sizes = torch.tensor(sizes)
        ego_cars = torch.stack(ego_cars)

        # Normalise actions, state_vectors, state_images
        if not self.opt.debug:
            actions = self.normalise_action(actions)
            states = self.normalise_state_vector(states)
        images = self.normalise_state_image(images)
        ego_cars = self.normalise_state_image(ego_cars)

        costs = torch.stack(costs)

        t0 = self.opt.ncond
        t1 = T
        input_images = images[:, :t0].float().contiguous()
        input_states = states[:, :t0].float().contiguous()
        target_images = images[:, t0:t1].float().contiguous()
        target_states = states[:, t0:t1].float().contiguous()
        target_costs = costs[:, t0:t1].float().contiguous()
        # Actions are shifted one step back with respect to the targets.
        t0 -= 1
        t1 -= 1
        actions = actions[:, t0:t1].float().contiguous()
        ego_cars = ego_cars.float().contiguous()

        return [input_images, input_states, ego_cars], actions, [target_images, target_states, target_costs], ids, sizes

    @staticmethod
    def normalise_state_image(images):
        """Return ``images`` as floats divided by 255.

        The division is in place on the result of ``images.float()``.
        """
        return images.float().div_(255.0)

    def normalise_state_vector(self, states):
        """Standardise ``states`` in place with ``s_mean`` / ``s_std`` and return it."""
        shape = (1, 1, 4) if states.dim() == 3 else (1, 4)  # dim = 3: state sequence, dim = 2: single state
        states -= self.s_mean.view(*shape).expand(states.size()).to(states.device)
        states /= (1e-8 + self.s_std.view(*shape).expand(states.size())).to(states.device)
        return states

    def normalise_action(self, actions):
        """Standardise ``actions`` in place with ``a_mean`` / ``a_std`` and return it."""
        actions -= self.a_mean.view(1, 1, 2).expand(actions.size()).to(actions.device)
        actions /= (1e-8 + self.a_std.view(1, 1, 2).expand(actions.size())).to(actions.device)
        return actions


if __name__ == '__main__':
    # Create some dummy options
    class DataSettings:
        debug = False
        batch_size = 4
        npred = 20
        ncond = 10
    # Instantiate data set object
    d = DataLoader(None, opt=DataSettings, dataset='i80')
    # Retrieve first training batch
    x = d.get_batch_fm('train', cuda=False)