├── youtube-8m-ensemble ├── .vimrc ├── all_ensemble_models │ ├── .vimrc │ ├── __init__.py │ └── mean_model.py ├── ensemble_scripts │ ├── .vimrc │ ├── ensemble_no1.conf │ ├── ensemble_no2.conf │ ├── ensemble_no5.conf │ ├── ensemble_no3.conf │ ├── ensemble_no4.conf │ ├── check-video_id.sh │ ├── combine-tfrecords-frame.sh │ ├── combine-tfrecords-video.sh │ ├── combine-tfrecords-video-v2.sh │ ├── ensemble_no6.conf │ ├── check-video_id_match.sh │ ├── combine-tfrecords-frame-v2.sh │ ├── ensemble_no8.conf │ ├── train-mean_model.sh │ ├── infer-attention_linear_model.sh │ ├── infer-mean_model.sh │ ├── infer-attention_rectified_linear_model.sh │ ├── infer-moe_model.sh │ ├── infer-attention_moe_model.sh │ ├── eval-mean_model.sh │ ├── infer-attention_matrix_model.sh │ ├── after_submission_no1.conf │ ├── infer-attention_linmatrix_model.sh │ ├── train-moe_model.sh │ ├── infer-linear_model.sh │ ├── train-linear_model.sh │ ├── train-nonunit_matrix_model.sh │ ├── infer-attention_moe_matrix_model.sh │ ├── infer-matrix_model.sh │ ├── train-matrix_model.sh │ ├── train-matrix_model_lr.sh │ ├── preensemble-mean_model.sh │ ├── auto-preensemble-matrix_model.sh │ ├── ensemble_no7.conf │ ├── preensemble-matrix_model.sh │ ├── train-input_moe_model.sh │ └── auto-preensemble-deep_combine_chain_model.sh ├── model_selection_scripts │ ├── .vimrc │ ├── get_patterns.py │ ├── extend-step-mean_model.sh │ ├── get_top_k.py │ └── get_extend_candidates.py ├── cloudml-gpu.yaml ├── cloudml-gpu-distributed.yaml ├── top_k_scripts │ ├── run_top_k.sh │ └── infer-attention_matrix_model.sh ├── data_augmentation.py ├── ensemble_command.example ├── training_utils │ ├── del.py │ ├── select.py │ └── sample_conf.py ├── feature_transform.py ├── __init__.py └── models.py ├── youtube-8m-wangheda ├── .vimrc ├── all_video_models │ ├── .vimrc │ ├── stage2_logistic_model.py │ ├── __init__.py │ └── logistic_model.py ├── all_frame_models │ └── .vimrc ├── cloudml-gpu.yaml ├── all_data_augmentation │ ├── __init__.py │ ├── 
default_augmenter.py │ └── noise_augmenter.py ├── all_feature_transform │ ├── __init__.py │ ├── identical_transformer.py │ ├── default_transformer.py │ └── avg_transformer.py ├── cloudml-gpu-distributed.yaml ├── data_augmentation.py ├── training_utils │ ├── del.py │ └── select.py ├── training_scripts │ ├── run-cnn-lstm.sh │ ├── run-lstm-memory-cell1024.sh │ ├── run-parallel-lstm-memory.sh │ ├── run-parallel-lstm-output.sh │ ├── run-cnn-model.sh │ ├── run-cascade-75-chaining-parallel-lstm.sh │ ├── run-lstm-memory-cell2048.sh │ ├── run-cascade-75-multiple-attention-pooling.sh │ ├── run-cascade-76-multiple-attention-pooling.sh │ ├── run-temporal-pooling-lstm.sh │ ├── run-cascade-76-parallel-lstm-boosting.sh │ ├── run-multiscale-cnn-lstm-model.sh │ ├── run-chaining-lstm.sh │ ├── run-cascade-76-parallel-lstm.sh │ ├── run-chaining-lstm-cnn.sh │ ├── run-chaining-cnn.sh │ ├── run-cascade-75-chaining-cnn.sh │ ├── run-cascade-76-chaining-cnn.sh │ ├── run-multiple-attention-pooling-positional-embedding.sh │ ├── run-chaining-video.sh │ ├── run-chaining-deep-cnn.sh │ ├── run-cascade-76-multiscale-cnn-lstm.sh │ ├── run-chaining-shared-lstm.sh │ ├── run-cascade-75-chaining-lstm.sh │ ├── run-cascade-75-chaining-lstm-cnn.sh │ ├── run-cascade-75-chaining-video.sh │ ├── run-cascade-76-chaining-lstm-cnn.sh │ └── run-cascade-76-chaining-video.sh ├── infer_scripts │ ├── infer-lstmmemory-audio.sh │ ├── infer-model_input.sh │ ├── infer-biunilstm1024_moe8.sh │ ├── infer-attentionlstm_moe4.sh │ ├── infer-lstmattlstm1024_moe8.sh │ ├── infer-dbof.sh │ ├── infer-deeplstm1024_layer6_moe4.sh │ ├── infer-cnn_lstmmemory1024_moe8.sh │ ├── infer-video_logistic.sh │ ├── infer-cnn_deep_combine_chain.sh │ ├── infer-lstmmemory1024_moe8.sh │ ├── infer-lstmparalleloutput1024_moe8.sh │ ├── infer-deep_cnn_deep_combine.sh │ ├── infer-framehop_lstm.sh │ ├── infer-lstm_attention8_max.sh │ ├── infer-lstm_cnn_deep_combine_chain.sh │ ├── infer-lstmmemory2048_moe4.sh │ ├── infer-lstmparallelmemory1024_moe8.sh │ 
├── infer-video_moe16_model.sh │ ├── infer-multilstmmemory1024_moe4_deep_chain.sh │ ├── infer-video_very_deep_combine_chain.sh │ ├── infer-video_group_moe4_noise0.2_layer4_elu.sh │ ├── infer-cnn_model.sh │ ├── infer-frame_seg.sh │ ├── infer-multires_lstm_deep_combine_chain.sh │ ├── infer-lstmmemory1024_deep_combine_chain_add_length.sh │ ├── infer-distill_video_dcc.sh │ ├── infer-lstmmemory-layer1.sh │ ├── infer-positional-lstmattention8max.sh │ ├── infer-distillation-cnn-dcc.sh │ ├── infer-distillchain-cnn-dcc.sh │ ├── infer-distillchain-v2-video-dcc.sh │ ├── infer-distillchain-lstmparalleloutput.sh │ ├── infer-distillchain-v2-lstmparalleloutput.sh │ ├── infer-video-distillchain-video-dcc.sh │ ├── infer-distillchain-v2-multiscal-cnnlstm.sh │ └── infer-distillation-video-dcc.sh ├── feature_transform.py ├── __init__.py ├── data_augmentation_scripts │ ├── run-parallel-lstm-memory.sh │ ├── run-multiple-attention-pooling-positional-embedding.sh │ ├── run-multiscale-cnn-lstm-model.sh │ ├── run-chaining-lstm.sh │ ├── run-chaining-cnn.sh │ └── run-chaining-video.sh ├── eval_scripts │ ├── eval-dbof.sh │ ├── eval-video-logistic.sh │ ├── eval-moe-model.sh │ ├── eval-chain-moe.sh │ ├── eval-video-pairwise.sh │ ├── eval-video-moe.sh │ ├── eval-chain-moe-0.4.sh │ ├── eval-stage2-logistic.sh │ ├── eval-bi-uni-lstm.sh │ ├── eval-att.sh │ ├── eval-mem.sh │ ├── eval-moe-baseline.sh │ ├── eval-frame-seg.sh │ ├── eval-cnn-model.sh │ ├── eval-stage2-moe.sh │ ├── eval-chain-model-relu.sh │ ├── eval-lstmmemory.sh │ ├── eval-chain-model-suprelu.sh │ ├── eval-lstmmem-cnnlstm.sh │ ├── eval-att-lstm.sh │ ├── eval-chain-moe-freq.sh │ ├── eval-lstmmem-dropout.sh │ ├── eval-lstm-attention-8max.sh │ ├── eval-video-deep-chain.sh │ ├── eval-lstmmem-noise.sh │ ├── eval-lstmmem2048.sh │ ├── eval-lstmmemory-layer1.sh │ ├── eval-lstmoutput-parallel.sh │ ├── eval-moe-batchagreement1.sh │ ├── eval-moe-batchagreement2.sh │ ├── eval-moe-batchagreement3.sh │ ├── eval-lstmmem-augmenter.sh │ ├── 
eval-lstmmem-input-noise.sh │ ├── eval-lstmmem-parallel.sh │ ├── eval-lstmmemory-audio.sh │ ├── eval-lstmmem-layernorm.sh │ ├── eval-lstmmem-lowres.sh │ ├── eval-moe-topk-batchagreement1.sh │ ├── eval-moe-topk-batchagreement2.sh │ ├── eval-moe-topk-batchagreement3.sh │ ├── eval-lstm-look-back.sh │ ├── eval-lstmmem-no-transform.sh │ ├── eval-mm-lstm.sh │ ├── eval-lstm-positional-attention-8max.sh │ ├── eval-lstmmem-feature.sh │ ├── eval-distill-video-dcc-noise-scene1.sh │ ├── eval-distill-video-dcc-noise-scene2.sh │ ├── eval-multitask-ce.sh │ ├── eval-multitask.sh │ ├── eval-chain-moe-suprelu-vert+freq.sh │ └── eval-lstmmem-l2norm.sh ├── eval.sh └── models.py ├── .gitignore ├── model ├── virtual_grouping │ ├── virtual_group_mean.conf │ ├── virtual_group_video.conf │ ├── virtual_group_attention.conf │ ├── virtual_group_dcc.conf │ ├── virtual_group_lstm.conf │ └── ensemble.conf └── model_selection │ ├── top_8_model.conf │ ├── top_12_model.conf │ ├── top_16_model.conf │ └── top_20_model.conf ├── youtube-8m-zhangteng ├── cloudml-gpu.yaml ├── cloudml-gpu-distributed.yaml ├── train_scripts │ ├── run-bilstm-a.sh │ ├── run-chaining-video-normal.sh │ ├── run-chaining-video-vertical.sh │ ├── run-chaining-video-add-confident.sh │ ├── run-chaining-video-infrequent-softmax.sh │ ├── run-lstm-s.sh │ ├── run-attention-pooling-lstm-a.sh │ ├── run-lstm-random-augmentation.sh │ ├── run-temporal-segment-lstm.sh │ ├── run-attention-pooling.sh │ ├── run-multiscale-cnn-lstm.sh │ ├── run-attention-pooling-lstm2lstm.sh │ ├── run-attention-pooling-lstm-s.sh │ ├── run-cascade-chaining-video-normalize.sh │ ├── run-cascade-chaining-cnn-layer2.sh │ ├── run-cascade-lstm-s.sh │ ├── run-cascade-lstm-s-split.sh │ ├── run-cascade-lstm.sh │ ├── run-cascade-76-lstm-a.sh │ ├── run-cascade-76-lstm-s.sh │ ├── run-cascade-76-lstm.sh │ ├── run-cascade-multiscale-cnn-lstm-laery4.sh │ ├── run-cascade-multiscale-cnn-lstm-layer2.sh │ └── run-cascade-attention-pooling.sh ├── training_utils │ ├── del.py │ └── 
select.py ├── infer_scripts │ ├── infer-lstm_shortlayers_moe8.sh │ ├── infer-lstm_attention8_max.sh │ ├── infer-video_relabel_combine_chain.sh │ ├── infer-lstmglu2_1024_moe8.sh │ ├── infer-lstm_multiscale4_moe4.sh │ ├── infer-lstmbiglu_1024_moe8.sh │ ├── infer-lstmgate1024_moe8.sh │ └── infer-lstm_gate_multiscale4_moe4.sh ├── __init__.py ├── eval_scripts │ ├── eval-lstmbiglu_1024_moe8.sh │ ├── eval-lstmglu2_1024_moe8.sh │ ├── eval-video_relabel_combine_chain.sh │ ├── eval-lstm_random_moe8.sh │ ├── eval-lstmgate1024_moe8.sh │ ├── eval-video_notzero_combine_chain.sh │ ├── eval-lstm_attention8_max.sh │ ├── eval-video_knowledge_combine_chain.sh │ ├── eval-lstm_shortlayers_moe8.sh │ ├── eval-video_softmax_combine_chain.sh │ ├── eval-lstm2_attention8_max.sh │ ├── eval-lstm_multiscale4_moe4.sh │ └── eval-lstm_gate_multiscale4_moe4.sh ├── YM_labels_model.py └── models.py └── .gitmodules /youtube-8m-ensemble/.vimrc: -------------------------------------------------------------------------------- 1 | set tabstop=2 2 | set shiftwidth=2 3 | set expandtab 4 | set autoindent 5 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/all_ensemble_models/.vimrc: -------------------------------------------------------------------------------- 1 | set tabstop=2 2 | set shiftwidth=2 3 | set expandtab 4 | set autoindent 5 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/.vimrc: -------------------------------------------------------------------------------- 1 | set tabstop=2 2 | set shiftwidth=2 3 | set expandtab 4 | set autoindent 5 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/.vimrc: -------------------------------------------------------------------------------- 1 | set tabstop=2 2 | set shiftwidth=2 3 | set expandtab 4 | set autoindent 5 | color desert 6 | 
-------------------------------------------------------------------------------- /youtube-8m-wangheda/all_video_models/.vimrc: -------------------------------------------------------------------------------- 1 | set tabstop=2 2 | set shiftwidth=2 3 | set expandtab 4 | set autoindent 5 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/model_selection_scripts/.vimrc: -------------------------------------------------------------------------------- 1 | set tabstop=2 2 | set shiftwidth=2 3 | set expandtab 4 | set autoindent 5 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/all_frame_models/.vimrc: -------------------------------------------------------------------------------- 1 | syntax on 2 | set tabstop=2 3 | set shiftwidth=2 4 | set expandtab 5 | set autoindent 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | model/ 2 | prediction/ 3 | *.pdf 4 | */__pycache__/ 5 | *.out 6 | *.csv 7 | *.tfrecord 8 | *.pyc 9 | .*.swp 10 | eda/data/ 11 | -------------------------------------------------------------------------------- /model/virtual_grouping/virtual_group_mean.conf: -------------------------------------------------------------------------------- 1 | 2 | lstm_shortlayers_moe8 3 | lstm_random_mean_moe8 4 | framehop_lstm 5 | cnnlstmmemory1024_moe8 6 | -------------------------------------------------------------------------------- /model/virtual_grouping/virtual_group_video.conf: -------------------------------------------------------------------------------- 1 | video_relabel_combine_chain 2 | video_very_deep_combine_chain 3 | distillation_video_dcc_noise 4 | -------------------------------------------------------------------------------- /model/virtual_grouping/virtual_group_attention.conf: 
-------------------------------------------------------------------------------- 1 | attentionlstm_moe4 2 | lstmattlstm1024_moe8 3 | lstm2_attention8_max 4 | lstm_attention8_max 5 | lstm_positional_attention8max 6 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/ensemble_no1.conf: -------------------------------------------------------------------------------- 1 | lstmmem1024_layer2_moe4_deep_combine_chain_add_length 2 | lstmmemory_cell2048_layer2_moe4 3 | video_very_deep_combine_chain 4 | -------------------------------------------------------------------------------- /model/virtual_grouping/virtual_group_dcc.conf: -------------------------------------------------------------------------------- 1 | multilstmmemory1024_moe4_deep_chain 2 | multires_lstm_deep_combine_chain 3 | cnn_deep_combine_chain 4 | deep_cnn_deep_combine_chain 5 | lstm_cnn_deep_combine_chain 6 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/cloudml-gpu.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | scaleTier: CUSTOM 3 | # standard_gpu provides 1 GPU. Change to complex_model_m_gpu for 4 GPUs 4 | masterType: standard_gpu 5 | runtimeVersion: "1.0" 6 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/cloudml-gpu.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | scaleTier: CUSTOM 3 | # standard_gpu provides 1 GPU. Change to complex_model_m_gpu for 4 GPUs 4 | masterType: standard_gpu 5 | runtimeVersion: "1.0" 6 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/cloudml-gpu.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | scaleTier: CUSTOM 3 | # standard_gpu provides 1 GPU. 
Change to complex_model_m_gpu for 4 GPUs 4 | masterType: standard_gpu 5 | runtimeVersion: "1.0" 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "3rd_party/annoy"] 2 | path = 3rd_party/annoy 3 | url = git@github.com:spotify/annoy.git 4 | [submodule "tensorflow"] 5 | path = tensorflow 6 | url = git@github.com:tensorflow/tensorflow.git 7 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/all_data_augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from default_augmenter import * 3 | from half_augmenter import * 4 | from half_video_augmenter import * 5 | from noise_augmenter import * 6 | from clipping_augmenter import * 7 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/all_feature_transform/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from default_transformer import * 3 | from identical_transformer import * 4 | from engineer_transformer import * 5 | from avg_transformer import * 6 | from resolution_transformer import * 7 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/all_feature_transform/identical_transformer.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | 4 | class IdenticalTransformer: 5 | def transform(self, model_input_raw, num_frames, **unused_params): 6 | return model_input_raw, num_frames 7 | -------------------------------------------------------------------------------- /model/virtual_grouping/virtual_group_lstm.conf: -------------------------------------------------------------------------------- 1 | lstmparallelfinaloutput1024_moe8 2 | 
lstmparallelmemory1024_moe8 3 | lstmmemory_cell1024_layer2_moe8 4 | lstmmemory_cell2048_layer2_moe4 5 | biunilstm1024_moe4 6 | lstmmem1024_layer2_moe4_deep_combine_chain_add_length 7 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/cloudml-gpu-distributed.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | runtimeVersion: "1.0" 3 | scaleTier: CUSTOM 4 | masterType: standard_gpu 5 | workerCount: 2 6 | workerType: standard_gpu 7 | parameterServerCount: 2 8 | parameterServerType: standard 9 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/cloudml-gpu-distributed.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | runtimeVersion: "1.0" 3 | scaleTier: CUSTOM 4 | masterType: standard_gpu 5 | workerCount: 2 6 | workerType: standard_gpu 7 | parameterServerCount: 2 8 | parameterServerType: standard 9 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/cloudml-gpu-distributed.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | runtimeVersion: "1.0" 3 | scaleTier: CUSTOM 4 | masterType: standard_gpu 5 | workerCount: 2 6 | workerType: standard_gpu 7 | parameterServerCount: 2 8 | parameterServerType: standard 9 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/all_feature_transform/default_transformer.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | 4 | class DefaultTransformer: 5 | def transform(self, model_input_raw, num_frames, **unused_params): 6 | feature_dim = len(model_input_raw.get_shape()) - 1 7 | model_input = tf.nn.l2_normalize(model_input_raw, feature_dim) 8 | return model_input, num_frames 9 | 
-------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/ensemble_no2.conf: -------------------------------------------------------------------------------- 1 | cnn_deep_combine_chain 2 | deep_cnn_deep_combine_chain 3 | lstm_attention8_max 4 | lstmmem1024_layer2_moe4_deep_combine_chain_add_length 5 | lstmmemory_cell1024_layer2_moe8 6 | lstmmemory_cell2048_layer2_moe4 7 | lstmparallelmemory1024_moe8 8 | multilstmmemory1024_moe4_deep_chain 9 | video_very_deep_combine_chain 10 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/top_k_scripts/run_top_k.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in 8 12 16 20; do 4 | bash top_k_scripts/train-attention_matrix_model.sh model_selection/top_${i}_model ../model/model_selection/top_${i}_model.conf 4 $(($i/4)) 5 | bash top_k_scripts/eval-attention_matrix_model.sh model_selection/top_${i}_model ../model/model_selection/top_${i}_model.conf 4 $(($i/4)) 6 | done 7 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/all_data_augmentation/default_augmenter.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | from tensorflow import flags 4 | FLAGS = flags.FLAGS 5 | 6 | class DefaultAugmenter: 7 | """This only works with frame data""" 8 | def augment(self, model_input_raw, num_frames, labels_batch, **unused_params): 9 | print "DefaultAugmenter" 10 | return model_input_raw, labels_batch, num_frames 11 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/data_augmentation.py: -------------------------------------------------------------------------------- 1 | from tensorflow import flags 2 | 3 | flags.DEFINE_string("data_augmenter", "DefaultAugmenter", 4 | "how to preprocess feature, 
defaults to identical, which means no transform") 5 | flags.DEFINE_float("input_noise_level", 0.2, 6 | "the standard deviation of normal noise added to input") 7 | 8 | from all_data_augmentation import * 9 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/data_augmentation.py: -------------------------------------------------------------------------------- 1 | from tensorflow import flags 2 | 3 | flags.DEFINE_string("data_augmenter", "DefaultAugmenter", 4 | "how to preprocess feature, defaults to identical, which means no transform") 5 | flags.DEFINE_float("input_noise_level", 0.2, 6 | "the standard deviation of normal noise added to input") 7 | 8 | from all_data_augmentation import * 9 | -------------------------------------------------------------------------------- /model/model_selection/top_8_model.conf: -------------------------------------------------------------------------------- 1 | cnn_deep_combine_chain_bagging/ensemble_matrix_model 2 | distillation_lstmcnn_dcc_boosting/sub_model_1 3 | distillation_multilstm_dcc_boosting/sub_model_1 4 | lstm_positional_attention8max 5 | lstmattention8max_boosting/ensemble_attention_matrix_model 6 | lstmparallelfinaloutput1024_moe8 7 | video_dcc_boosting_discardhopeless/ensemble_matrix_model 8 | video_dcc_boosting_weightclip/ensemble_matrix_model 9 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-bilstm-a.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/data/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_biglu_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmBigluModel' \ 8 | --moe_num_mixtures=4 \ 9 | --batch_size=128 \ 10 | --base_learning_rate=0.001 11 | 
-------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-chaining-video-normal.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_data_pattern='/Youtube-8M/data/video/train/*.tfrecord' \ 3 | --train_dir='../model/video_level_moemix4_model' \ 4 | --model='MoeMix4Model' \ 5 | --feature_names="mean_rgb, mean_audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --moe_num_mixtures=4 \ 8 | --moe_layers=3 \ 9 | --class_size=100 10 | 11 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-chaining-video-vertical.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_data_pattern='/Youtube-8M/data/video/train/*.tfrecord' \ 3 | --train_dir='../model/video_level_moeknowledge_model' \ 4 | --model='MoeKnowledgeModel' \ 5 | --feature_names="mean_rgb, mean_audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --moe_num_mixtures=4 \ 8 | --moe_layers=3 \ 9 | --class_size=100 10 | 11 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/ensemble_no5.conf: -------------------------------------------------------------------------------- 1 | cnn_deep_combine_chain 2 | cnnlstmmemory1024_moe8 3 | deep_cnn_deep_combine_chain 4 | framehop_lstm 5 | lstm_attention8_max 6 | lstm_cnn_deep_combine_chain 7 | lstmmem1024_layer2_moe4_deep_combine_chain_add_length 8 | lstmparallelfinaloutput1024_moe8 9 | lstmparallelmemory1024_moe8 10 | multilstmmemory1024_moe4_deep_chain 11 | multires_lstm_deep_combine_chain 12 | video_dcc_bagging 13 | video_very_deep_combine_chain 14 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-chaining-video-add-confident.sh: 
-------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_data_pattern='/Youtube-8M/data/video/train/*.tfrecord' \ 3 | --train_dir='../model/video_level_moemix4_relabel_model' \ 4 | --model='MoeMix4Model' \ 5 | --feature_names="mean_rgb, mean_audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --moe_num_mixtures=4 \ 8 | --moe_layers=3 \ 9 | --class_size=100 \ 10 | --loss_function="loss_relabel" -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-chaining-video-infrequent-softmax.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_data_pattern='/Youtube-8M/data/video/train/*.tfrecord' \ 3 | --train_dir='../model/video_level_moesoftmax_model' \ 4 | --model='MoeSoftmaxModel' \ 5 | --feature_names="mean_rgb, mean_audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --moe_num_mixtures=4 \ 8 | --moe_layers=3 \ 9 | --class_size=100 10 | 11 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-lstm-s.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/data/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_gate_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmGateModel' \ 8 | --video_level_classifier_model=MoeModel \ 9 | --moe_num_mixtures=8 \ 10 | --batch_size=128 \ 11 | --base_learning_rate=0.001 12 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-attention-pooling-lstm-a.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | 
--train_data_pattern='/Youtube-8M/data/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_glu2_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmGlu2Model' \ 8 | --video_level_classifier_model=MoeModel \ 9 | --moe_num_mixtures=8 \ 10 | --batch_size=128 \ 11 | --base_learning_rate=0.001 -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-lstm-random-augmentation.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/data/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_random_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmRandomModel' \ 8 | --video_level_classifier_model=MoeModel \ 9 | --moe_num_mixtures=8 \ 10 | --batch_size=128 \ 11 | --base_learning_rate=0.001 -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_command.example: -------------------------------------------------------------------------------- 1 | # bash [script] [model_name] [config] 2 | # use a different model name each time, make it understandable 3 | # create a new conf file, add it to git repository 4 | 5 | bash ensemble_scripts/train-mean_model.sh ensemble_mean_model_10 ensemble_scripts/ensemble_no3.conf 6 | bash ensemble_scripts/eval-mean_model.sh ensemble_mean_model_10 ensemble_scripts/ensemble_no3.conf 7 | bash ensemble_scripts/infer-mean_model.sh ensemble_mean_model_10 ensemble_scripts/ensemble_no3.conf 8 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/ensemble_no3.conf: -------------------------------------------------------------------------------- 1 | attentionlstm_moe4 2 | biunilstm1024_moe4 3 
| cnn_deep_combine_chain 4 | cnnlstmmemory1024_moe8 5 | deep_cnn_deep_combine_chain 6 | framehop_lstm 7 | lstm_attention8_max 8 | lstmattlstm1024_moe8 9 | lstmmem1024_layer2_moe4_deep_combine_chain_add_length 10 | lstmmemory_cell1024_layer2_moe8 11 | lstmmemory_cell2048_layer2_moe4 12 | lstmparallelfinaloutput1024_moe8 13 | lstmparallelmemory1024_moe8 14 | multilstmmemory1024_moe4_deep_chain 15 | video_very_deep_combine_chain 16 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/training_utils/del.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | check = {} 4 | check_list = [] 5 | for filename in os.listdir("."): 6 | if filename.endswith("meta"): 7 | checkpoint = int(filename.split("-")[1].split(".")[0]) 8 | check_list.append(checkpoint) 9 | 10 | check_list.sort() 11 | for checkpoint in check_list: 12 | if check.has_key(checkpoint / 4000): 13 | print checkpoint 14 | else: 15 | check[checkpoint / 4000] = True 16 | 17 | 18 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_utils/del.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | check = {} 4 | check_list = [] 5 | for filename in os.listdir("."): 6 | if filename.endswith("meta"): 7 | checkpoint = int(filename.split("-")[1].split(".")[0]) 8 | check_list.append(checkpoint) 9 | 10 | check_list.sort() 11 | for checkpoint in check_list: 12 | if check.has_key(checkpoint / 4000): 13 | print checkpoint 14 | else: 15 | check[checkpoint / 4000] = True 16 | 17 | 18 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-temporal-segment-lstm.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/data/frame/train/*.tfrecord' 
\ 3 | --train_dir='../model/frame_level_lstm_layer_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmLayerModel' \ 8 | --video_level_classifier_model=MoeModel \ 9 | --lstm_length=10 \ 10 | --moe_num_mixtures=8 \ 11 | --batch_size=128 \ 12 | --base_learning_rate=0.001 13 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/training_utils/del.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | check = {} 4 | check_list = [] 5 | for filename in os.listdir("."): 6 | if filename.endswith("meta"): 7 | checkpoint = int(filename.split("-")[1].split(".")[0]) 8 | check_list.append(checkpoint) 9 | 10 | check_list.sort() 11 | for checkpoint in check_list: 12 | if check.has_key(checkpoint / 4000): 13 | print checkpoint 14 | else: 15 | check[checkpoint / 4000] = True 16 | 17 | 18 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-cnn-lstm.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/cnnlstmmemory1024_moe8/" \ 3 | --frame_features=True \ 4 | --feature_names="rgb,audio" \ 5 | --feature_sizes="1024,128" \ 6 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 7 | --batch_size=128 \ 8 | --moe_num_mixtures=8 \ 9 | --model=CnnLstmMemoryModel \ 10 | --num_readers=4 \ 11 | --base_learning_rate=0.0008 \ 12 | --lstm_cells=1024 \ 13 | --lstm_layers=2 \ 14 | --rnn_swap_memory=True 15 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-lstmmemory-audio.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | CUDA_VISIBLE_DEVICES=1 python inference.py \ 4 | 
--output_file="../model/audio_lstmmemory1024_layer1_moe8/error_analysis.train_samples.tsv" \ 5 | --model_checkpoint_path="../model/audio_lstmmemory1024_layer1_moe8/model.ckpt-187979" \ 6 | --input_data_pattern="/Youtube-8M/data/frame/train/train1r.tfrecord" \ 7 | --frame_features=True \ 8 | --feature_names="audio" \ 9 | --feature_sizes="128" \ 10 | --batch_size=128 \ 11 | --file_size=4096 12 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-model_input.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in test ensemble_train ensemble_validate; do 3 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble-get-input.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/model_input" \ 5 | --input_data_pattern="/Youtube-8M/data/video/${part}/*.tfrecord" \ 6 | --frame_features=False \ 7 | --feature_names="mean_rgb,mean_audio" \ 8 | --feature_sizes="1024,128" \ 9 | --batch_size=128 \ 10 | --file_size=4096 11 | done 12 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-attention-pooling.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=1 python train.py \ 3 | --train_data_pattern='/Youtube-8M/data/frame/train/*.tfrecord' \ 4 | --train_dir='../model/frame_level_lstm_extend8_model' \ 5 | --frame_features=True \ 6 | --feature_names="rgb, audio" \ 7 | --feature_sizes="1024, 128" \ 8 | --model='LstmExtendModel' \ 9 | --video_level_classifier_model=MoeExtendModel \ 10 | --moe_num_extend=8 \ 11 | --moe_num_mixtures=8 \ 12 | --batch_size=128 \ 13 | --base_learning_rate=0.001 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-lstm-memory-cell1024.sh: -------------------------------------------------------------------------------- 1 | 
CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/lstmmemory_cell1024_layer2_moe8/" \ 3 | --frame_features=True \ 4 | --feature_names="rgb,audio" \ 5 | --feature_sizes="1024,128" \ 6 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 7 | --batch_size=128 \ 8 | --moe_num_mixtures=8 \ 9 | --model=LstmMemoryModel \ 10 | --num_readers=4 \ 11 | --base_learning_rate=0.0008 \ 12 | --lstm_cells=1024 \ 13 | --lstm_layers=2 \ 14 | --rnn_swap_memory=True 15 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-multiscale-cnn-lstm.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/data/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_multiscale_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmMultiscaleModel' \ 8 | --video_level_classifier_model=MoeModel \ 9 | --moe_num_extend=4 \ 10 | --moe_num_mixtures=4 \ 11 | --batch_size=128 \ 12 | --base_learning_rate=0.001 13 | 14 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-attention-pooling-lstm2lstm.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=1 python train.py \ 3 | --train_data_pattern='/Youtube-8M/data/frame/train/*.tfrecord' \ 4 | --train_dir='../model/frame_level_input_extend_model' \ 5 | --frame_features=True \ 6 | --feature_names="rgb, audio" \ 7 | --feature_sizes="1024, 128" \ 8 | --model='InputExtendModel' \ 9 | --video_level_classifier_model=MoeExtendModel \ 10 | --moe_num_extend=4 \ 11 | --moe_num_mixtures=8 \ 12 | --batch_size=128 \ 13 | --base_learning_rate=0.001 14 | -------------------------------------------------------------------------------- 
/youtube-8m-wangheda/training_scripts/run-parallel-lstm-memory.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=0 python train.py \ 3 | --train_dir="../model/lstmparallelmemory1024_moe8" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 8 | --batch_size=128 \ 9 | --lstm_cells="1024,128" \ 10 | --moe_num_mixtures=8 \ 11 | --model=LstmParallelMemoryModel \ 12 | --rnn_swap_memory=True \ 13 | --num_readers=4 \ 14 | --num_epochs=5 \ 15 | --base_learning_rate=0.0008 16 | -------------------------------------------------------------------------------- /model/model_selection/top_12_model.conf: -------------------------------------------------------------------------------- 1 | cnn_deep_combine_chain 2 | cnn_deep_combine_chain_bagging/ensemble_matrix_model 3 | distillation_lstmcnn_dcc_boosting/sub_model_1 4 | distillation_multilstm_dcc_boosting/sub_model_1 5 | lstm_attention8_max 6 | lstm_positional_attention8max 7 | lstmattention8max_boosting/ensemble_attention_matrix_model 8 | lstmbiglu_cell1024_layer1_moe8 9 | lstmparallelfinaloutput1024_moe8 10 | multilstmmemory1024_moe4_deep_chain 11 | video_dcc_boosting_discardhopeless/ensemble_matrix_model 12 | video_dcc_boosting_weightclip/ensemble_matrix_model 13 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-attention-pooling-lstm-s.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/data/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_multiscale2_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmMultiscale2Model' \ 8 | --video_level_classifier_model=MoeModel \ 9 | 
--norm=False \ 10 | --moe_num_extend=4 \ 11 | --moe_num_mixtures=4 \ 12 | --batch_size=128 \ 13 | --base_learning_rate=0.001 14 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/ensemble_no4.conf: -------------------------------------------------------------------------------- 1 | attentionlstm_moe4 2 | biunilstm1024_moe4 3 | cnn_deep_combine_chain 4 | cnnlstmmemory1024_moe8 5 | deep_cnn_deep_combine_chain 6 | framehop_lstm 7 | lstm_attention8_max 8 | lstmattlstm1024_moe8 9 | lstmmem1024_layer2_moe4_deep_combine_chain_add_length 10 | lstmmemory_cell1024_layer2_moe8 11 | lstmmemory_cell2048_layer2_moe4 12 | lstmparallelfinaloutput1024_moe8 13 | lstmparallelmemory1024_moe8 14 | multilstmmemory1024_moe4_deep_chain 15 | video_very_deep_combine_chain 16 | lstm_shortlayers_moe8 17 | video_dcc_bagging 18 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-parallel-lstm-output.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_dir="../model/lstmparallelfinaloutput1024_moe8" \ 3 | --frame_features=True \ 4 | --feature_names="rgb,audio" \ 5 | --feature_sizes="1024,128" \ 6 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 7 | --batch_size=128 \ 8 | --lstm_cells="1024,128" \ 9 | --moe_num_mixtures=8 \ 10 | --model=LstmParallelFinaloutputModel \ 11 | --rnn_swap_memory=True \ 12 | --num_readers=1 \ 13 | --num_epochs=3 \ 14 | --base_learning_rate=0.001 15 | 16 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-cascade-chaining-video-normalize.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/distillation/video/train/*.tfrecord' \ 3 | 
--train_dir='../model/video_level_distillchainnorm2_model' \ 4 | --frame_features=False \ 5 | --feature_names="mean_rgb, mean_audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='MoeDistillChainNorm2Model' \ 8 | --moe_num_mixtures=8 \ 9 | --batch_size=128 \ 10 | --base_learning_rate=0.001 \ 11 | --distillation_features=True \ 12 | --distillation_type=0 13 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/feature_transform.py: -------------------------------------------------------------------------------- 1 | from tensorflow import flags 2 | 3 | flags.DEFINE_string("feature_transformer", "DefaultTransformer", 4 | "how to preprocess feature, defaults to identical, which means no transform") 5 | flags.DEFINE_string("engineer_types", "identical,avg,std,diff", 6 | "how to preprocess feature, defaults to identical, which means no transform") 7 | flags.DEFINE_integer("time_resolution", 8, 8 | "how many frames to mean pooling at a time") 9 | 10 | from all_feature_transform import * 11 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/feature_transform.py: -------------------------------------------------------------------------------- 1 | from tensorflow import flags 2 | 3 | flags.DEFINE_string("feature_transformer", "DefaultTransformer", 4 | "how to preprocess feature, defaults to identical, which means no transform") 5 | flags.DEFINE_string("engineer_types", "identical,avg,std,diff", 6 | "how to preprocess feature, defaults to identical, which means no transform") 7 | flags.DEFINE_integer("time_resolution", 8, 8 | "how many frames to mean pooling at a time") 9 | 10 | from all_feature_transform import * 11 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-cascade-chaining-cnn-layer2.sh: -------------------------------------------------------------------------------- 1 | 
CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/distillation/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_cnndcc_distillchain_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='CnnDCCDistillChainModel' \ 8 | --moe_layers=2 \ 9 | --moe_num_mixtures=4 \ 10 | --batch_size=128 \ 11 | --base_learning_rate=0.001 \ 12 | --distillation_features=True \ 13 | --distillation_type=0 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_utils/select.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | if __name__=="__main__": 5 | every = int(sys.argv[1]) 6 | 7 | check = {} 8 | check_list = [] 9 | for filename in os.listdir("."): 10 | if filename.endswith("meta"): 11 | checkpoint = int(filename.split("-")[1].split(".")[0]) 12 | check_list.append(checkpoint) 13 | 14 | check_list.sort() 15 | for checkpoint in check_list: 16 | if check.has_key(checkpoint / every): 17 | pass 18 | else: 19 | check[checkpoint / every] = True 20 | print checkpoint 21 | 22 | 23 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/check-video_id.sh: -------------------------------------------------------------------------------- 1 | conf=$1 2 | part=$2 3 | 4 | validate_path=/Youtube-8M/model_predictions/${part} 5 | validate_data_patterns="" 6 | for d in $(cat "$conf"); do 7 | validate_data_patterns="${validate_path}/${d}/*.tfrecord${validate_data_patterns:+,$validate_data_patterns}" 8 | done 9 | echo "$validate_data_patterns" 10 | input_data_pattern="/Youtube-8M/data/video/${part}/*.tfrecord" 11 | 12 | CUDA_VISIBLE_DEVICES="" python check_video_id.py \ 13 | --input_data_pattern="$input_data_pattern" \ 14 | --eval_data_patterns="$validate_data_patterns" 15 | 16 | 
-------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-cnn-model.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/cnn_model/" \ 3 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --model=CnnModel \ 8 | --cnn_num_filters=512 \ 9 | --moe_num_mixtures=4 \ 10 | --num_readers=4 \ 11 | --batch_size=128 \ 12 | --keep_checkpoint_every_n_hours=0.5 \ 13 | --base_learning_rate=0.001 14 | 15 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-cascade-lstm-s.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/distillation/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_gate_distillchain_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmGateModel' \ 8 | --video_level_classifier_model=MoeDistillChainModel \ 9 | --moe_num_mixtures=8 \ 10 | --batch_size=128 \ 11 | --base_learning_rate=0.001 \ 12 | --distillation_features=True \ 13 | --distillation_type=0 -------------------------------------------------------------------------------- /youtube-8m-zhangteng/training_utils/select.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | every = int(sys.argv[1]) 5 | 6 | check = {} 7 | check_list = [] 8 | for filename in os.listdir("."): 9 | if filename.endswith("meta"): 10 | checkpoint = int(filename.split("-")[1].split(".")[0]) 11 | check_list.append(checkpoint) 12 | 13 | check_list.sort() 14 | for checkpoint in check_list: 15 | if check.has_key(checkpoint / 
every): 16 | pass 17 | else: 18 | check[checkpoint / every] = True 19 | print checkpoint 20 | 21 | 22 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-cascade-lstm-s-split.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/distillation/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_gate_distillsplit_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmGateModel' \ 8 | --video_level_classifier_model=MoeDistillSplitModel \ 9 | --moe_num_mixtures=8 \ 10 | --batch_size=128 \ 11 | --base_learning_rate=0.001 \ 12 | --distillation_features=True \ 13 | --distillation_type=0 14 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-cascade-lstm.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/distillation/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_distillchain_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmModel' \ 8 | --video_level_classifier_model=MoeDistillChainModel \ 9 | --moe_num_mixtures=8 \ 10 | --lstm_layers=1 \ 11 | --batch_size=128 \ 12 | --base_learning_rate=0.001 \ 13 | --distillation_features=True \ 14 | --distillation_type=0 -------------------------------------------------------------------------------- /youtube-8m-ensemble/all_ensemble_models/__init__.py: -------------------------------------------------------------------------------- 1 | from logistic_model import * 2 | from moe_model import * 3 | from attention_moe_model import * 4 | from attention_moe_matrix_model import * 5 | from 
input_moe_model import * 6 | from linear_regression_model import * 7 | from matrix_regression_model import * 8 | from attention_matrix_model import * 9 | from attention_linear_model import * 10 | from attention_linmatrix_model import * 11 | from attention_rectified_linear_model import * 12 | from nonunit_matrix_regression_model import * 13 | from mean_model import * 14 | from deep_combine_chain_model import * 15 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/training_utils/select.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | every = int(sys.argv[1]) 5 | 6 | check = {} 7 | check_list = [] 8 | for filename in os.listdir("."): 9 | if filename.endswith("meta"): 10 | checkpoint = int(filename.split("-")[1].split(".")[0]) 11 | check_list.append(checkpoint) 12 | 13 | check_list.sort() 14 | for checkpoint in check_list: 15 | if check.has_key(checkpoint / every): 16 | pass 17 | else: 18 | check[checkpoint / every] = True 19 | print checkpoint 20 | 21 | 22 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/all_data_augmentation/noise_augmenter.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | from tensorflow import flags 4 | FLAGS = flags.FLAGS 5 | 6 | class NoiseAugmenter: 7 | """This only works with frame data""" 8 | def augment(self, model_input_raw, num_frames, labels_batch, **unused_params): 9 | print "NoiseAugmenter", model_input_raw.shape, "noise =", FLAGS.input_noise_level 10 | noise_input = tf.random_normal(tf.shape(model_input_raw), mean=0.0, stddev=FLAGS.input_noise_level) 11 | model_input = model_input_raw + noise_input 12 | return model_input, labels_batch, num_frames 13 | -------------------------------------------------------------------------------- 
/youtube-8m-zhangteng/train_scripts/run-cascade-76-lstm-a.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/distillation_v2/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_glu2_distillchain_v2_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmGlu2Model' \ 8 | --video_level_classifier_model=MoeDistillChainModel \ 9 | --moe_num_mixtures=8 \ 10 | --batch_size=128 \ 11 | --base_learning_rate=0.001 \ 12 | --distillation_features=True \ 13 | --distillation_type=0 14 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-cascade-76-lstm-s.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/distillation_v2/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_gate_distillchain_v2_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmGateModel' \ 8 | --video_level_classifier_model=MoeDistillChainModel \ 9 | --moe_num_mixtures=8 \ 10 | --batch_size=128 \ 11 | --base_learning_rate=0.001 \ 12 | --distillation_features=True \ 13 | --distillation_type=0 14 | 15 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-cascade-76-lstm.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/distillation_v2/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_distillchain_v2_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmModel' \ 8 | 
--video_level_classifier_model=MoeDistillChainModel \ 9 | --moe_num_mixtures=8 \ 10 | --lstm_layers=1 \ 11 | --batch_size=128 \ 12 | --base_learning_rate=0.001 \ 13 | --distillation_features=True \ 14 | --distillation_type=0 -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/combine-tfrecords-frame.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | data_path=/Youtube-8M/model_predictions/train 4 | 5 | input_data_pattern="/Youtube-8M/data/frame/train/train*.tfrecord" 6 | prediction_data_pattern="${data_path}/distillation/ensemble_mean_model/prediction*.tfrecord" 7 | 8 | CUDA_VISIBLE_DEVICES="" python inference-combine-tfrecords-frame.py \ 9 | --output_dir="/Youtube-8M/distillation/frame/train" \ 10 | --input_data_pattern="$input_data_pattern" \ 11 | --prediction_data_pattern="$prediction_data_pattern" \ 12 | --batch_size=128 \ 13 | --file_size=1024 14 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/combine-tfrecords-video.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | data_path=/Youtube-8M/model_predictions/train 4 | 5 | input_data_pattern="/Youtube-8M/data/video/train/train*.tfrecord" 6 | prediction_data_pattern="${data_path}/distillation/ensemble_mean_model/prediction*.tfrecord" 7 | 8 | CUDA_VISIBLE_DEVICES="" python inference-combine-tfrecords-video.py \ 9 | --output_dir="/Youtube-8M/distillation/video/train" \ 10 | --input_data_pattern="$input_data_pattern" \ 11 | --prediction_data_pattern="$prediction_data_pattern" \ 12 | --batch_size=1024 \ 13 | --file_size=4096 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-biunilstm1024_moe8.sh: -------------------------------------------------------------------------------- 1 | 2 | for 
part in test ensemble_train ensemble_validate train_samples; do 3 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/biunilstm1024_moe4" \ 5 | --model_checkpoint_path="../model/biunilstm1024_moe4/model.ckpt-94882" \ 6 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 7 | --frame_features=True \ 8 | --feature_names="rgb,audio" \ 9 | --feature_sizes="1024,128" \ 10 | --batch_size=32 \ 11 | --file_size=4096 12 | done 13 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-cascade-multiscale-cnn-lstm-laery4.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/distillation/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_multiscale_distillchain4_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmMultiscaleDitillChainModel' \ 8 | --moe_num_extend=4 \ 9 | --moe_num_mixtures=4 \ 10 | --cnn_cells=196 \ 11 | --batch_size=128 \ 12 | --base_learning_rate=0.001 \ 13 | --distillation_features=True \ 14 | --distillation_type=0 -------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-cascade-multiscale-cnn-lstm-layer2.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/distillation/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_multiscale_distillchain_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmMultiscaleDitillChainModel' \ 8 | --moe_num_extend=2 \ 9 | --moe_num_mixtures=4 \ 10 | --cnn_cells=256 \ 11 | --batch_size=128 \ 12 | 
--base_learning_rate=0.001 \ 13 | --distillation_features=True \ 14 | --distillation_type=0 -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-attentionlstm_moe4.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in test ensemble_train ensemble_validate train_samples; do 3 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/attentionlstm_moe4" \ 5 | --model_checkpoint_path="../model/attentionlstm_moe4/model.ckpt-104135" \ 6 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 7 | --frame_features=True \ 8 | --feature_names="rgb,audio" \ 9 | --feature_sizes="1024,128" \ 10 | --batch_size=64 \ 11 | --file_size=4096 12 | done 13 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/all_video_models/stage2_logistic_model.py: -------------------------------------------------------------------------------- 1 | import math 2 | import models 3 | import tensorflow as tf 4 | import utils 5 | from tensorflow import flags 6 | import tensorflow.contrib.slim as slim 7 | FLAGS = flags.FLAGS 8 | 9 | class Stage2LogisticModel(models.BaseModel): 10 | def create_model(self, model_input, vocab_size, l2_penalty=1e-8, original_input=None, **unused_params): 11 | output = tf.nn.sigmoid(model_input + slim.fully_connected( 12 | model_input, vocab_size, activation_fn=None, 13 | weights_regularizer=slim.l2_regularizer(l2_penalty))) 14 | return {"predictions": output} 15 | 16 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/combine-tfrecords-video-v2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | data_path=/Youtube-8M/model_predictions_local/train 4 | 5 | input_data_pattern="/Youtube-8M/data/video/train/*.tfrecord" 
6 | prediction_data_pattern="${data_path}/distillation/ensemble_v2_matrix_model/prediction*.tfrecord" 7 | 8 | CUDA_VISIBLE_DEVICES="" python inference-combine-tfrecords-video.py \ 9 | --output_dir="/Youtube-8M/distillation_v2/video/train" \ 10 | --input_data_pattern="$input_data_pattern" \ 11 | --prediction_data_pattern="$prediction_data_pattern" \ 12 | --batch_size=1024 \ 13 | --file_size=4096 14 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/ensemble_no6.conf: -------------------------------------------------------------------------------- 1 | attentionlstm_moe4 2 | biunilstm1024_moe4 3 | cnn_deep_combine_chain 4 | cnnlstmmemory1024_moe8 5 | deep_cnn_deep_combine_chain 6 | framehop_lstm 7 | lstm_attention8_max 8 | lstmattlstm1024_moe8 9 | lstm_cnn_deep_combine_chain 10 | lstmmem1024_layer2_moe4_deep_combine_chain_add_length 11 | lstmmemory_cell1024_layer2_moe8 12 | lstmmemory_cell2048_layer2_moe4 13 | lstmparallelfinaloutput1024_moe8 14 | lstmparallelmemory1024_moe8 15 | lstm_shortlayers_moe8 16 | multilstmmemory1024_moe4_deep_chain 17 | multires_lstm_deep_combine_chain 18 | video_dcc_bagging 19 | video_very_deep_combine_chain 20 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-lstmattlstm1024_moe8.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in test ensemble_train ensemble_validate train_samples; do 3 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/lstmattlstm1024_moe8" \ 5 | --model_checkpoint_path="../model/lstmattlstm1024_moe8/model.ckpt-86809" \ 6 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 7 | --frame_features=True \ 8 | --feature_names="rgb,audio" \ 9 | --feature_sizes="1024,128" \ 10 | --batch_size=64 \ 11 | --file_size=4096 12 | done 13 | 
-------------------------------------------------------------------------------- /youtube-8m-zhangteng/train_scripts/run-cascade-attention-pooling.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_data_pattern='/Youtube-8M/distillation/frame/train/*.tfrecord' \ 3 | --train_dir='../model/frame_level_lstm_extend_distillchain_model' \ 4 | --frame_features=True \ 5 | --feature_names="rgb, audio" \ 6 | --feature_sizes="1024, 128" \ 7 | --model='LstmExtendModel' \ 8 | --video_level_classifier_model=MoeExtendDistillChainModel \ 9 | --moe_num_extend=8 \ 10 | --moe_num_mixtures=8 \ 11 | --batch_size=128 \ 12 | --base_learning_rate=0.001 \ 13 | --distillation_features=True \ 14 | --distillation_type=0 15 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/model_selection_scripts/get_patterns.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from tensorflow import flags 3 | 4 | FLAGS = flags.FLAGS 5 | 6 | if __name__=="__main__": 7 | flags.DEFINE_string("train_path", "", "The directory where training files locates.") 8 | flags.DEFINE_string("candidates", "", "The candidate methods.") 9 | 10 | if __name__=="__main__": 11 | candidate_methods = map(lambda x: x.strip(), FLAGS.candidates.strip().split(",")) 12 | train_path = FLAGS.train_path 13 | output_path = ",".join(map(lambda x: "%s/%s/*.tfrecord"%(train_path, x), candidate_methods)) 14 | sys.stdout.write(output_path) 15 | sys.stdout.flush() 16 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-dbof.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in test ensemble_train ensemble_validate; do 3 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 4 | 
--output_dir="/Youtube-8M/model_predictions/${part}/dbof_model" \ 5 | --model_checkpoint_path="../model/dbof_model/model.ckpt-184058" \ 6 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 7 | --frame_features=True \ 8 | --feature_names="rgb,audio" \ 9 | --feature_sizes="1024,128" \ 10 | --model=DbofModel \ 11 | --batch_size=64 \ 12 | --num_readers=1 \ 13 | --file_size=4096 14 | done 15 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-deeplstm1024_layer6_moe4.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in test ensemble_train ensemble_validate train_samples; do 3 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/deeplstm1024_layer6_moe4" \ 5 | --model_checkpoint_path="../model/deeplstm1024_layer6_moe4/model.ckpt-175048" \ 6 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 7 | --frame_features=True \ 8 | --feature_names="rgb,audio" \ 9 | --feature_sizes="1024,128" \ 10 | --batch_size=128 \ 11 | --file_size=4096 12 | done 13 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-cnn_lstmmemory1024_moe8.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in test ensemble_train ensemble_validate train_samples; do 3 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/cnnlstmmemory1024_moe8" \ 5 | --model_checkpoint_path="../model/cnnlstmmemory1024_moe8/model.ckpt-149354" \ 6 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 7 | --frame_features=True \ 8 | --feature_names="rgb,audio" \ 9 | --feature_sizes="1024,128" \ 10 | --batch_size=64 \ 11 | --file_size=4096 12 | done 13 | 
-------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-video_logistic.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in ensemble_train ensemble_validate test; do 3 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/video_logistic_model" \ 5 | --model_checkpoint_path="../model/video_logistic_model/model.ckpt-23581" \ 6 | --input_data_pattern="/Youtube-8M/data/video/${part}/*.tfrecord" \ 7 | --frame_features=False \ 8 | --feature_names="mean_rgb,mean_audio" \ 9 | --feature_sizes="1024,128" \ 10 | --model=LogisticModel \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-cnn_deep_combine_chain.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/cnn_deep_combine_chain" \ 6 | --model_checkpoint_path="../model/cnn_deep_combine_chain/model.ckpt-228332" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-lstmmemory1024_moe8.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 5 | 
--output_dir="/Youtube-8M/model_predictions/${part}/lstmmemory_cell1024_layer2_moe8" \ 6 | --model_checkpoint_path="../model/lstmmemory1024_moe8/model.ckpt-123144" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-lstmparalleloutput1024_moe8.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in test ensemble_train ensemble_validate train_samples; do 3 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/lstmparallelfinaloutput1024_moe8" \ 5 | --model_checkpoint_path="../model/lstmparallelfinaloutput1024_moe8/model.ckpt-144351" \ 6 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 7 | --frame_features=True \ 8 | --feature_names="rgb,audio" \ 9 | --feature_sizes="1024,128" \ 10 | --batch_size=64 \ 11 | --file_size=4096 12 | done 13 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-deep_cnn_deep_combine.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/deep_cnn_deep_combine_chain" \ 6 | --model_checkpoint_path="../model/deep_cnn_deep_combine_chain/model.ckpt-303289" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | 
-------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-framehop_lstm.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in test ensemble_train ensemble_validate train_samples; do 3 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/framehop_lstm" \ 5 | --model_checkpoint_path="../model/framehop_lstm/model.ckpt-143746" \ 6 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 7 | --frame_features=True \ 8 | --feature_names="rgb,audio" \ 9 | --feature_sizes="1024,128" \ 10 | --feature_transformer=IdenticalTransformer \ 11 | --batch_size=64 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/infer_scripts/infer-lstm_shortlayers_moe8.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in ensemble_train ensemble_validate test; do 3 | #for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/lstm_shortlayers_moe8" \ 6 | --model_checkpoint_path="../model/frame_level_lstm_layer_model/model.ckpt-107188" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/all_feature_transform/avg_transformer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class AvgTransformer: 4 | def transform(self, model_input_raw, num_frames, **unused_params): 5 | float_num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32) 6 | feature_size = model_input_raw.get_shape().as_list()[2] 7 | denominators = tf.reshape( 8 | tf.tile(float_num_frames, [1, feature_size]), [-1, feature_size]) 9 | avg_pooled = tf.reduce_sum(model_input_raw, axis=[1]) / denominators 10 | feature_dim = len(avg_pooled.get_shape()) - 1 11 | model_input = tf.nn.l2_normalize(avg_pooled, feature_dim) 12 | return model_input, num_frames 13 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-lstm_attention8_max.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/lstm_attention8_max" \ 6 | --model_checkpoint_path="/home/zhangt/yt8m/frame_level_lstm_extend8_model/model.ckpt-181785" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/infer_scripts/infer-lstm_attention8_max.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in ensemble_validate; do 4 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/lstm_attention8_max" \ 6 | --model_checkpoint_path="../model/frame_level_lstm_extend8_model/model.ckpt-181785" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/data_augmentation_scripts/run-parallel-lstm-memory.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=1 python train.py \ 3 | --train_dir="../model/dataaugmentation_parallel_lstm_memory" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --train_data_pattern="/Youtube-8M/data/frame/largetrain/*.tfrecord" \ 8 | --lstm_cells="1024,128" \ 9 | --moe_num_mixtures=8 \ 10 | 
--model=LstmParallelMemoryModel \ 11 | --rnn_swap_memory=True \ 12 | --num_readers=4 \ 13 | --batch_size=40 \ 14 | --data_augmenter=HalfAugmenter \ 15 | --num_epochs=5 \ 16 | --base_learning_rate=0.0008 \ 17 | --keep_checkpoint_every_n_hours=2.0 18 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-lstm_cnn_deep_combine_chain.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in test ensemble_validate ensemble_train; do 3 | #for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/lstm_cnn_deep_combine_chain" \ 6 | --model_checkpoint_path="../model/lstm_cnn_deep_combine_chain/model.ckpt-259179" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-lstmmemory2048_moe4.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/lstmmemory_cell2048_layer2_moe4" \ 6 | --model_checkpoint_path="../model/lstmmemory2048_moe4_batch128/model.ckpt-109434" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-cascade-75-chaining-parallel-lstm.sh:
-------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/distillchain_lstmparalleloutput" \ 3 | --train_data_pattern="/Youtube-8M/distillation/frame/train/*.tfrecord" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --distillation_features=True \ 8 | --distillation_as_input=True \ 9 | --model=DistillchainLstmParallelFinaloutputModel \ 10 | --rnn_swap_memory=True \ 11 | --lstm_cells="1024,128" \ 12 | --moe_num_mixtures=8 \ 13 | --num_readers=4 \ 14 | --batch_size=128 \ 15 | --num_epochs=3 \ 16 | --base_learning_rate=0.001 17 | 18 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-lstmparallelmemory1024_moe8.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/lstmparallelmemory1024_moe8" \ 6 | --model_checkpoint_path="../model/lstmparallelmemory1024_moe8/model.ckpt-111155" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-lstm-memory-cell2048.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/lstmmemory2048_moe4_batch128/" \ 3 | --frame_features=True \ 4 | --feature_names="rgb,audio" \ 5 | --feature_sizes="1024,128" \ 6 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 7 | --batch_size=128 \ 8 | 
--moe_num_mixtures=4 \ 9 | --model=LstmMemoryModel \ 10 | --num_readers=4 \ 11 | --base_learning_rate=0.0004 \ 12 | --keep_checkpoint_every_n_hours=3.0 \ 13 | --lstm_cells=2048 \ 14 | --lstm_layers=2 \ 15 | --rnn_swap_memory=True 16 | 17 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/infer_scripts/infer-video_relabel_combine_chain.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in ensemble_train ensemble_validate test; do 3 | #for part in test; do 4 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/video_relabel_combine_chain" \ 6 | --model_checkpoint_path="../model/video_level_moemix4_relabel_model/model.ckpt-14863" \ 7 | --input_data_pattern="/Youtube-8M/data/video/${part}/*.tfrecord" \ 8 | --frame_features=False \ 9 | --feature_names="mean_rgb,mean_audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=128 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-video_moe16_model.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in ensemble_train ensemble_validate test; do 3 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/video_moe16_model" \ 5 | --model_checkpoint_path="../model/video_moe16_model/model.ckpt-19058" \ 6 | --input_data_pattern="/Youtube-8M/data/video/${part}/*.tfrecord" \ 7 | --frame_features=False \ 8 | --feature_names="mean_rgb,mean_audio" \ 9 | --feature_sizes="1024,128" \ 10 | --model=MoeModel \ 11 | --moe_num_mixtures=16 \ 12 | --batch_size=128 \ 13 | --file_size=4096 14 | done 15 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/check-video_id_match.sh: 
-------------------------------------------------------------------------------- 1 | conf=$1 2 | part=$2 3 | 4 | validate_path=/Youtube-8M/model_predictions_local/${part} 5 | validate_data_patterns="" 6 | for d in $(cat $conf); do 7 | validate_data_patterns="${validate_path}/${d}/*.tfrecord${validate_data_patterns:+,$validate_data_patterns}" 8 | done 9 | echo "$validate_data_patterns" 10 | input_data_pattern="/Youtube-8M/data/video/${part}/*.tfrecord" 11 | #input_data_pattern="/Youtube-8M/model_predictions/${part}/model_input/*.tfrecord" 12 | 13 | CUDA_VISIBLE_DEVICES="" python check_video_id_match.py \ 14 | --input_data_pattern=$input_data_pattern \ 15 | --eval_data_patterns="$validate_data_patterns" 16 | 17 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/combine-tfrecords-frame-v2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file_num_mod=$1 3 | 4 | data_path=/Youtube-8M/model_predictions_x32/train 5 | 6 | input_data_pattern="/Youtube-8M/data/frame/train/*.tfrecord" 7 | prediction_data_pattern="${data_path}/distillation/ensemble_v2_matrix_model/prediction*.tfrecord" 8 | 9 | CUDA_VISIBLE_DEVICES="" python inference-combine-tfrecords-frame.py \ 10 | --output_dir="/Youtube-8M/distillation_v2/frame/train" \ 11 | --input_data_pattern="$input_data_pattern" \ 12 | --prediction_data_pattern="$prediction_data_pattern" \ 13 | --file_num_mod=$file_num_mod \ 14 | --batch_size=128 \ 15 | --file_size=1024 16 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-multilstmmemory1024_moe4_deep_chain.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in test ensemble_validate ensemble_train; do 3 | for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 5 | 
--output_dir="/Youtube-8M/model_predictions/${part}/multilstmmemory1024_moe4_deep_chain" \ 6 | --model_checkpoint_path="../model/multilstmmemory1024_moe4_deep_chain/model.ckpt-133508" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-video_very_deep_combine_chain.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/video_very_deep_combine_chain" \ 6 | --model_checkpoint_path="../model/video_chain_moe2_verydeep_combine/model.ckpt-28403" \ 7 | --input_data_pattern="/Youtube-8M/data/video/${part}/*.tfrecord" \ 8 | --frame_features=False \ 9 | --feature_names="mean_rgb,mean_audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=128 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/ensemble_no8.conf: -------------------------------------------------------------------------------- 1 | cnn_deep_combine_chain 2 | cnn_deep_combine_chain_bagging/ensemble_matrix_model 3 | cnnlstmmemory1024_moe8 4 | framehop_lstm 5 | lstm_attention8_max 6 | lstm_cnn_deep_combine_chain 7 | lstmmem1024_layer2_moe4_deep_combine_chain_add_length 8 | lstmparallelfinaloutput1024_moe8 9 | lstmparallelmemory1024_moe8 10 | multilstmmemory1024_moe4_deep_chain 11 | multires_lstm_deep_combine_chain 12 | video_cc_structure_bagging/ensemble_matrix_model 13 | video_dcc_boosting/ensemble_matrix_model 14 | 
video_dcc_boosting_discardhopeless/ensemble_matrix_model 15 | video_dcc_boosting_weightclip/ensemble_matrix_model 16 | lstmparalleloutput_bagging/ensemble_mean_model 17 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/all_video_models/__init__.py: -------------------------------------------------------------------------------- 1 | from logistic_model import * 2 | from moe_model import * 3 | from mlp_moe_model import * 4 | from chain_moe_model import * 5 | from deep_chain_model import * 6 | from deep_combine_chain_model import * 7 | from distillchain_deep_combine_chain_model import * 8 | from hidden_chain_model import * 9 | from hidden_combine_chain_model import * 10 | from chain_main_relu_moe_model import * 11 | from chain_support_relu_moe_model import * 12 | from shortcut_chain_support_relu_moe_model import * 13 | from multitask_moe_model import * 14 | from stage2_logistic_model import * 15 | from multitask_divergence_moe_model import * 16 | from multitask_divergence_deep_combine_chain_model import * 17 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-video_group_moe4_noise0.2_layer4_elu.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/video_moe4_noise0.2_layer4_elu" \ 6 | --model_checkpoint_path="/home/zhangt/yt8m/video_level_moenoise0.2_model/moe_4layers_elu/model.ckpt-22845" \ 7 | --input_data_pattern="/Youtube-8M/data/video/${part}/*.tfrecord" \ 8 | --frame_features=False \ 9 | --feature_names="mean_rgb,mean_audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=128 \ 12 | --file_size=4096 13 | done 14 | 
-------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-cnn_model.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in ensemble_validate; do 4 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/cnn_model" \ 6 | --model_checkpoint_path="../model/cnn_model/model.ckpt-374098" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --model=CnnModel \ 12 | --cnn_num_filters=512 \ 13 | --moe_num_mixtures=4 \ 14 | --batch_size=128 \ 15 | --file_size=4096 16 | done 17 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-frame_seg.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in ensemble_train ensemble_validate test; do 3 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions_local/${part}/frame_seg_model" \ 5 | --model_checkpoint_path="../model/frame_seg_model/model.ckpt-27945" \ 6 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 7 | --frame_features=True \ 8 | --feature_names="rgb,audio" \ 9 | --feature_sizes="1024,128" \ 10 | --feature_transformer=IdenticalTransformer \ 11 | --model=FrameSegModel \ 12 | --moe_num_mixtures=16 \ 13 | --batch_size=64 \ 14 | --file_size=4096 15 | done 16 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-cascade-75-multiple-attention-pooling.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | 
--train_dir="../model/distillchain_lstmattention8max" \ 3 | --train_data_pattern="/Youtube-8M/distillation/frame/train/*.tfrecord" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --distillation_features=True \ 8 | --distillation_as_input=True \ 9 | --model=DistillchainLstmAttentionMaxPoolingModel \ 10 | --moe_num_mixtures=8 \ 11 | --lstm_attentions=8 \ 12 | --lstm_cells=1024 \ 13 | --rnn_swap_memory=True \ 14 | --num_readers=4 \ 15 | --batch_size=128 \ 16 | --num_epochs=3 \ 17 | --base_learning_rate=0.001 18 | 19 | -------------------------------------------------------------------------------- /model/model_selection/top_16_model.conf: -------------------------------------------------------------------------------- 1 | cnn_deep_combine_chain 2 | cnn_deep_combine_chain_bagging/ensemble_matrix_model 3 | distillation_lstmcnn_dcc_boosting/sub_model_1 4 | distillation_multilstm_dcc_boosting/sub_model_1 5 | lstm_attention8_max 6 | lstm_cnn_deep_combine_chain 7 | lstm_positional_attention8max 8 | lstmattention8max_boosting/ensemble_attention_matrix_model 9 | lstmbiglu_cell1024_layer1_moe8 10 | lstmmem1024_layer2_moe4_deep_combine_chain_add_length 11 | lstmparallelfinaloutput1024_moe8 12 | lstmparallelmemory1024_moe8 13 | multilstmmemory1024_moe4_deep_chain 14 | video_dcc_boosting/ensemble_matrix_model 15 | video_dcc_boosting_discardhopeless/ensemble_matrix_model 16 | video_dcc_boosting_weightclip/ensemble_matrix_model 17 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-cascade-76-multiple-attention-pooling.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/distillchain_v2_lstmattention8max" \ 3 | --train_data_pattern="/Youtube-8M/distillation_v2/frame/train/*.tfrecord" \ 4 | --frame_features=True \ 5 |
--feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --distillation_features=True \ 8 | --distillation_as_input=True \ 9 | --model=DistillchainLstmAttentionMaxPoolingModel \ 10 | --moe_num_mixtures=8 \ 11 | --lstm_attentions=8 \ 12 | --lstm_cells=1024 \ 13 | --rnn_swap_memory=True \ 14 | --num_readers=4 \ 15 | --batch_size=128 \ 16 | --num_epochs=3 \ 17 | --base_learning_rate=0.001 18 | 19 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-temporal-pooling-lstm.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=0 python train.py \ 3 | --train_dir="../model/framehop_lstm/" \ 4 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 5 | --frame_features=True \ 6 | --feature_names="rgb,audio" \ 7 | --feature_sizes="1024,128" \ 8 | --model=FramehopLstmMemoryModel \ 9 | --deep_chain_layers=4 \ 10 | --deep_chain_relu_cells=256 \ 11 | --moe_num_mixtures=4 \ 12 | --keep_checkpoint_every_n_hours=1.0 \ 13 | --base_learning_rate=0.0008 \ 14 | --feature_transformer=IdenticalTransformer \ 15 | --num_readers=4 \ 16 | --batch_size=128 \ 17 | --num_epochs=5 \ 18 | --rnn_swap_memory=True \ 19 | --lstm_layers=2 \ 20 | --lstm_cells="512,64" 21 | 22 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-multires_lstm_deep_combine_chain.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in test ensemble_validate ensemble_train; do 3 | #for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/multires_lstm_deep_combine_chain" \ 6 | --model_checkpoint_path="../model/multires_lstm_deep_combine_chain/model.ckpt-108969" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 |
--frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --feature_transformer=IdenticalTransformer \ 12 | --batch_size=32 \ 13 | --file_size=4096 14 | done 15 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-lstmmemory1024_deep_combine_chain_add_length.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in train_samples; do 4 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/lstmmem1024_layer2_moe4_deep_combine_chain_add_length" \ 6 | --model_checkpoint_path="../model/lstmmem1024_deep_combine_chain_length/model.ckpt-148035" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --batch_size=32 \ 12 | --file_size=4096 13 | done 14 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-cascade-76-parallel-lstm-boosting.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train.py \ 2 | --train_dir="../model/distillchain_v2_boost_lstmparalleloutput" \ 3 | --train_data_pattern="/Youtube-8M/distillation_v2/frame/train/*.tfrecord" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --distillation_features=True \ 8 | --distillation_as_input=True \ 9 | --distillation_as_boosting=True \ 10 | --model=DistillchainLstmParallelFinaloutputModel \ 11 | --rnn_swap_memory=True \ 12 | --lstm_layers=1 \ 13 | --lstm_cells="1024,128" \ 14 | --moe_num_mixtures=8 \ 15 | --num_readers=4 \ 16 | --batch_size=128 \ 17 | --num_epochs=3 \ 18 | --base_learning_rate=0.001 19 | 20 | 
-------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-multiscale-cnn-lstm-model.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/multiscale_cnn_lstm_model" \ 3 | --train_data_pattern='/Youtube-8M/data/frame/train/*.tfrecord' \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --model='MultiscaleCnnLstmModel' \ 8 | --multiscale_cnn_lstm_layers=4 \ 9 | --moe_num_mixtures=4 \ 10 | --multitask=True \ 11 | --label_loss=MultiTaskCrossEntropyLoss \ 12 | --support_loss_percent=1.0 \ 13 | --support_type="label,label,label,label" \ 14 | --is_training=True \ 15 | --batch_size=128 \ 16 | --num_readers=4 \ 17 | --num_epochs=5 \ 18 | --base_learning_rate=0.001 19 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/data_augmentation_scripts/run-multiple-attention-pooling-positional-embedding.sh: -------------------------------------------------------------------------------- 1 | # Trains LstmPositionalAttentionMaxPoolingModel on the largetrain frame records with HalfAugmenter. 2 | CUDA_VISIBLE_DEVICES=0 python train.py \ 3 | --train_dir="../model/dataaugmentation_attention_pooling" \ 4 | --train_data_pattern="/Youtube-8M/data/frame/largetrain/*.tfrecord" \ 5 | --frame_features=True \ 6 | --feature_names="rgb,audio" \ 7 | --feature_sizes="1024,128" \ 8 | --model=LstmPositionalAttentionMaxPoolingModel \ 9 | --moe_num_mixtures=8 \ 10 | --lstm_attentions=1 \ 11 | --positional_embedding_size=32 \ 12 | --rnn_swap_memory=True \ 13 | --base_learning_rate=0.001 \ 14 | --num_readers=4 \ 15 | --num_epochs=6 \ 16 | --batch_size=40 \ 17 | --data_augmenter=HalfAugmenter \ 18 | --keep_checkpoint_every_n_hours=2.0 19 | -------------------------------------------------------------------------------- /model/virtual_grouping/ensemble.conf: -------------------------------------------------------------------------------- 1 |
video_cc_structure_bagging/ensemble_matrix_model 2 | video_dcc_bagging 3 | video_dcc_boosting/ensemble_matrix_model 4 | video_dcc_boosting_discardhopeless/ensemble_matrix_model 5 | video_dcc_boosting_weightclip/ensemble_matrix_model 6 | cnn_deep_combine_chain_bagging/ensemble_matrix_model 7 | cnn_deep_combine_chain_boosting/ensemble_matrix_model 8 | lstmattention8max_bagging/ensemble_matrix_model 9 | lstmattention8max_boosting/ensemble4_matrix_model 10 | lstmparalleloutput_bagging/ensemble_mean_model 11 | lstmparalleloutput_boosting_weightclip/ensemble_matrix_model 12 | virtual_grouping/virtual_group_attention 13 | virtual_grouping/virtual_group_dcc 14 | virtual_grouping/virtual_group_lstm 15 | virtual_grouping/virtual_group_mean 16 | virtual_grouping/virtual_group_video 17 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-chaining-lstm.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=0 python train.py \ 3 | --train_dir="../model/multilstmmemory1024_moe4_deep_chain/" \ 4 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 5 | --model=LstmMemoryDeepChainModel \ 6 | --moe_num_mixtures=4 \ 7 | --deep_chain_relu_cells=200 \ 8 | --deep_chain_layers=1 \ 9 | --frame_features=True \ 10 | --feature_names="rgb,audio" \ 11 | --feature_sizes="1024,128" \ 12 | --lstm_layers=2 \ 13 | --lstm_cells=1024 \ 14 | --multitask=True \ 15 | --support_type="label" \ 16 | --num_supports=4716 \ 17 | --label_loss=MultiTaskCrossEntropyLoss \ 18 | --base_learning_rate=0.0008 \ 19 | --support_loss_percent=0.2 \ 20 | --keep_checkpoint_every_n_hours=1.75 \ 21 | --rnn_swap_memory=True \ 22 | --batch_size=128 23 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-dbof.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | 
EVERY=1000 4 | MODEL=DbofModel 5 | MODEL_DIR="../model/dbof_model" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --model=$MODEL \ 22 | --batch_size=128 \ 23 | --num_readers=1 \ 24 | --run_once=True 25 | fi 26 | done 27 | 28 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-cascade-76-parallel-lstm.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=1 python train-with-predictions.py \ 2 | --train_dir="../model/distillchain_v2_lstmparalleloutput" \ 3 | --train_data_pattern="/Youtube-8M/data/frame/train/*.tfrecord" \ 4 | --predictions_data_pattern="/Youtube-8M/distillation_v2/predictions/*.tfrecord" \ 5 | --frame_features=True \ 6 | --feature_names="rgb,audio" \ 7 | --feature_sizes="1024,128" \ 8 | --distillation_features=False \ 9 | --distillation_as_input=True \ 10 | --model=DistillchainLstmParallelFinaloutputModel \ 11 | --rnn_swap_memory=True \ 12 | --lstm_cells="1024,128" \ 13 | --moe_num_mixtures=4 \ 14 | --num_readers=4 \ 15 | --batch_size=128 \ 16 | --num_epochs=2 \ 17 | --keep_checkpoint_every_n_hours=2.0 \ 18 | --base_learning_rate=0.001 19 | 20 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-video-logistic.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=100 4 | MODEL=LogisticModel 5 | 
MODEL_DIR="../model/video_logistic_model" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --model=$MODEL \ 22 | --batch_size=32 \ 23 | --run_once=True 24 | fi 25 | done 26 | 27 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-distill_video_dcc.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in ensemble_train ensemble_validate test; do 3 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/distillation_video_dcc_noise" \ 5 | --model_checkpoint_path="../model/distillation_video_dcc_noise/scene1_percent_0.4/model.ckpt-47842" \ 6 | --input_data_pattern="/Youtube-8M/data/video/${part}/*.tfrecord" \ 7 | --frame_features=False \ 8 | --feature_names="mean_rgb,mean_audio" \ 9 | --feature_sizes="1024,128" \ 10 | --model=DeepCombineChainModel \ 11 | --deep_chain_relu_cells=256 \ 12 | --deep_chain_layers=4 \ 13 | --moe_num_mixtures=4 \ 14 | --batch_size=32 \ 15 | --file_size=4096 16 | done 17 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-lstmmemory-layer1.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in ensemble_validate; do 4 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 5 | 
--output_dir="/Youtube-8M/model_predictions/${part}/lstmmemory1024_layer1_moe8" \ 6 | --model_checkpoint_path="../model/lstmmemory1024_layer1_moe8/model.ckpt-149022" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --model=LstmMemoryModel \ 12 | --lstm_cells=1024 \ 13 | --lstm_layers=1 \ 14 | --moe_num_mixtures=8 \ 15 | --rnn_swap_memory=False \ 16 | --batch_size=128 \ 17 | --file_size=4096 18 | done 19 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-chaining-lstm-cnn.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/lstm_cnn_deep_combine_chain/" \ 3 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --model=LstmCnnDeepCombineChainModel \ 8 | --deep_chain_layers=3 \ 9 | --deep_chain_relu_cells=128 \ 10 | --moe_num_mixtures=4 \ 11 | --lstm_layers=1 \ 12 | --lstm_cells="1024,128" \ 13 | --rnn_swap_memory=True \ 14 | --multitask=True \ 15 | --label_loss=MultiTaskCrossEntropyLoss \ 16 | --support_type="label,label,label" \ 17 | --support_loss_percent=0.05 \ 18 | --keep_checkpoint_every_n_hours=1.0 \ 19 | --batch_size=96 \ 20 | --num_readers=4 \ 21 | --base_learning_rate=0.001 22 | 23 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-moe-model.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID="1" 3 | EVERY=100 4 | MODEL=MoeModel 5 | MODEL_DIR="../model/video_moe16_model" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ 
$checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --model=$MODEL \ 22 | --moe_num_mixtures=16 \ 23 | --batch_size=1024 \ 24 | --run_once=True 25 | fi 26 | done 27 | 28 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-chaining-cnn.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/cnn_deep_combine_chain/" \ 3 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --model=CnnDeepCombineChainModel \ 8 | --deep_chain_layers=4 \ 9 | --deep_chain_relu_cells=128 \ 10 | --moe_num_mixtures=4 \ 11 | --multitask=True \ 12 | --label_loss=MultiTaskCrossEntropyLoss \ 13 | --support_type="label,label,label,label" \ 14 | --support_loss_percent=0.05 \ 15 | --num_readers=4 \ 16 | --batch_size=128 \ 17 | --keep_checkpoint_every_n_hours=0.5 \ 18 | --base_learning_rate=0.001 19 | 20 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=0 3 | EVERY=500 4 | MODEL=LstmModel 5 | MODEL_DIR="../model/attentionlstm_moe4" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | 
--model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M-validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=128 \ 22 | --model=$MODEL \ 23 | --lstm_cells=1024 \ 24 | --moe_num_mixtures=4 \ 25 | --num_readers=1 \ 26 | --run_once=True 27 | fi 28 | done 29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-chain-moe.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID="1" 3 | EVERY=500 4 | MODEL=ChainMoeModel 5 | MODEL_DIR="../model/video_chain_moe16_ce" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=256 \ 22 | --model=$MODEL \ 23 | --moe_num_mixtures=16 \ 24 | --run_once=True 25 | fi 26 | done 27 | 28 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-video-pairwise.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=500 4 | MODEL=MoeModel 5 | MODEL_DIR="../model/video_moe16_mixedloss" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | 
--train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=256 \ 22 | --model=$MODEL \ 23 | --moe_num_mixtures=16 \ 24 | --run_once=True 25 | fi 26 | done 27 | 28 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-video-moe.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=100 4 | MODEL=MoeModel 5 | MODEL_DIR="/home/zhangt/yt8m/video_level_moe_model" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=4096 \ 22 | --model=$MODEL \ 23 | --moe_num_mixtures=16 \ 24 | --run_once=True 25 | fi 26 | done 27 | 28 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-cascade-75-chaining-cnn.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/distillchain_cnn_dcc" \ 3 | --train_data_pattern="/Youtube-8M/distillation/frame/train/*.tfrecord" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --distillation_features=True \ 8 | --distillation_as_input=True \ 9 | 
--model=DistillchainCnnDeepCombineChainModel \ 10 | --deep_chain_layers=3 \ 11 | --deep_chain_relu_cells=256 \ 12 | --moe_num_mixtures=4 \ 13 | --multitask=True \ 14 | --label_loss=MultiTaskCrossEntropyLoss \ 15 | --support_type="label,label,label" \ 16 | --support_loss_percent=0.05 \ 17 | --num_readers=4 \ 18 | --batch_size=128 \ 19 | --num_epochs=3 \ 20 | --keep_checkpoint_every_n_hours=0.5 \ 21 | --base_learning_rate=0.001 22 | 23 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/train-mean_model.sh: -------------------------------------------------------------------------------- 1 | model=$1 2 | conf=$2 3 | 4 | DEFAULT_GPU_ID=0 5 | if [ -z ${CUDA_VISIBLE_DEVICES+x} ]; then 6 | GPU_ID=$DEFAULT_GPU_ID 7 | echo "set CUDA_VISIBLE_DEVICES to default('$GPU_ID')" 8 | else 9 | GPU_ID=$CUDA_VISIBLE_DEVICES 10 | echo "set CUDA_VISIBLE_DEVICES to external('$GPU_ID')" 11 | fi 12 | 13 | train_path=/Youtube-8M/model_predictions/ensemble_train 14 | train_data_patterns="" 15 | for d in $(cat $conf); do 16 | train_data_patterns="${train_path}/${d}/*.tfrecord${train_data_patterns:+,$train_data_patterns}" 17 | done 18 | echo $train_data_patterns 19 | 20 | CUDA_VISIBLE_DEVICES="$GPU_ID" python train.py \ 21 | --train_dir="../model/${model}" \ 22 | --train_data_patterns="$train_data_patterns" \ 23 | --model=MeanModel \ 24 | --training=False \ 25 | --num_epochs=1 26 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-chain-moe-0.4.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID="1" 3 | EVERY=500 4 | MODEL=ChainMoeModel 5 | MODEL_DIR="../model/video_chain_moe16_ce_0.4" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | 
CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=256 \ 22 | --model=$MODEL \ 23 | --moe_num_mixtures=16 \ 24 | --run_once=True 25 | fi 26 | done 27 | 28 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-stage2-logistic.sh: -------------------------------------------------------------------------------- 1 | 2 | start=$1 3 | GPU_ID=1 4 | MODEL=Stage2LogisticModel 5 | MODEL_DIR="../model/video_moe16_stage2logistic" 6 | 7 | for checkpoint in $(for filename in $MODEL_DIR/model.ckpt-*.meta; do echo $filename | grep -o "ckpt-[0123456789]*.meta" | cut -d '-' -f 2 | cut -d '.' -f 1; done | sort -n); do 8 | if [ $checkpoint -gt $start ]; then 9 | echo $checkpoint; 10 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 11 | --train_dir="$MODEL_DIR" \ 12 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 13 | --eval_data_pattern="/Youtube-8M/data/video/validate-validate-part1/validatea*" \ 14 | --frame_features=False \ 15 | --feature_names="predictions" \ 16 | --feature_sizes=4716 \ 17 | --batch_size=1024 \ 18 | --model=$MODEL \ 19 | --run_once=True 20 | fi 21 | done 22 | 23 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-positional-lstmattention8max.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in test ensemble_train ensemble_validate; do 3 | CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/lstm_positional_attention8max" \ 5 | 
--model_checkpoint_path="../model/lstm_positional_attention8max/model.ckpt-198407" \ 6 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 7 | --frame_features=True \ 8 | --feature_names="rgb,audio" \ 9 | --feature_sizes="1024,128" \ 10 | --model=LstmPositionalAttentionMaxPoolingModel \ 11 | --moe_num_mixtures=8 \ 12 | --lstm_attentions=8 \ 13 | --positional_embedding_size=32 \ 14 | --rnn_swap_memory=True \ 15 | --batch_size=32 \ 16 | --file_size=4096 17 | done 18 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/infer-attention_linear_model.sh: -------------------------------------------------------------------------------- 1 | model=$1 2 | conf=$2 3 | checkpoint=$3 4 | test_path=/Youtube-8M/model_predictions/test 5 | 6 | test_data_patterns="" 7 | for d in $(cat $conf); do 8 | test_data_patterns="${test_path}/${d}/*.tfrecord${test_data_patterns:+,$test_data_patterns}" 9 | done 10 | echo "$test_data_patterns" 11 | input_data_pattern="${test_path}/model_input/*.tfrecord" 12 | 13 | #CUDA_VISIBLE_DEVICES=0 python inference.py \ 14 | python inference.py \ 15 | --model_checkpoint_path="../model/${model}/model.ckpt-${checkpoint}" \ 16 | --output_file="../model/${model}/predictions.${model}.csv" \ 17 | --model="AttentionLinearModel" \ 18 | --moe_num_mixtures=16 \ 19 | --batch_size=1024 \ 20 | --input_data_pattern="$input_data_pattern" \ 21 | --input_data_patterns="$test_data_patterns" 22 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-bi-uni-lstm.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=500 4 | MODEL=BiUniLstmModel 5 | MODEL_DIR="../model/biunilstm1024_moe4" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start 
]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=32 \ 22 | --model=$MODEL \ 23 | --lstm_cells=1024 \ 24 | --moe_num_mixtures=4 \ 25 | --run_once=True 26 | fi 27 | done 28 | 29 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/all_ensemble_models/mean_model.py: -------------------------------------------------------------------------------- 1 | import math 2 | import models 3 | import tensorflow as tf 4 | import utils 5 | from tensorflow import flags 6 | import tensorflow.contrib.slim as slim 7 | FLAGS = flags.FLAGS 8 | 9 | class MeanModel(models.BaseModel): 10 | """Mean model.""" 11 | 12 | def create_model(self, model_input, **unused_params): 13 | """Creates a logistic model. 14 | 15 | model_input: 'batch' x 'num_features' x 'num_methods' matrix of input features. 16 | vocab_size: The number of classes in the dataset. 17 | 18 | Returns: 19 | A dictionary with a tensor containing the probability predictions of the 20 | model in the 'predictions' key. 
The dimensions of the tensor are 21 | batch_size x num_classes.""" 22 | output = tf.reduce_mean(model_input, axis=2) 23 | return {"predictions": output} 24 | 25 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-att.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=500 4 | MODEL=LstmAttentionModel 5 | MODEL_DIR="../model/attentionlstm_moe4" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M-validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=128 \ 22 | --model=$MODEL \ 23 | --lstm_cells=1024 \ 24 | --moe_num_mixtures=4 \ 25 | --num_readers=1 \ 26 | --run_once=True 27 | fi 28 | done 29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-mem.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=500 4 | MODEL=LstmMemoryModel 5 | MODEL_DIR="../model/lstmmemory1024_moe8" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M-validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | 
--feature_sizes="1024,128" \ 21 | --batch_size=128 \ 22 | --model=$MODEL \ 23 | --lstm_cells=1024 \ 24 | --moe_num_mixtures=8 \ 25 | --num_readers=1 \ 26 | --run_once=True 27 | fi 28 | done 29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-cascade-76-chaining-cnn.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/distillchain_v2_cnn_dcc" \ 3 | --train_data_pattern="/Youtube-8M/distillation_v2/frame/train/*.tfrecord" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --distillation_features=True \ 8 | --distillation_as_input=True \ 9 | --model=DistillchainCnnDeepCombineChainModel \ 10 | --deep_chain_layers=3 \ 11 | --deep_chain_relu_cells=256 \ 12 | --moe_num_mixtures=4 \ 13 | --multitask=True \ 14 | --label_loss=MultiTaskCrossEntropyLoss \ 15 | --support_type="label,label,label" \ 16 | --support_loss_percent=0.05 \ 17 | --num_readers=4 \ 18 | --batch_size=128 \ 19 | --num_epochs=3 \ 20 | --keep_checkpoint_every_n_hours=0.5 \ 21 | --base_learning_rate=0.001 22 | 23 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/infer-mean_model.sh: -------------------------------------------------------------------------------- 1 | model=$1 2 | conf=$2 3 | 4 | DEFAULT_GPU_ID=0 5 | if [ -z ${CUDA_VISIBLE_DEVICES+x} ]; then 6 | GPU_ID=$DEFAULT_GPU_ID 7 | echo "set CUDA_VISIBLE_DEVICES to default('$GPU_ID')" 8 | else 9 | GPU_ID=$CUDA_VISIBLE_DEVICES 10 | echo "set CUDA_VISIBLE_DEVICES to external('$GPU_ID')" 11 | fi 12 | 13 | test_path=/Youtube-8M/model_predictions/test 14 | test_data_patterns="" 15 | for d in $(cat $conf); do 16 | test_data_patterns="${test_path}/${d}/*.tfrecord${test_data_patterns:+,$test_data_patterns}" 17 | done 18 | echo "$test_data_patterns" 19 | 20 | 
CUDA_VISIBLE_DEVICES="$GPU_ID" python inference.py \ 21 | --model_checkpoint_path="../model/${model}/model.ckpt-0" \ 22 | --output_file="../model/${model}/predictions.csv" \ 23 | --model="MeanModel" \ 24 | --input_data_patterns="$test_data_patterns" 25 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/data_augmentation_scripts/run-multiscale-cnn-lstm-model.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/dataaugmentation_multiscale_cnn_lstm" \ 3 | --train_data_pattern='/Youtube-8M/data/frame/largetrain/*.tfrecord' \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --model='MultiscaleCnnLstmModel' \ 8 | --multiscale_cnn_lstm_layers=4 \ 9 | --moe_num_mixtures=4 \ 10 | --multitask=True \ 11 | --label_loss=MultiTaskCrossEntropyLoss \ 12 | --support_loss_percent=1.0 \ 13 | --support_type="label,label,label,label" \ 14 | --is_training=True \ 15 | --num_readers=4 \ 16 | --num_epochs=5 \ 17 | --batch_size=40 \ 18 | --data_augmenter=HalfAugmenter \ 19 | --base_learning_rate=0.001 \ 20 | --keep_checkpoint_every_n_hour=2.0 21 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-multiple-attention-pooling-positional-embedding.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=0 python train.py \ 3 | --train_dir="../x27_model/lstm_positional_attention8max" \ 4 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 5 | --frame_features=True \ 6 | --feature_names="rgb,audio" \ 7 | --feature_sizes="1024,128" \ 8 | --reweight=True \ 9 | --sample_vocab_file="resources/train.video_id.vocab" \ 10 | --sample_freq_file="resources/train.video_id.freq" \ 11 | --model=LstmPositionalAttentionMaxPoolingModel \ 12 | --moe_num_mixtures=8 \ 13 | 
--lstm_attentions=8 \ 14 | --positional_embedding_size=32 \ 15 | --rnn_swap_memory=True \ 16 | --base_learning_rate=0.001 \ 17 | --num_readers=4 \ 18 | --num_epochs=6 \ 19 | --batch_size=128 \ 20 | --keep_checkpoint_every_n_hour=0.5 21 | -------------------------------------------------------------------------------- /model/model_selection/top_20_model.conf: -------------------------------------------------------------------------------- 1 | cnn_deep_combine_chain 2 | cnn_deep_combine_chain_bagging/ensemble_matrix_model 3 | distillation_cnn_dcc_boosting/sub_model_1 4 | distillation_lstmcnn_dcc_boosting/sub_model_1 5 | distillation_multilstm_dcc_boosting/sub_model_1 6 | framehop_lstm 7 | lstm_attention8_max 8 | lstm_cnn_deep_combine_chain 9 | lstm_positional_attention8max 10 | lstmattention8max_bagging/ensemble_matrix_model 11 | lstmattention8max_boosting/ensemble_attention_matrix_model 12 | lstmbiglu_cell1024_layer1_moe8 13 | lstmglu2_cell1024_layer1_moe8 14 | lstmmem1024_layer2_moe4_deep_combine_chain_add_length 15 | lstmparallelfinaloutput1024_moe8 16 | lstmparallelmemory1024_moe8 17 | multilstmmemory1024_moe4_deep_chain 18 | video_dcc_boosting/ensemble_matrix_model 19 | video_dcc_boosting_discardhopeless/ensemble_matrix_model 20 | video_dcc_boosting_weightclip/ensemble_matrix_model 21 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/infer-attention_rectified_linear_model.sh: -------------------------------------------------------------------------------- 1 | model=$1 2 | conf=$2 3 | checkpoint=$3 4 | test_path=/Youtube-8M/model_predictions/test 5 | 6 | test_data_patterns="" 7 | for d in $(cat $conf); do 8 | test_data_patterns="${test_path}/${d}/*.tfrecord${test_data_patterns:+,$test_data_patterns}" 9 | done 10 | echo "$test_data_patterns" 11 | input_data_pattern="${test_path}/model_input/*.tfrecord" 12 | 13 | #CUDA_VISIBLE_DEVICES=0 python inference.py \ 14 | python inference.py \ 15 | 
--model_checkpoint_path="../model/${model}/model.ckpt-${checkpoint}" \ 16 | --output_file="../model/${model}/predictions.${model}.csv" \ 17 | --model="AttentionRectifiedLinearModel" \ 18 | --moe_num_mixtures=16 \ 19 | --batch_size=1024 \ 20 | --input_data_pattern="$input_data_pattern" \ 21 | --input_data_patterns="$test_data_patterns" 22 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-moe-baseline.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=100 4 | MODEL=MoeModel 5 | MODEL_DIR="../model/video_moe16_baseline" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --label_loss=CrossEntropyLoss \ 22 | --batch_size=128 \ 23 | --model=$MODEL \ 24 | --moe_num_mixtures=16 \ 25 | --run_once=True 26 | fi 27 | done 28 | 29 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/eval_scripts/eval-lstmbiglu_1024_moe8.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=1000 4 | MODEL=LstmBigluModel 5 | MODEL_DIR="../model/frame_level_lstm_biglu_model" 6 | start=0 7 | DIR="$(pwd)" 8 | 9 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 10 | echo $checkpoint; 11 | if [[ $checkpoint -gt $start ]]; then 12 | 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | 
--train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=64 \ 22 | --model=$MODEL \ 23 | --moe_num_mixtures=4 \ 24 | --train=False \ 25 | --run_once=True 26 | fi 27 | done 28 | 29 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/infer-moe_model.sh: -------------------------------------------------------------------------------- 1 | model=$1 2 | conf=$2 3 | 4 | DEFAULT_GPU_ID=0 5 | if [ -z ${CUDA_VISIBLE_DEVICES+x} ]; then 6 | GPU_ID=$DEFAULT_GPU_ID 7 | echo "set CUDA_VISIBLE_DEVICES to default('$GPU_ID')" 8 | else 9 | GPU_ID=$CUDA_VISIBLE_DEVICES 10 | echo "set CUDA_VISIBLE_DEVICES to external('$GPU_ID')" 11 | fi 12 | 13 | test_path=/Youtube-8M/model_predictions/test 14 | test_data_patterns="" 15 | for d in $(cat $conf); do 16 | test_data_patterns="${test_path}/${d}/*.tfrecord${test_data_patterns:+,$test_data_patterns}" 17 | done 18 | echo "$test_data_patterns" 19 | 20 | CUDA_VISIBLE_DEVICES="$GPU_ID" python inference.py \ 21 | --model_checkpoint_path="../model/${model}/model.ckpt-2203" \ 22 | --output_file="../model/${model}/predictions.${model}.csv" \ 23 | --model="MoeModel" \ 24 | --input_data_patterns="$test_data_patterns" 25 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/model_selection_scripts/extend-step-mean_model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 4 | model_name="$1" 5 | candidates_conf="$2" 6 | 7 | train_path=/Youtube-8M/model_predictions_for_selection/ensemble_train 8 | model_path="${DIR}/../../model/${model_name}" 9 | all_models_conf="${model_path}/all_models.conf" 10 | 11 | 
for candidates in $(cat $candidates_conf); do 12 | echo "$candidates" 13 | train_data_patterns=$(python ${DIR}/get_patterns.py --train_path="$train_path" --candidates="$candidates") 14 | CUDA_VISIBLE_DEVICES=1 python ${DIR}/../eval.py \ 15 | --model_checkpoint_path="${model_path}/model.ckpt-0" \ 16 | --train_dir="${model_path}" \ 17 | --model="MeanModel" \ 18 | --echo_gap=True \ 19 | --batch_size=1024 \ 20 | --eval_data_patterns="$train_data_patterns" | tail -n 1 21 | done 22 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/data_augmentation_scripts/run-chaining-lstm.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES=0 python train.py \ 3 | --train_dir="../model/dataaugmentation_chaining_lstm/" \ 4 | --train_data_pattern="/Youtube-8M/data/frame/largetrain/*.tfrecord" \ 5 | --model=LstmMemoryDeepChainModel \ 6 | --moe_num_mixtures=4 \ 7 | --deep_chain_relu_cells=200 \ 8 | --deep_chain_layers=1 \ 9 | --frame_features=True \ 10 | --feature_names="rgb,audio" \ 11 | --feature_sizes="1024,128" \ 12 | --lstm_layers=2 \ 13 | --lstm_cells=1024 \ 14 | --multitask=True \ 15 | --support_type="label" \ 16 | --num_supports=4716 \ 17 | --label_loss=MultiTaskCrossEntropyLoss \ 18 | --base_learning_rate=0.0008 \ 19 | --support_loss_percent=0.2 \ 20 | --keep_checkpoint_every_n_hours=3.0 \ 21 | --rnn_swap_memory=True \ 22 | --num_epochs=4 \ 23 | --data_augmenter=HalfAugmenter \ 24 | --batch_size=40 25 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-frame-seg.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID="1" 3 | EVERY=1000 4 | MODEL=FrameSegModel 5 | MODEL_DIR="../model/frame_seg_model" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | 
if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=32 \ 22 | --feature_transformer=IdenticalTransformer \ 23 | --model=$MODEL \ 24 | --moe_num_mixtures=16 \ 25 | --run_once=True 26 | fi 27 | done 28 | 29 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/infer-attention_moe_model.sh: -------------------------------------------------------------------------------- 1 | model=$1 2 | conf=$2 3 | checkpoint=$3 4 | test_path=/Youtube-8M/model_predictions/test 5 | 6 | test_data_patterns="" 7 | for d in $(cat $conf); do 8 | test_data_patterns="${test_path}/${d}/*.tfrecord${test_data_patterns:+,$test_data_patterns}" 9 | done 10 | echo "$test_data_patterns" 11 | input_data_pattern="${test_path}/model_input/*.tfrecord" 12 | 13 | #CUDA_VISIBLE_DEVICES=0 python inference.py \ 14 | python inference.py \ 15 | --model_checkpoint_path="../model/${model}/model.ckpt-${checkpoint}" \ 16 | --output_file="../model/${model}/predictions.${model}.csv" \ 17 | --model=AttentionMoeModel \ 18 | --moe_num_mixtures=8 \ 19 | --attention_relu_cells=128 \ 20 | --batch_size=1024 \ 21 | --input_data_pattern="$input_data_pattern" \ 22 | --input_data_patterns="$test_data_patterns" 23 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-cnn-model.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID="0" 3 | EVERY=4000 4 | MODEL=CnnModel 5 | MODEL_DIR="../model/cnn_model" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python 
"${DIR}/training_utils/select.py" "$EVERY"); do  # quoted: DIR comes from $(pwd) and may contain spaces
11 |   echo $checkpoint;
12 |   if [ "$checkpoint" -gt "${start:-0}" ]; then  # 0 fallback keeps the test valid when $1 was omitted
13 |     echo $checkpoint;
14 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
15 |       --train_dir="$MODEL_DIR" \
16 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
17 |       --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \
18 |       --frame_features=True \
19 |       --feature_names="rgb,audio" \
20 |       --feature_sizes="1024,128" \
21 |       --model=$MODEL \
22 |       --cnn_num_filters=512 \
23 |       --moe_num_mixtures=4 \
24 |       --num_readers=4 \
25 |       --batch_size=128 \
26 |       --run_once=True
27 |   fi
28 | done
29 |
30 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/eval_scripts/eval-stage2-moe.sh:
--------------------------------------------------------------------------------
1 | # Evaluate every MoeModel checkpoint found in $MODEL_DIR (model.ckpt-<step>.meta) whose step is greater than $1.
2 | start="${1:-0}"  # lower bound on the checkpoint step; defaults to 0 when no argument is given
3 | GPU_ID=0
4 | MODEL=MoeModel
5 | MODEL_DIR="../model/video_moe16_stage2_moe8"
6 |
7 | for checkpoint in $(for filename in $MODEL_DIR/model.ckpt-*.meta; do echo $filename | grep -o "ckpt-[0123456789]*.meta" | cut -d '-' -f 2 | cut -d '.'
-f 1; done | sort -n); do
8 |   if [ "$checkpoint" -gt "${start:-0}" ]; then  # quoted, with a 0 fallback so a missing $1 does not break the test
9 |     echo $checkpoint;
10 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
11 |       --train_dir="$MODEL_DIR" \
12 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
13 |       --eval_data_pattern="/Youtube-8M/data/video/validate-validate-part1/validatea*" \
14 |       --frame_features=False \
15 |       --feature_names="predictions" \
16 |       --feature_sizes=4716 \
17 |       --batch_size=1024 \
18 |       --moe_num_mixtures=8 \
19 |       --model=$MODEL \
20 |       --run_once=True
21 |   fi
22 | done
23 |
24 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/training_scripts/run-chaining-video.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES="0" python train.py \
2 |   --train_dir="../model/video_chain_moe16_verydeep_combine/" \
3 |   --frame_features=False \
4 |   --feature_names="mean_rgb,mean_audio" \
5 |   --feature_sizes="1024,128" \
6 |   --model=DeepCombineChainModel \
7 |   --label_loss=MultiTaskCrossEntropyLoss \
8 |   --train_data_pattern="/Youtube-8M/data/video/train/train*" \
9 |   --batch_size=512 \
10 |   --multitask=True \
11 |   --support_type="label,label,label,label,label,label,label,label" \
12 |   --support_loss_percent=0.025 \
13 |   --moe_num_mixtures=2 \
14 |   --keep_checkpoint_every_n_hours=0.25 \
15 |   --num_readers=4 \
16 |   --deep_chain_layers=8 \
17 |   --deep_chain_relu_cells=128 \
18 |   --base_learning_rate=0.004
19 |
20 |
--------------------------------------------------------------------------------
/youtube-8m-ensemble/ensemble_scripts/eval-mean_model.sh:
--------------------------------------------------------------------------------
1 | model=$1  # model directory name under ../model
2 | conf=$2  # file whose whitespace-separated entries are sub-directories of the validation prediction path
3 |
4 | DEFAULT_GPU_ID=0
5 | if [ -z "${CUDA_VISIBLE_DEVICES+x}" ]; then  # expands to "x" only when the variable is set; quoted so test always gets an operand
6 |   GPU_ID=$DEFAULT_GPU_ID
7 |   echo "set CUDA_VISIBLE_DEVICES to default('$GPU_ID')"
8 | else
9 |   GPU_ID=$CUDA_VISIBLE_DEVICES
10 |   echo "set CUDA_VISIBLE_DEVICES to external('$GPU_ID')"
11 |
fi
12 |
13 | validate_path=/Youtube-8M/model_predictions/ensemble_validate
14 | validate_data_patterns=""
15 | for d in $(cat "$conf"); do  # quoted so a missing $2 makes cat fail instead of waiting on stdin
16 |   validate_data_patterns="${validate_path}/${d}/*.tfrecord${validate_data_patterns:+,$validate_data_patterns}"
17 | done
18 | echo "$validate_data_patterns"
19 |
20 | CUDA_VISIBLE_DEVICES="$GPU_ID" python eval.py \
21 |   --model_checkpoint_path="../model/${model}/model.ckpt-0" \
22 |   --train_dir="../model/${model}" \
23 |   --model="MeanModel" \
24 |   --eval_data_patterns="$validate_data_patterns"
25 |
--------------------------------------------------------------------------------
/youtube-8m-ensemble/top_k_scripts/infer-attention_matrix_model.sh:
--------------------------------------------------------------------------------
1 | model=$1  # model directory name under ../model
2 | conf=$2  # file whose whitespace-separated entries are sub-directories of $test_path
3 | checkpoint=$3  # step suffix of the checkpoint to load (model.ckpt-<step>)
4 | test_path=/Youtube-8M/model_predictions/test
5 |
6 | test_data_patterns=""
7 | for d in $(cat "$conf"); do  # quoted so a missing $2 makes cat fail instead of waiting on stdin
8 |   test_data_patterns="${test_path}/${d}/*.tfrecord${test_data_patterns:+,$test_data_patterns}"
9 | done
10 | echo "$test_data_patterns"
11 | input_data_pattern="${test_path}/model_input/*.tfrecord"
12 |
13 | #CUDA_VISIBLE_DEVICES=0 python inference.py \
14 | python inference.py \
15 |   --model_checkpoint_path="../model/${model}/model.ckpt-${checkpoint}" \
16 |   --output_file="../model/${model}/predictions.${model}.csv" \
17 |   --model="AttentionMatrixModel" \
18 |   --moe_num_mixtures=4 \
19 |   --attention_matrix_rank=8 \
20 |   --batch_size=1024 \
21 |   --input_data_pattern="$input_data_pattern" \
22 |   --input_data_patterns="$test_data_patterns"
23 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/eval_scripts/eval-chain-model-relu.sh:
--------------------------------------------------------------------------------
1 | # Evaluate every ChainMainReluMoeModel checkpoint found in $MODEL_DIR whose step is greater than $1.
2 | start="${1:-0}"  # lower bound on the checkpoint step; defaults to 0 when no argument is given
3 | GPU_ID=1
4 | MODEL=ChainMainReluMoeModel
5 | MODEL_DIR="../model/video_chain_main_relu_moe16_ce"
6 |
7 | for checkpoint in $(for filename in $MODEL_DIR/model.ckpt-*.meta; do echo
$filename | grep -o "ckpt-[0123456789]*.meta" | cut -d '-' -f 2 | cut -d '.' -f 1; done | sort -n); do
8 |   if [ "$checkpoint" -gt "${start:-0}" ]; then  # quoted, with a 0 fallback so a missing $1 does not break the test
9 |     echo $checkpoint;
10 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
11 |       --train_dir="$MODEL_DIR" \
12 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
13 |       --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \
14 |       --frame_features=False \
15 |       --feature_names="mean_rgb,mean_audio" \
16 |       --feature_sizes="1024,128" \
17 |       --batch_size=256 \
18 |       --model=$MODEL \
19 |       --moe_num_mixtures=16 \
20 |       --run_once=True
21 |   fi
22 | done
23 |
24 |
--------------------------------------------------------------------------------
/youtube-8m-ensemble/ensemble_scripts/infer-attention_matrix_model.sh:
--------------------------------------------------------------------------------
1 | model=$1  # model directory name under ../model
2 | conf=$2  # file whose whitespace-separated entries are sub-directories of $test_path
3 | checkpoint=$3  # step suffix of the checkpoint to load (model.ckpt-<step>)
4 | test_path=/Youtube-8M/model_predictions/test
5 |
6 | test_data_patterns=""
7 | for d in $(cat "$conf"); do  # quoted so a missing $2 makes cat fail instead of waiting on stdin
8 |   test_data_patterns="${test_path}/${d}/*.tfrecord${test_data_patterns:+,$test_data_patterns}"
9 | done
10 | echo "$test_data_patterns"
11 | input_data_pattern="${test_path}/model_input/*.tfrecord"
12 |
13 | #CUDA_VISIBLE_DEVICES=0 python inference.py \
14 | python inference.py \
15 |   --model_checkpoint_path="../model/${model}/model.ckpt-${checkpoint}" \
16 |   --output_file="../model/${model}/predictions.${model}.csv" \
17 |   --model="AttentionMatrixModel" \
18 |   --moe_num_mixtures=4 \
19 |   --attention_matrix_rank=8 \
20 |   --batch_size=1024 \
21 |   --input_data_pattern="$input_data_pattern" \
22 |   --input_data_patterns="$test_data_patterns"
23 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/eval_scripts/eval-lstmmemory.sh:
--------------------------------------------------------------------------------
1 | # Evaluate saved LstmMemoryModel checkpoints whose step is greater than $1 (one eval.py run per checkpoint).
2 | GPU_ID="1"
3 | EVERY=100
4 | MODEL=LstmMemoryModel
5 | MODEL_DIR="../model/lstmmemory_moe8_batch256"
6 |
7 |
start="${1:-0}"  # lower bound on the checkpoint step; defaults to 0 so the -gt test never sees an empty operand
8 | DIR="$(pwd)"
9 |
10 | for checkpoint in $(cd "$MODEL_DIR" && python "${DIR}/training_utils/select.py" "$EVERY"); do  # quoted: $(pwd) may contain spaces
11 |   echo $checkpoint;
12 |   if [ "$checkpoint" -gt "$start" ]; then
13 |     echo $checkpoint;
14 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
15 |       --train_dir="$MODEL_DIR" \
16 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
17 |       --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \
18 |       --frame_features=True \
19 |       --feature_names="rgb,audio" \
20 |       --feature_sizes="1024,128" \
21 |       --batch_size=32 \
22 |       --model=$MODEL \
23 |       --moe_num_mixtures=8 \
24 |       --lstm_layers=2 \
25 |       --rnn_swap_memory=True \
26 |       --run_once=True
27 |   fi
28 | done
29 |
30 |
--------------------------------------------------------------------------------
/youtube-8m-zhangteng/YM_labels_model.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import tensorflow as tf
4 | from tensorflow import flags
5 | import numpy as np
6 | import csv
7 |
8 | flags.DEFINE_string("src_path_1", "predictions_best.csv", "")
9 | # NOTE(review): FLAGS is read in main() without tf.app.run(); presumably relies on lazy flag parsing in the TF version in use - confirm.
10 | def main():  # count how often each label index occurs in the CSV at --src_path_1 and save the counts to labels_model.out
11 |     labels_uni = np.zeros([4716,1])
12 |     with open(flags.FLAGS.src_path_1, "rt", encoding='utf-8') as csvfile:
13 |         spamreader = csv.reader(csvfile)
14 |         line_num = 0
15 |         for row in spamreader:
16 |             line_num += 1
17 |             print('the '+str(line_num)+'th file is processing')
18 |             if line_num==1:
19 |                 continue  # the first row is skipped (presumably the CSV header - confirm)
20 |             lbs = row[1].split()
21 |             for i in range(0,len(lbs),2):  # every other token of column 1 is used as a label index
22 |                 labels_uni[int(lbs[i])] += 1
23 |     np.savetxt('labels_model.out', labels_uni, delimiter=',')
24 |
25 | if __name__=='__main__':
26 |     main()
27 |
--------------------------------------------------------------------------------
/youtube-8m-ensemble/ensemble_scripts/after_submission_no1.conf:
--------------------------------------------------------------------------------
1 | video_relabel_combine_chain
2 | video_very_deep_combine_chain
3 | lstmmemory_cell1024_layer2_moe8
4 |
lstmmemory_cell2048_layer2_moe4
5 | lstmparallelfinaloutput1024_moe8
6 | lstmparallelmemory1024_moe8
7 | lstmgate_cell1024_layer1_moe8
8 | lstmglu2_cell1024_layer1_moe8
9 | lstmbiglu_cell1024_layer1_moe8
10 | biunilstm1024_moe4
11 | lstm_random_mean_moe8
12 | lstm_shortlayers_moe8
13 | framehop_lstm
14 | cnnlstmmemory1024_moe8
15 | attentionlstm_moe4
16 | lstmattlstm1024_moe8
17 | lstm_attention8_max
18 | lstm2_attention8_max
19 | lstm_positional_attention8max
20 | cnn_deep_combine_chain
21 | deep_cnn_deep_combine_chain
22 | lstm_cnn_deep_combine_chain
23 | multilstmmemory1024_moe4_deep_chain
24 | lstmmem1024_layer2_moe4_deep_combine_chain_add_length
25 | multires_lstm_deep_combine_chain
26 | lstm_gate_multiscale4_moe4
27 | lstm_multiscale4_moe4
28 |
--------------------------------------------------------------------------------
/youtube-8m-ensemble/ensemble_scripts/infer-attention_linmatrix_model.sh:
--------------------------------------------------------------------------------
1 | model=$1  # model directory name under ../model
2 | conf=$2  # file whose whitespace-separated entries are sub-directories of $test_path
3 | checkpoint=$3  # step suffix of the checkpoint to load (model.ckpt-<step>)
4 | test_path=/Youtube-8M/model_predictions/test
5 |
6 | test_data_patterns=""
7 | for d in $(cat "$conf"); do  # quoted so a missing $2 makes cat fail instead of waiting on stdin
8 |   test_data_patterns="${test_path}/${d}/*.tfrecord${test_data_patterns:+,$test_data_patterns}"
9 | done
10 | echo "$test_data_patterns"
11 | input_data_pattern="${test_path}/model_input/*.tfrecord"
12 |
13 | #CUDA_VISIBLE_DEVICES=0 python inference.py \
14 | python inference.py \
15 |   --model_checkpoint_path="../model/${model}/model.ckpt-${checkpoint}" \
16 |   --output_file="../model/${model}/predictions.${model}.csv" \
17 |   --model="AttentionLinmatrixModel" \
18 |   --moe_num_mixtures=16 \
19 |   --attention_matrix_rank=4 \
20 |   --batch_size=1024 \
21 |   --input_data_pattern="$input_data_pattern" \
22 |   --input_data_patterns="$test_data_patterns"
23 |
--------------------------------------------------------------------------------
/youtube-8m-ensemble/ensemble_scripts/train-moe_model.sh:
--------------------------------------------------------------------------------
1 | model=$1  # model directory name under ../model
2 | conf=$2  # file whose whitespace-separated entries are sub-directories of $train_path
3 |
4 | DEFAULT_GPU_ID=0
5 | if [ -z "${CUDA_VISIBLE_DEVICES+x}" ]; then  # expands to "x" only when the variable is set; quoted so test always gets an operand
6 |   GPU_ID=$DEFAULT_GPU_ID
7 |   echo "set CUDA_VISIBLE_DEVICES to default('$GPU_ID')"
8 | else
9 |   GPU_ID=$CUDA_VISIBLE_DEVICES
10 |   echo "set CUDA_VISIBLE_DEVICES to external('$GPU_ID')"
11 | fi
12 |
13 | train_path=/Youtube-8M/model_predictions/ensemble_train
14 | train_data_patterns=""
15 | for d in $(cat "$conf"); do  # quoted so a missing $2 makes cat fail instead of waiting on stdin
16 |   train_data_patterns="${train_path}/${d}/*.tfrecord${train_data_patterns:+,$train_data_patterns}"
17 | done
18 | echo "$train_data_patterns"  # quoted so the glob patterns are printed literally, never expanded
19 |
20 | CUDA_VISIBLE_DEVICES="$GPU_ID" python train.py \
21 |   --train_dir="../model/${model}" \
22 |   --train_data_patterns="$train_data_patterns" \
23 |   --model=MoeModel \
24 |   --keep_checkpoint_every_n_hours=0.1 \
25 |   --batch_size=1024 \
26 |   --num_epochs=5
27 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/eval_scripts/eval-chain-model-suprelu.sh:
--------------------------------------------------------------------------------
1 | # Evaluate every ChainSupportReluMoeModel checkpoint found in $MODEL_DIR whose step is greater than $1.
2 | start="${1:-0}"  # lower bound on the checkpoint step; defaults to 0 when no argument is given
3 | GPU_ID=1
4 | MODEL=ChainSupportReluMoeModel
5 | MODEL_DIR="../model/video_chain_support_relu_moe16_ce"
6 |
7 | for checkpoint in $(for filename in $MODEL_DIR/model.ckpt-*.meta; do echo $filename | grep -o "ckpt-[0123456789]*.meta" | cut -d '-' -f 2 | cut -d '.'
-f 1; done | sort -n); do
8 |   if [ "$checkpoint" -gt "${start:-0}" ]; then  # quoted, with a 0 fallback so a missing $1 does not break the test
9 |     echo $checkpoint;
10 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
11 |       --train_dir="$MODEL_DIR" \
12 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
13 |       --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \
14 |       --frame_features=False \
15 |       --feature_names="mean_rgb,mean_audio" \
16 |       --feature_sizes="1024,128" \
17 |       --batch_size=256 \
18 |       --model=$MODEL \
19 |       --moe_num_mixtures=16 \
20 |       --run_once=True
21 |   fi
22 | done
23 |
24 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/eval_scripts/eval-lstmmem-cnnlstm.sh:
--------------------------------------------------------------------------------
1 | # Evaluate saved CnnLstmMemoryModel checkpoints whose step is greater than $1 (one eval.py run per checkpoint).
2 | GPU_ID="1"
3 | EVERY=1000
4 | MODEL=CnnLstmMemoryModel
5 | MODEL_DIR="../model/cnnlstmmemory1024_moe8"
6 |
7 | start="${1:-0}"  # lower bound on the checkpoint step; defaults to 0 so the -gt test never sees an empty operand
8 | DIR="$(pwd)"
9 |
10 | for checkpoint in $(cd "$MODEL_DIR" && python "${DIR}/training_utils/select.py" "$EVERY"); do  # quoted: $(pwd) may contain spaces
11 |   echo $checkpoint;
12 |   if [ "$checkpoint" -gt "$start" ]; then
13 |     echo $checkpoint;
14 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
15 |       --train_dir="$MODEL_DIR" \
16 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
17 |       --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \
18 |       --frame_features=True \
19 |       --feature_names="rgb,audio" \
20 |       --feature_sizes="1024,128" \
21 |       --batch_size=32 \
22 |       --model=$MODEL \
23 |       --moe_num_mixtures=8 \
24 |       --lstm_layers=2 \
25 |       --rnn_swap_memory=True \
26 |       --run_once=True
27 |   fi
28 | done
29 |
30 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/eval_scripts/eval-att-lstm.sh:
--------------------------------------------------------------------------------
1 | # Evaluate saved LstmAttentionLstmModel checkpoints whose step is greater than $1 (one eval.py run per checkpoint).
2 | GPU_ID=1
3 | EVERY=500
4 | MODEL=LstmAttentionLstmModel
5 | MODEL_DIR="../model/lstmattlstm1024_moe8"
6 |
7 | start="${1:-0}"  # lower bound on the checkpoint step; defaults to 0 when no argument is given
8 | DIR="$(pwd)"
9 |
10 | for checkpoint in $(cd "$MODEL_DIR" &&
python "${DIR}/training_utils/select.py" "$EVERY"); do  # quoted: DIR comes from $(pwd) and may contain spaces
11 |   echo $checkpoint;
12 |   if [ "$checkpoint" -gt "${start:-0}" ]; then  # 0 fallback keeps the test valid when $1 was omitted
13 |     echo $checkpoint;
14 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
15 |       --train_dir="$MODEL_DIR" \
16 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
17 |       --eval_data_pattern="/Youtube-8M-validate/validatea*" \
18 |       --frame_features=True \
19 |       --feature_names="rgb,audio" \
20 |       --feature_sizes="1024,128" \
21 |       --batch_size=128 \
22 |       --model=$MODEL \
23 |       --lstm_cells=1024 \
24 |       --lstm_layers=1 \
25 |       --moe_num_mixtures=8 \
26 |       --num_readers=1 \
27 |       --run_once=True
28 |   fi
29 | done
30 |
31 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/data_augmentation_scripts/run-chaining-cnn.sh:
--------------------------------------------------------------------------------
1 | # Train CnnDeepCombineChainModel on frame-level rgb+audio features with the HalfAugmenter data augmenter.
2 | CUDA_VISIBLE_DEVICES=0 python train.py \
3 |   --train_dir="../model/dataaugmentation_chaining_cnn/" \
4 |   --train_data_pattern="/Youtube-8M/data/frame/largetrain/*.tfrecord" \
5 |   --frame_features=True \
6 |   --feature_names="rgb,audio" \
7 |   --feature_sizes="1024,128" \
8 |   --model=CnnDeepCombineChainModel \
9 |   --deep_chain_layers=4 \
10 |   --deep_chain_relu_cells=128 \
11 |   --moe_num_mixtures=4 \
12 |   --multitask=True \
13 |   --label_loss=MultiTaskCrossEntropyLoss \
14 |   --support_type="label,label,label,label" \
15 |   --support_loss_percent=0.05 \
16 |   --num_readers=4 \
17 |   --batch_size=40 \
18 |   --data_augmenter=HalfAugmenter \
19 |   --keep_checkpoint_every_n_hours=1.0 \
20 |   --base_learning_rate=0.001
21 |
22 |
--------------------------------------------------------------------------------
/youtube-8m-zhangteng/eval_scripts/eval-lstmglu2_1024_moe8.sh:
--------------------------------------------------------------------------------
1 | # Evaluate saved LstmGlu2Model checkpoints whose step is greater than $start (one eval.py run per checkpoint).
2 | GPU_ID=1
3 | EVERY=1000
4 | MODEL=LstmGlu2Model
5 | MODEL_DIR="../model/frame_level_lstm_glu2_model"
6 | start=0
7 | DIR="$(pwd)"
8 |
9 | for checkpoint in $(cd
"$MODEL_DIR" && python "${DIR}/training_utils/select.py" "$EVERY"); do  # quoted: DIR comes from $(pwd) and may contain spaces
10 |   echo $checkpoint;
11 |   if [[ $checkpoint -gt $start ]]; then
12 |
13 |     echo $checkpoint;
14 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
15 |       --train_dir="$MODEL_DIR" \
16 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
17 |       --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \
18 |       --frame_features=True \
19 |       --feature_names="rgb,audio" \
20 |       --feature_sizes="1024,128" \
21 |       --batch_size=64 \
22 |       --model=$MODEL \
23 |       --video_level_classifier_model=MoeModel \
24 |       --moe_num_mixtures=8 \
25 |       --train=False \
26 |       --run_once=True
27 |   fi
28 | done
--------------------------------------------------------------------------------
/youtube-8m-zhangteng/eval_scripts/eval-video_relabel_combine_chain.sh:
--------------------------------------------------------------------------------
1 | # Evaluate saved MoeMix4Model checkpoints whose step is greater than $start (one eval.py run per checkpoint).
2 | GPU_ID=0
3 | EVERY=1000
4 | MODEL=MoeMix4Model
5 | MODEL_DIR="../model/video_level_moemix4_relabel_model"
6 | start=0
7 | DIR="$(pwd)"
8 |
9 | for checkpoint in $(cd "$MODEL_DIR" && python "${DIR}/training_utils/select.py" "$EVERY"); do  # quoted: $(pwd) may contain spaces
10 |   echo $checkpoint;
11 |   if [[ $checkpoint -gt $start ]]; then
12 |
13 |     echo $checkpoint;
14 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
15 |       --train_dir="$MODEL_DIR" \
16 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
17 |       --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \
18 |       --frame_features=False \
19 |       --feature_names="mean_rgb,mean_audio" \
20 |       --feature_sizes="1024,128" \
21 |       --batch_size=128 \
22 |       --model=$MODEL \
23 |       --class_size=100 \
24 |       --moe_num_mixtures=4 \
25 |       --moe_layers=3 \
26 |       --run_once=True
27 |   fi
28 | done
--------------------------------------------------------------------------------
/youtube-8m-ensemble/ensemble_scripts/infer-linear_model.sh:
--------------------------------------------------------------------------------
1 | model=$1  # model directory name under ../model
2 | conf=$2  # file whose whitespace-separated entries are sub-directories of the test prediction path
3 | checkpoint=$3  # step suffix of the checkpoint to load (model.ckpt-<step>)
4 |
5 |
DEFAULT_GPU_ID=0
6 | if [ -z "${CUDA_VISIBLE_DEVICES+x}" ]; then  # expands to "x" only when the variable is set; quoted so test always gets an operand
7 |   GPU_ID=$DEFAULT_GPU_ID
8 |   echo "set CUDA_VISIBLE_DEVICES to default('$GPU_ID')"
9 | else
10 |   GPU_ID=$CUDA_VISIBLE_DEVICES
11 |   echo "set CUDA_VISIBLE_DEVICES to external('$GPU_ID')"
12 | fi
13 |
14 | test_path=/Youtube-8M/model_predictions/test
15 | test_data_patterns=""
16 | for d in $(cat "$conf"); do  # quoted so a missing $2 makes cat fail instead of waiting on stdin
17 |   test_data_patterns="${test_path}/${d}/*.tfrecord${test_data_patterns:+,$test_data_patterns}"
18 | done
19 | echo "$test_data_patterns"
20 |
21 | CUDA_VISIBLE_DEVICES=${GPU_ID} python inference.py \
22 |   --model_checkpoint_path="../model/${model}/model.ckpt-$checkpoint" \
23 |   --output_file="../model/${model}/predictions.csv" \
24 |   --model="LinearRegressionModel" \
25 |   --input_data_patterns="$test_data_patterns"
26 |
--------------------------------------------------------------------------------
/youtube-8m-ensemble/ensemble_scripts/train-linear_model.sh:
--------------------------------------------------------------------------------
1 | model=$1  # model directory name under ../model
2 | conf=$2  # file whose whitespace-separated entries are sub-directories of $train_path
3 |
4 | DEFAULT_GPU_ID=0
5 | if [ -z "${CUDA_VISIBLE_DEVICES+x}" ]; then  # expands to "x" only when the variable is set; quoted so test always gets an operand
6 |   GPU_ID=$DEFAULT_GPU_ID
7 |   echo "set CUDA_VISIBLE_DEVICES to default('$GPU_ID')"
8 | else
9 |   GPU_ID=$CUDA_VISIBLE_DEVICES
10 |   echo "set CUDA_VISIBLE_DEVICES to external('$GPU_ID')"
11 | fi
12 |
13 | train_path=/Youtube-8M/model_predictions/ensemble_train
14 | train_data_patterns=""
15 | for d in $(cat "$conf"); do  # quoted so a missing $2 makes cat fail instead of waiting on stdin
16 |   train_data_patterns="${train_path}/${d}/*.tfrecord${train_data_patterns:+,$train_data_patterns}"
17 | done
18 | echo "$train_data_patterns"  # quoted so the glob patterns are printed literally, never expanded
19 |
20 | CUDA_VISIBLE_DEVICES="$GPU_ID" python train.py \
21 |   --train_dir="../model/${model}" \
22 |   --train_data_patterns="$train_data_patterns" \
23 |   --model=LinearRegressionModel \
24 |   --keep_checkpoint_every_n_hours=0.1 \
25 |   --batch_size=1024 \
26 |   --num_epochs=1
27 |
--------------------------------------------------------------------------------
/youtube-8m-ensemble/models.py:
-------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Contains the base class for models.""" 16 | 17 | class BaseModel(object): 18 | """Inherit from this class when implementing new models.""" 19 | 20 | def create_model(self, unused_model_input, **unused_params): 21 | raise NotImplementedError() 22 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-chain-moe-freq.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID="1" 3 | EVERY=100 4 | MODEL=ChainSupportReluMoeModel 5 | MODEL_DIR="../model/video_chain_moe16_ce_freq" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=256 \ 22 | --num_supports=200 \ 23 | --support_type="frequent" \ 24 | --model=$MODEL \ 25 | 
--moe_num_mixtures=16 \
26 |       --run_once=True
27 |   fi
28 | done
29 |
30 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/eval_scripts/eval-lstmmem-dropout.sh:
--------------------------------------------------------------------------------
1 | # Evaluate saved LstmMemoryModel (dropout) checkpoints whose step is greater than $1 (one eval.py run per checkpoint).
2 | GPU_ID=0
3 | EVERY=1000
4 | MODEL=LstmMemoryModel
5 | MODEL_DIR="../model/lstmmemory1024_moe8_dropout"
6 |
7 | start="${1:-0}"  # lower bound on the checkpoint step; defaults to 0 so the -gt test never sees an empty operand
8 | DIR="$(pwd)"
9 |
10 | for checkpoint in $(cd "$MODEL_DIR" && python "${DIR}/training_utils/select.py" "$EVERY"); do  # quoted: $(pwd) may contain spaces
11 |   echo $checkpoint;
12 |   if [ "$checkpoint" -gt "$start" ]; then
13 |     echo $checkpoint;
14 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
15 |       --train_dir="$MODEL_DIR" \
16 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
17 |       --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \
18 |       --frame_features=True \
19 |       --feature_names="rgb,audio" \
20 |       --feature_sizes="1024,128" \
21 |       --model=$MODEL \
22 |       --moe_num_mixtures=8 \
23 |       --lstm_cells=1024 \
24 |       --lstm_layers=2 \
25 |       --dropout=True \
26 |       --batch_size=64 \
27 |       --run_once=True
28 |   fi
29 | done
30 |
31 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/models.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS-IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Contains the base class for models."""
16 |
17 | class BaseModel(object):
18 |   """Inherit from this class when implementing new models."""
19 |
20 |   def create_model(self, unused_model_input, **unused_params):
21 |     raise NotImplementedError()
22 |
--------------------------------------------------------------------------------
/youtube-8m-zhangteng/eval_scripts/eval-lstm_random_moe8.sh:
--------------------------------------------------------------------------------
1 | # Evaluate saved LstmRandomModel checkpoints whose step is greater than $start (one eval.py run per checkpoint).
2 | GPU_ID=1
3 | EVERY=1000
4 | MODEL=LstmRandomModel
5 | MODEL_DIR="../model/frame_level_lstm_random_model"
6 | start=0
7 | DIR="$(pwd)"
8 |
9 | for checkpoint in $(cd "$MODEL_DIR" && python "${DIR}/training_utils/select.py" "$EVERY"); do  # quoted: $(pwd) may contain spaces
10 |   echo $checkpoint;
11 |   if [[ $checkpoint -gt $start ]]; then
12 |
13 |     echo $checkpoint;
14 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
15 |       --train_dir="$MODEL_DIR" \
16 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
17 |       --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \
18 |       --frame_features=True \
19 |       --feature_names="rgb,audio" \
20 |       --feature_sizes="1024,128" \
21 |       --batch_size=64 \
22 |       --model=$MODEL \
23 |       --video_level_classifier_model=MoeModel \
24 |       --moe_num_mixtures=8 \
25 |       --train=False \
26 |       --run_once=True
27 |   fi
28 | done
29 |
30 |
--------------------------------------------------------------------------------
/youtube-8m-zhangteng/eval_scripts/eval-lstmgate1024_moe8.sh:
--------------------------------------------------------------------------------
1 | # Evaluate saved LstmGateModel checkpoints whose step is greater than $start (one eval.py run per checkpoint).
2 | GPU_ID=1
3 | EVERY=1000
4 | MODEL=LstmGateModel
5 | MODEL_DIR="../model/frame_level_lstm_gate_model"
6 | start=0
7 | DIR="$(pwd)"
8 |
9 | for checkpoint in $(cd "$MODEL_DIR" && python "${DIR}/training_utils/select.py" "$EVERY"); do  # quoted: $(pwd) may contain spaces
10 |   echo $checkpoint;
11 |   if [[ $checkpoint -gt $start ]]; then
12 |
13 |     echo $checkpoint;
14 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
15 |       --train_dir="$MODEL_DIR" \
16 |
--model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=64 \ 22 | --model=$MODEL \ 23 | --video_level_classifier_model=MoeModel \ 24 | --moe_num_mixtures=8 \ 25 | --train=False \ 26 | --run_once=True 27 | fi 28 | done 29 | 30 | 31 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/models.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS-IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 |
15 | """Contains the base class for models."""
16 |
17 | class BaseModel(object):
18 |   """Inherit from this class when implementing new models."""
19 |
20 |   def create_model(self, unused_model_input, **unused_params):
21 |     raise NotImplementedError()
22 |
--------------------------------------------------------------------------------
/youtube-8m-ensemble/ensemble_scripts/train-nonunit_matrix_model.sh:
--------------------------------------------------------------------------------
1 | model=$1  # model directory name under ../model
2 | conf=$2  # file whose whitespace-separated entries are sub-directories of $train_path
3 |
4 | DEFAULT_GPU_ID=0
5 | if [ -z "${CUDA_VISIBLE_DEVICES+x}" ]; then  # expands to "x" only when the variable is set; quoted so test always gets an operand
6 |   GPU_ID=$DEFAULT_GPU_ID
7 |   echo "set CUDA_VISIBLE_DEVICES to default('$GPU_ID')"
8 | else
9 |   GPU_ID=$CUDA_VISIBLE_DEVICES
10 |   echo "set CUDA_VISIBLE_DEVICES to external('$GPU_ID')"
11 | fi
12 |
13 | train_path=/Youtube-8M/model_predictions/ensemble_train
14 | train_data_patterns=""
15 | for d in $(cat "$conf"); do  # quoted so a missing $2 makes cat fail instead of waiting on stdin
16 |   train_data_patterns="${train_path}/${d}/*.tfrecord${train_data_patterns:+,$train_data_patterns}"
17 | done
18 | echo "$train_data_patterns"  # quoted so the glob patterns are printed literally, never expanded
19 |
20 | CUDA_VISIBLE_DEVICES="$GPU_ID" python train.py \
21 |   --train_dir="../model/${model}" \
22 |   --train_data_patterns="$train_data_patterns" \
23 |   --model=NonunitMatrixRegressionModel \
24 |   --keep_checkpoint_every_n_hours=0.1 \
25 |   --batch_size=1024 \
26 |   --num_epochs=5
27 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/eval_scripts/eval-lstm-attention-8max.sh:
--------------------------------------------------------------------------------
1 | # Evaluate saved LstmAttentionMaxPoolingModel checkpoints whose step is greater than $1 (one eval.py run per checkpoint).
2 | GPU_ID="1"
3 | EVERY=200
4 | MODEL=LstmAttentionMaxPoolingModel
5 | MODEL_DIR="../model/lstmattention8max_bagging/base_model"
6 |
7 | start="${1:-0}"  # lower bound on the checkpoint step; defaults to 0 so the -gt test never sees an empty operand
8 | DIR="$(pwd)"
9 |
10 | for checkpoint in $(cd "$MODEL_DIR" && python "${DIR}/training_utils/select.py" "$EVERY"); do  # quoted: $(pwd) may contain spaces
11 |   echo $checkpoint;
12 |   if [ "$checkpoint" -gt "$start" ]; then
13 |     echo $checkpoint;
14 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
15 |       --train_dir="$MODEL_DIR" \
16 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
17 |       --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \
18 |       --frame_features=True \
19 |       --feature_names="rgb,audio" \
20 |       --feature_sizes="1024,128" \
21 |       --model=$MODEL \
22 |       --moe_num_mixtures=8 \
23 |       --lstm_attentions=8 \
24 |       --rnn_swap_memory=True \
25 |       --batch_size=32 \
26 |       --run_once=True
27 |   fi
28 | done
29 |
30 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/eval_scripts/eval-video-deep-chain.sh:
--------------------------------------------------------------------------------
1 | GPU_ID=1
2 | EVERY=500
3 | MODEL=DeepChainModel
4 | MODEL_DIR="../model/video_deep_chain_moe4"
5 |
6 | start="${1:-0}"  # lower bound on the checkpoint step; defaults to 0 so the -gt test never sees an empty operand
7 | DIR="$(pwd)"
8 |
9 | for checkpoint in $(cd "$MODEL_DIR" && python "${DIR}/training_utils/select.py" "$EVERY"); do  # quoted: $(pwd) may contain spaces
10 |   echo $checkpoint;
11 |   if [ "$checkpoint" -gt "$start" ]; then
12 |     echo $checkpoint;
13 |     CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
14 |       --train_dir="$MODEL_DIR" \
15 |       --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
16 |       --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \
17 |       --frame_features=False \
18 |       --feature_names="mean_rgb,mean_audio" \
19 |       --feature_sizes="1024,128" \
20 |       --batch_size=128 \
21 |       --model=$MODEL \
22 |       --deep_chain_relu_cells=200 \
23 |       --deep_chain_layers=3 \
24 |       --moe_num_mixtures=4 \
25 |       --num_readers=1 \
26 |       --run_once=True
27 |   fi
28 | done
29 |
30 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/infer_scripts/infer-distillation-cnn-dcc.sh:
--------------------------------------------------------------------------------
1 | # Run inference-pre-ensemble.py with the distillchain_cnn_dcc checkpoint over each listed data part (here only "train").
2 | for part in train; do
3 |   CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \
4 |     --output_dir="/Youtube-8M/model_predictions_local/${part}/distillation/distillchain_cnn_dcc" \
5 |
--model_checkpoint_path="../model/distillation/distillchain_cnn_dcc/model.ckpt-74488" \
6 |     --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \
7 |     --frame_features=True \
8 |     --feature_names="rgb,audio" \
9 |     --feature_sizes="1024,128" \
10 |     --distill_data_pattern="/Youtube-8M/model_predictions/${part}/distillation/ensemble_mean_model/*.tfrecord" \
11 |     --distillation_features=True \
12 |     --distillation_as_input=True \
13 |     --model=DistillchainCnnDeepCombineChainModel \
14 |     --deep_chain_layers=3 \
15 |     --deep_chain_relu_cells=256 \
16 |     --moe_num_mixtures=4 \
17 |     --batch_size=128 \
18 |     --file_size=4096
19 | done
20 |
--------------------------------------------------------------------------------
/youtube-8m-wangheda/infer_scripts/infer-distillchain-cnn-dcc.sh:
--------------------------------------------------------------------------------
1 | # Run inference-pre-ensemble.py with DistillchainCnnDeepCombineChainModel (checkpoint 113446) once per data part: ensemble_train, test, ensemble_validate.
2 | for part in ensemble_train test ensemble_validate; do
3 |   CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \
4 |     --output_dir="/Youtube-8M/model_predictions/${part}/distillchain_cnn_dcc" \
5 |     --model_checkpoint_path="../model/distillchain_cnn_dcc/model.ckpt-113446" \
6 |     --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \
7 |     --frame_features=True \
8 |     --feature_names="rgb,audio" \
9 |     --feature_sizes="1024,128" \
10 |     --distill_data_pattern="/Youtube-8M/model_predictions/${part}/distillation/ensemble_mean_model/*.tfrecord" \
11 |     --distillation_features=True \
12 |     --distillation_as_input=True \
13 |     --model=DistillchainCnnDeepCombineChainModel \
14 |     --deep_chain_layers=3 \
15 |     --deep_chain_relu_cells=256 \
16 |     --moe_num_mixtures=4 \
17 |     --batch_size=32 \
18 |     --file_size=4096
19 | done
20 |
--------------------------------------------------------------------------------
/youtube-8m-ensemble/ensemble_scripts/infer-attention_moe_matrix_model.sh:
--------------------------------------------------------------------------------
1 | model=$1  # model directory name under ../model
2 | conf=$2  # file whose whitespace-separated entries are sub-directories of the test prediction path
3 | checkpoint=$3  # step suffix of the checkpoint to load (model.ckpt-<step>)
4 |
test_path=/Youtube-8M/model_predictions/test 5 | 6 | test_data_patterns="" 7 | for d in $(cat $conf); do 8 | test_data_patterns="${test_path}/${d}/*.tfrecord${test_data_patterns:+,$test_data_patterns}" 9 | done 10 | echo "$test_data_patterns" 11 | input_data_pattern="${test_path}/model_input/*.tfrecord" 12 | 13 | #CUDA_VISIBLE_DEVICES=0 python inference.py \ 14 | python inference.py \ 15 | --model_checkpoint_path="../model/${model}/model.ckpt-${checkpoint}" \ 16 | --output_file="../model/${model}/predictions.${model}.csv" \ 17 | --model="AttentionMatrixModel" \ 18 | --moe_num_mixtures=4 \ 19 | --attention_relu_cells=256 \ 20 | --attention_matrix_rank=4 \ 21 | --batch_size=1024 \ 22 | --input_data_pattern="$input_data_pattern" \ 23 | --input_data_patterns="$test_data_patterns" 24 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-lstmmem-noise.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=1000 4 | MODEL=LstmMemoryModel 5 | MODEL_DIR="../model/lstmmemory1024_moe8_noise" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --model=$MODEL \ 22 | --moe_num_mixtures=8 \ 23 | --lstm_cells=1024 \ 24 | --lstm_layers=2 \ 25 | --batch_size=64 \ 26 | --rnn_swap_memory=True \ 27 | --run_once=True 28 | fi 29 | done 30 | 31 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-lstmmem2048.sh: 
-------------------------------------------------------------------------------- 1 | 2 | GPU_ID=0 3 | EVERY=1000 4 | MODEL=LstmMemoryModel 5 | MODEL_DIR="../model/lstmmemory2048_moe4_batch128" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --model=$MODEL \ 22 | --moe_num_mixtures=4 \ 23 | --lstm_cells=2048 \ 24 | --lstm_layers=2 \ 25 | --batch_size=64 \ 26 | --rnn_swap_memory=True \ 27 | --run_once=True 28 | fi 29 | done 30 | 31 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-lstmmemory-layer1.sh: -------------------------------------------------------------------------------- 1 | GPU_ID="1" 2 | EVERY=2000 3 | MODEL=LstmMemoryModel 4 | MODEL_DIR="../model/lstmmemory1024_layer1_moe8" 5 | 6 | start=$1 7 | DIR="$(pwd)" 8 | 9 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 10 | echo $checkpoint; 11 | if [ $checkpoint -gt $start ]; then 12 | echo $checkpoint; 13 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 14 | --train_dir="$MODEL_DIR" \ 15 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 16 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 17 | --frame_features=True \ 18 | --feature_names="rgb,audio" \ 19 | --feature_sizes="1024,128" \ 20 | --model=$MODEL \ 21 | --lstm_cells=1024 \ 22 | --lstm_layers=1 \ 23 | --moe_num_mixtures=8 \ 24 | --rnn_swap_memory=False \ 25 | --batch_size=128 \ 26 | --run_once=True 27 | fi 28 | done 
29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-lstmoutput-parallel.sh: -------------------------------------------------------------------------------- 1 | GPU_ID=1 2 | EVERY=500 3 | MODEL=LstmParallelFinaloutputModel 4 | MODEL_DIR="../model/lstmparallelfinaloutput1024_moe8" 5 | 6 | start=$1 7 | DIR="$(pwd)" 8 | 9 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 10 | echo $checkpoint; 11 | if [ $checkpoint -gt $start ]; then 12 | echo $checkpoint; 13 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 14 | --train_dir="$MODEL_DIR" \ 15 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 16 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 17 | --frame_features=True \ 18 | --feature_names="rgb,audio" \ 19 | --feature_sizes="1024,128" \ 20 | --batch_size=64 \ 21 | --model=$MODEL \ 22 | --lstm_layers=2 \ 23 | --lstm_cells="1024,128" \ 24 | --moe_num_mixtures=8 \ 25 | --num_readers=1 \ 26 | --run_once=True 27 | fi 28 | done 29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-moe-batchagreement1.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=100 4 | MODEL=MoeModel 5 | MODEL_DIR="../model/video_moe16_batchagreement0.001" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 |
--label_loss=BatchAgreementCrossEntropyLoss \ 22 | --batch_agreement=0.001 \ 23 | --batch_size=128 \ 24 | --model=$MODEL \ 25 | --moe_num_mixtures=16 \ 26 | --run_once=True 27 | fi 28 | done 29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-moe-batchagreement2.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=100 4 | MODEL=MoeModel 5 | MODEL_DIR="../model/video_moe16_batchagreement0.003" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --label_loss=BatchAgreementCrossEntropyLoss \ 22 | --batch_agreement=0.003 \ 23 | --batch_size=128 \ 24 | --model=$MODEL \ 25 | --moe_num_mixtures=16 \ 26 | --run_once=True 27 | fi 28 | done 29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-moe-batchagreement3.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=100 4 | MODEL=MoeModel 5 | MODEL_DIR="../model/video_moe16_batchagreement0.01" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | 
--eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --label_loss=BatchAgreementCrossEntropyLoss \ 22 | --batch_agreement=0.01 \ 23 | --batch_size=128 \ 24 | --model=$MODEL \ 25 | --moe_num_mixtures=16 \ 26 | --run_once=True 27 | fi 28 | done 29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-chaining-deep-cnn.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/deep_cnn_deep_combine_chain/" \ 3 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --model=DeepCnnDeepCombineChainModel \ 8 | --deep_chain_layers=2 \ 9 | --deep_chain_relu_cells=256 \ 10 | --deep_cnn_base_size=128 \ 11 | --label_loss=MultiTaskCrossEntropyLoss \ 12 | --multitask=True \ 13 | --support_type="label,label" \ 14 | --support_loss_percent=0.1 \ 15 | --moe_num_mixtures=4 \ 16 | --batch_size=128 \ 17 | --dropout=True \ 18 | --keep_prob=1.0 \ 19 | --num_readers=4 \ 20 | --num_epochs=2 \ 21 | --keep_checkpoint_every_n_hours=0.5 \ 22 | --base_learning_rate=0.001 23 | 24 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/eval_scripts/eval-video_notzero_combine_chain.sh: -------------------------------------------------------------------------------- 1 | GPU_ID=0 2 | EVERY=1000 3 | MODEL=MoeMix4Model 4 | MODEL_DIR="../model/video_level_moemix4_model" 5 | start=0 6 | DIR="$(pwd)" 7 | 8 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 9 | echo $checkpoint; 10 | if [[ $checkpoint -gt $start ]]; then 11 | 12 | echo $checkpoint; 13 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 14 | 
--train_dir="$MODEL_DIR" \ 15 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 16 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 17 | --frame_features=False \ 18 | --feature_names="mean_rgb,mean_audio" \ 19 | --feature_sizes="1024,128" \ 20 | --batch_size=128 \ 21 | --model=$MODEL \ 22 | --class_size=100 \ 23 | --moe_num_mixtures=4 \ 24 | --moe_layers=3 \ 25 | --run_once=True 26 | fi 27 | done 28 | 29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/infer_scripts/infer-lstmglu2_1024_moe8.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in ensemble_train ensemble_validate test; do 3 | #for part in ensemble_validate test; do 4 | CUDA_VISIBLE_DEVICES=1 python inference_with_rebuild.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/lstmglu2_cell1024_layer1_moe8" \ 6 | --model_checkpoint_path="../model/frame_level_lstm_glu2_model/model.ckpt-132334" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --model=LstmGlu2Model \ 12 | --video_level_classifier_model=MoeModel \ 13 | --moe_num_extend=4 \ 14 | --moe_method=None \ 15 | --lstm_cells=1024 \ 16 | --moe_num_mixtures=8 \ 17 | --train=False \ 18 | --batch_size=32 \ 19 | --file_size=4096 20 | done 21 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/infer-matrix_model.sh: -------------------------------------------------------------------------------- 1 | model=$1 2 | conf=$2 3 | checkpoint=$3 4 | 5 | DEFAULT_GPU_ID=0 6 | if [ -z ${CUDA_VISIBLE_DEVICES+x} ]; then 7 | GPU_ID=$DEFAULT_GPU_ID 8 | echo "set CUDA_VISIBLE_DEVICES to default('$GPU_ID')" 9 | else 10 | GPU_ID=$CUDA_VISIBLE_DEVICES 11 | echo "set CUDA_VISIBLE_DEVICES to external('$GPU_ID')" 12 | fi 13 | 14 | 
test_path=/Youtube-8M/model_predictions/test 15 | test_data_patterns="" 16 | for d in $(cat $conf); do 17 | test_data_patterns="${test_path}/${d}/*.tfrecord${test_data_patterns:+,$test_data_patterns}" 18 | done 19 | echo "$test_data_patterns" 20 | 21 | CUDA_VISIBLE_DEVICES="$GPU_ID" python inference.py \ 22 | --model_checkpoint_path="../model/${model}/model.ckpt-${checkpoint}" \ 23 | --output_file="../model/${model}/predictions.csv" \ 24 | --model="MatrixRegressionModel" \ 25 | --batch_size=1024 \ 26 | --input_data_patterns="$test_data_patterns" 27 | -------------------------------------------------------------------------------- /youtube-8m-ensemble/ensemble_scripts/train-matrix_model.sh: -------------------------------------------------------------------------------- 1 | model=$1 2 | conf=$2 3 | postfix=$3 4 | 5 | DEFAULT_GPU_ID=0 6 | if [ -z ${CUDA_VISIBLE_DEVICES+x} ]; then 7 | GPU_ID=$DEFAULT_GPU_ID 8 | echo "set CUDA_VISIBLE_DEVICES to default('$GPU_ID')" 9 | else 10 | GPU_ID=$CUDA_VISIBLE_DEVICES 11 | echo "set CUDA_VISIBLE_DEVICES to external('$GPU_ID')" 12 | fi 13 | 14 | train_path=/Youtube-8M/model_predictions${postfix}/ensemble_train 15 | train_data_patterns="" 16 | for d in $(cat $conf); do 17 | train_data_patterns="${train_path}/${d}/*.tfrecord${train_data_patterns:+,$train_data_patterns}" 18 | done 19 | echo "$train_data_patterns" 20 | 21 | CUDA_VISIBLE_DEVICES="$GPU_ID" python train.py \ 22 | --train_dir="../model/${model}" \ 23 | --train_data_patterns="$train_data_patterns" \ 24 | --model=MatrixRegressionModel \ 25 | --keep_checkpoint_every_n_hours=0.1 \ 26 | --batch_size=1024 \ 27 | --num_epochs=2 28 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-lstmmem-augmenter.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=0 3 | EVERY=1000 4 | MODEL=LstmMemoryModel 5 | MODEL_DIR="../model/lstmmemory1024_moe8_augment" 6 | 7 | start=$1 8 
| DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --model=$MODEL \ 22 | --moe_num_mixtures=8 \ 23 | --lstm_cells=1024 \ 24 | --lstm_layers=2 \ 25 | --batch_size=64 \ 26 | --rnn_swap_memory=True \ 27 | --run_once=True 28 | fi 29 | done 30 | 31 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-lstmmem-input-noise.sh: -------------------------------------------------------------------------------- 1 | 2 | noise_level=$1 3 | start=$2 4 | 5 | EVERY=1000 6 | MODEL=LstmMemoryModel 7 | MODEL_DIR="../model/lstmmemory1024_moe8_input_noise/noise_level_$noise_level" 8 | 9 | DIR="$(pwd)" 10 | 11 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 12 | echo $checkpoint; 13 | if [ $checkpoint -gt $start ]; then 14 | echo $checkpoint; 15 | python eval.py \ 16 | --train_dir="$MODEL_DIR" \ 17 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 18 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 19 | --frame_features=True \ 20 | --feature_names="rgb,audio" \ 21 | --feature_sizes="1024,128" \ 22 | --model=LstmMemoryModel \ 23 | --lstm_layers=2 \ 24 | --lstm_cells=1024 \ 25 | --moe_num_mixtures=8 \ 26 | --batch_size=128 \ 27 | --num_readers=4 \ 28 | --run_once=True 29 | fi 30 | done 31 | 32 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-lstmmem-parallel.sh: 
-------------------------------------------------------------------------------- 1 | 2 | start=$1 3 | GPU_ID=0 4 | MODEL=LstmParallelMemoryModel 5 | MODEL_DIR="../model/lstmparallelmemory1024_moe8" 6 | 7 | for checkpoint in $(for filename in $MODEL_DIR/model.ckpt-*.meta; do echo $filename | grep -o "ckpt-[0123456789]*.meta" | cut -d '-' -f 2 | cut -d '.' -f 1; done | sort -n); do 8 | if [ $checkpoint -gt $start ]; then 9 | echo $checkpoint; 10 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 11 | --train_dir="$MODEL_DIR" \ 12 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 13 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 14 | --frame_features=True \ 15 | --feature_names="rgb,audio" \ 16 | --feature_sizes="1024,128" \ 17 | --lstm_cells="1024,128" \ 18 | --batch_size=128 \ 19 | --model=$MODEL \ 20 | --moe_num_mixtures=8 \ 21 | --rnn_swap_memory=True \ 22 | --run_once=True 23 | fi 24 | done 25 | 26 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-lstmmemory-audio.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID="0" 3 | EVERY=2000 4 | MODEL=LstmMemoryModel 5 | MODEL_DIR="../model/audio_lstmmemory1024_layer1_moe8" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="audio" \ 20 | --feature_sizes="128" \ 21 | --model=$MODEL \ 22 | --lstm_cells=512 \ 23 | --lstm_layers=1 \ 24 | --moe_num_mixtures=8 \ 25 | --rnn_swap_memory=False \ 26 | --batch_size=128 \ 27 | --run_once=True 
28 | fi 29 | done 30 | 31 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-cascade-76-multiscale-cnn-lstm.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/distillchain_v2_multiscale_cnnlstm" \ 3 | --train_data_pattern="/Youtube-8M/distillation_v2/frame/train/*.tfrecord" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --distillation_features=True \ 8 | --distillation_as_input=True \ 9 | --model=DistillchainMultiscaleCnnLstmModel \ 10 | --multiscale_cnn_lstm_layers=3 \ 11 | --moe_num_mixtures=4 \ 12 | --rnn_swap_memory=True \ 13 | --multitask=True \ 14 | --label_loss=MultiTaskCrossEntropyLoss \ 15 | --support_loss_percent=1.0 \ 16 | --support_type="label,label,label" \ 17 | --is_training=True \ 18 | --num_readers=4 \ 19 | --batch_size=128 \ 20 | --num_epochs=3 \ 21 | --base_learning_rate=0.001 22 | 23 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/infer_scripts/infer-lstm_multiscale4_moe4.sh: -------------------------------------------------------------------------------- 1 | 2 | #for part in ensemble_train ensemble_validate test; do 3 | for part in ensemble_validate test; do 4 | CUDA_VISIBLE_DEVICES=0 python inference_with_rebuild.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/lstm_multiscale4_moe4" \ 6 | --model_checkpoint_path="../model/frame_level_lstm_multiscale_model/model.ckpt-227882" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --model=LstmMultiscaleModel \ 12 | --video_level_classifier_model=MoeModel \ 13 | --moe_num_extend=4 \ 14 | --moe_method=None \ 15 | --lstm_cells=1024 \ 16 | --moe_num_mixtures=4 \ 17 | --train=False \ 18 | 
--batch_size=32 \ 19 | --file_size=4096 20 | done 21 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/infer_scripts/infer-lstmbiglu_1024_moe8.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in ensemble_train ensemble_validate test; do 3 | #for part in ensemble_validate test; do 4 | CUDA_VISIBLE_DEVICES=1 python inference_with_rebuild.py \ 5 | --output_dir="/Youtube-8M/model_predictions/${part}/lstmbiglu_cell1024_layer1_moe8" \ 6 | --model_checkpoint_path="../model/frame_level_lstm_biglu_model/model.ckpt-130983" \ 7 | --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \ 8 | --frame_features=True \ 9 | --feature_names="rgb,audio" \ 10 | --feature_sizes="1024,128" \ 11 | --model=LstmBigluModel \ 12 | --video_level_classifier_model=MoeModel \ 13 | --moe_num_extend=4 \ 14 | --moe_method=None \ 15 | --lstm_cells=1024 \ 16 | --moe_num_mixtures=4 \ 17 | --train=False \ 18 | --batch_size=32 \ 19 | --file_size=4096 20 | done 21 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-lstmmem-layernorm.sh: -------------------------------------------------------------------------------- 1 | GPU_ID=0 2 | EVERY=500 3 | MODEL=LayerNormLstmMemoryModel 4 | MODEL_DIR="../model/layernormlstmmemory1024_moe8" 5 | 6 | start=$1 7 | DIR="$(pwd)" 8 | 9 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 10 | echo $checkpoint; 11 | if [ $checkpoint -gt $start ]; then 12 | echo $checkpoint; 13 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 14 | --train_dir="$MODEL_DIR" \ 15 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 16 | --eval_data_pattern="/Youtube-8M-validate/validatea*" \ 17 | --frame_features=True \ 18 | --feature_names="rgb,audio" \ 19 | --feature_sizes="1024,128" \ 20 | --batch_size=128 \ 21 | --model=$MODEL \ 22 | --lstm_cells=1024 \ 
23 | --lstm_layers=2 \ 24 | --moe_num_mixtures=8 \ 25 | --num_readers=1 \ 26 | --rnn_swap_memory=True \ 27 | --run_once=True 28 | fi 29 | done 30 | 31 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-lstmmem-lowres.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID="0" 3 | EVERY=1000 4 | MODEL=LstmMemoryModel 5 | MODEL_DIR="../model/lstmmemory_moe8_lowres" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=32 \ 22 | --model=$MODEL \ 23 | --moe_num_mixtures=8 \ 24 | --lstm_layers=2 \ 25 | --feature_transformer=ResolutionTransformer \ 26 | --rnn_swap_memory=True \ 27 | --run_once=True 28 | fi 29 | done 30 | 31 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-moe-topk-batchagreement1.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=100 4 | MODEL=MoeModel 5 | MODEL_DIR="../model/video_moe16_topk_batchagreement0.3" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | 
--eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --label_loss=TopKBatchAgreementCrossEntropyLoss \ 22 | --batch_agreement=0.3 \ 23 | --batch_size=128 \ 24 | --model=$MODEL \ 25 | --moe_num_mixtures=16 \ 26 | --run_once=True 27 | fi 28 | done 29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-moe-topk-batchagreement2.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=100 4 | MODEL=MoeModel 5 | MODEL_DIR="../model/video_moe16_topk_batchagreement1" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt $start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --label_loss=TopKBatchAgreementCrossEntropyLoss \ 22 | --batch_agreement=1 \ 23 | --batch_size=128 \ 24 | --model=$MODEL \ 25 | --moe_num_mixtures=16 \ 26 | --run_once=True 27 | fi 28 | done 29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/eval_scripts/eval-moe-topk-batchagreement3.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=100 4 | MODEL=MoeModel 5 | MODEL_DIR="../model/video_moe16_topk_batchagreement3" 6 | 7 | start=$1 8 | DIR="$(pwd)" 9 | 10 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 11 | echo $checkpoint; 12 | if [ $checkpoint -gt 
$start ]; then 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --label_loss=TopKBatchAgreementCrossEntropyLoss \ 22 | --batch_agreement=3 \ 23 | --batch_size=128 \ 24 | --model=$MODEL \ 25 | --moe_num_mixtures=16 \ 26 | --run_once=True 27 | fi 28 | done 29 | 30 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/eval_scripts/eval-lstm_attention8_max.sh: -------------------------------------------------------------------------------- 1 | GPU_ID=1 2 | EVERY=1000 3 | MODEL=LstmExtendModel 4 | MODEL_DIR="../model/frame_level_lstm_extend8_model" 5 | start=0 6 | DIR="$(pwd)" 7 | 8 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 9 | echo $checkpoint; 10 | if [[ $checkpoint -gt $start ]]; then 11 | 12 | echo $checkpoint; 13 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 14 | --train_dir="$MODEL_DIR" \ 15 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 16 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 17 | --frame_features=True \ 18 | --feature_names="rgb,audio" \ 19 | --feature_sizes="1024,128" \ 20 | --batch_size=64 \ 21 | --model=$MODEL \ 22 | --video_level_classifier_model=MoeExtendModel \ 23 | --moe_num_extend=8 \ 24 | --moe_num_mixtures=8 \ 25 | --train=False \ 26 | --run_once=True 27 | fi 28 | done 29 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/eval_scripts/eval-video_knowledge_combine_chain.sh: -------------------------------------------------------------------------------- 1 | GPU_ID=0 2 | EVERY=1000 3 | MODEL=MoeKnowledgeModel 4 | 
MODEL_DIR="../model/video_level_moeknowledge_model" 5 | start=0 6 | DIR="$(pwd)" 7 | 8 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 9 | echo $checkpoint; 10 | if [[ $checkpoint -gt $start ]]; then 11 | 12 | echo $checkpoint; 13 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 14 | --train_dir="$MODEL_DIR" \ 15 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 16 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 17 | --frame_features=False \ 18 | --feature_names="mean_rgb,mean_audio" \ 19 | --feature_sizes="1024,128" \ 20 | --batch_size=128 \ 21 | --model=$MODEL \ 22 | --class_size=100 \ 23 | --moe_num_mixtures=4 \ 24 | --moe_layers=3 \ 25 | --run_once=True 26 | fi 27 | done 28 | 29 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/training_scripts/run-chaining-shared-lstm.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --train_dir="../model/lstmmem1024_deep_combine_chain_length" \ 3 | --train_data_pattern="/Youtube-8M/data/frame/train/train*" \ 4 | --frame_features=True \ 5 | --feature_names="rgb,audio" \ 6 | --feature_sizes="1024,128" \ 7 | --model=LstmMemoryModel \ 8 | --video_level_classifier_model=DeepCombineChainModel \ 9 | --moe_num_mixtures=4 \ 10 | --deep_chain_relu_cells=256 \ 11 | --deep_chain_layers=1 \ 12 | --deep_chain_use_length=True \ 13 | --label_loss=MultiTaskCrossEntropyLoss \ 14 | --multitask=True \ 15 | --support_type="label" \ 16 | --num_supports=4716 \ 17 | --support_loss_percent=0.05 \ 18 | --keep_checkpoint_every_n_hours=1.0 \ 19 | --base_learning_rate=0.0005 \ 20 | --num_readers=8 \ 21 | --num_epochs=5 \ 22 | --rnn_swap_memory=True \ 23 | --batch_size=128 24 | 25 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/eval_scripts/eval-lstm_shortlayers_moe8.sh:
-------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=1000 4 | MODEL=LstmLayerModel 5 | MODEL_DIR="../model/frame_level_lstm_layer_model" 6 | start=0 7 | DIR="$(pwd)" 8 | 9 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 10 | echo $checkpoint; 11 | if [[ $checkpoint -gt $start ]]; then 12 | 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=64 \ 22 | --model=$MODEL \ 23 | --video_level_classifier_model=MoeModel \ 24 | --lstm_length=10 \ 25 | --moe_num_mixtures=8 \ 26 | --train=False \ 27 | --run_once=True 28 | fi 29 | done 30 | 31 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/eval_scripts/eval-video_softmax_combine_chain.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=0 3 | EVERY=1000 4 | MODEL=MoeSoftmaxModel 5 | MODEL_DIR="../model/video_level_moesoftmax_model" 6 | start=0 7 | DIR="$(pwd)" 8 | 9 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 10 | echo $checkpoint; 11 | if [[ $checkpoint -gt $start ]]; then 12 | 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/video/validate/validatea*" \ 18 | --frame_features=False \ 19 | --feature_names="mean_rgb,mean_audio" \ 20 | --feature_sizes="1024,128" \ 21 | --batch_size=128 \ 22 | --model=$MODEL \ 23 | --class_size=100 \ 24 | --moe_num_mixtures=4 \ 25 | --moe_layers=3 \ 26 | --run_once=True 27 | fi 
28 | done 29 | 30 | 31 | -------------------------------------------------------------------------------- /youtube-8m-wangheda/infer_scripts/infer-distillchain-v2-video-dcc.sh: -------------------------------------------------------------------------------- 1 | 2 | for part in ensemble_train ensemble_validate test; do 3 | CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \ 4 | --output_dir="/Youtube-8M/model_predictions/${part}/distillchain_v2_video_dcc" \ 5 | --model_checkpoint_path="../model/distillchain_v2_video_dcc/model.ckpt-17137" \ 6 | --input_data_pattern="/Youtube-8M/data/video/${part}/*.tfrecord" \ 7 | --distill_data_pattern="/Youtube-8M/model_predictions/${part}/distillation/ensemble_v2_matrix_model/*.tfrecord" \ 8 | --frame_features=False \ 9 | --feature_names="mean_rgb,mean_audio" \ 10 | --feature_sizes="1024,128" \ 11 | --distillation_features=True \ 12 | --distillation_as_input=True \ 13 | --model=DistillchainDeepCombineChainModel \ 14 | --moe_num_mixtures=4 \ 15 | --deep_chain_layers=4 \ 16 | --deep_chain_relu_cells=256 \ 17 | --batch_size=1024 \ 18 | --file_size=4096 19 | done 20 | -------------------------------------------------------------------------------- /youtube-8m-zhangteng/eval_scripts/eval-lstm2_attention8_max.sh: -------------------------------------------------------------------------------- 1 | 2 | GPU_ID=1 3 | EVERY=1000 4 | MODEL=InputExtendModel 5 | MODEL_DIR="../model/frame_level_input_extend_model" 6 | start=0 7 | DIR="$(pwd)" 8 | 9 | for checkpoint in $(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY); do 10 | echo $checkpoint; 11 | if [[ $checkpoint -gt $start ]]; then 12 | 13 | echo $checkpoint; 14 | CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \ 15 | --train_dir="$MODEL_DIR" \ 16 | --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \ 17 | --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \ 18 | --frame_features=True \ 19 | --feature_names="rgb,audio" \ 20 | 
# Evaluate saved LstmMultiscaleModel checkpoints on the frame-level validation
# shards.
#
# training_utils/select.py is run from inside MODEL_DIR with EVERY as its only
# argument; every word it prints is used as a checkpoint step. Each step is
# echoed once, and steps greater than `start` are echoed again and evaluated.

GPU_ID=1
EVERY=1000
MODEL=LstmMultiscaleModel
MODEL_DIR="../model/frame_level_lstm_multiscale_model"
start=0
DIR="$(pwd)"

eval_data="/Youtube-8M/data/frame/validate/validatea*"
checkpoints=$(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY)

for checkpoint in $checkpoints; do
  echo $checkpoint
  # Guard clause: skip checkpoints at or below the start step.
  [[ $checkpoint -gt $start ]] || continue

  echo $checkpoint
  CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
    --train_dir="$MODEL_DIR" \
    --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
    --eval_data_pattern="$eval_data" \
    --frame_features=True \
    --feature_names="rgb,audio" \
    --feature_sizes="1024,128" \
    --batch_size=64 \
    --model=$MODEL \
    --video_level_classifier_model=MoeModel \
    --moe_num_extend=4 \
    --moe_num_mixtures=4 \
    --train=False \
    --run_once=True
done
# Evaluate saved LstmLookBackModel checkpoints on the frame-level validation
# shards.
#
# Usage: eval-lstm-look-back.sh [start]
#   start  only checkpoints whose step is greater than this value are
#          evaluated (default: 0)
#
# training_utils/select.py is run from inside MODEL_DIR with EVERY as its only
# argument; every word it prints is used as a checkpoint step.

GPU_ID="1"
EVERY=1000
MODEL=LstmLookBackModel
MODEL_DIR="../model/lstmlookback1024_moe8"

# Default to 0 so that running without an argument evaluates every selected
# checkpoint instead of making the numeric test below fail on an empty operand.
start="${1:-0}"
DIR="$(pwd)"

for checkpoint in $(cd "$MODEL_DIR" && python "${DIR}/training_utils/select.py" $EVERY); do
  echo "$checkpoint"
  if [ "$checkpoint" -gt "$start" ]; then
    echo "$checkpoint"
    CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
      --train_dir="$MODEL_DIR" \
      --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
      --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \
      --frame_features=True \
      --feature_names="rgb,audio" \
      --feature_sizes="1024,128" \
      --batch_size=32 \
      --model=$MODEL \
      --moe_num_mixtures=8 \
      --lstm_layers=2 \
      --lstm_cells="1024,128" \
      --lstm_look_back=3 \
      --rnn_swap_memory=True \
      --run_once=True
  fi
done
# Run inference with the LstmGateModel checkpoint (step 151107) over the
# ensemble_train, ensemble_validate and test splits (frame-level features).
#
# To skip the ensemble_train split, use instead:
#   for part in ensemble_validate test; do

output_name="lstmgate_cell1024_layer1_moe8"
checkpoint_path="../model/frame_level_lstm_gate_model/model.ckpt-151107"

for part in ensemble_train ensemble_validate test; do
  CUDA_VISIBLE_DEVICES=0 python inference_with_rebuild.py \
    --output_dir="/Youtube-8M/model_predictions/${part}/${output_name}" \
    --model_checkpoint_path="$checkpoint_path" \
    --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \
    --frame_features=True \
    --feature_names="rgb,audio" \
    --feature_sizes="1024,128" \
    --model=LstmGateModel \
    --video_level_classifier_model=MoeModel \
    --moe_num_extend=4 \
    --moe_method=None \
    --lstm_cells=1024 \
    --moe_num_mixtures=8 \
    --train=False \
    --batch_size=32 \
    --file_size=4096
done
# Train DistillchainLstmMemoryDeepCombineChainModel on the frame-level
# distillation training records, writing checkpoints to
# ../model/distillchain_multilstm_dcc.

train_dir="../model/distillchain_multilstm_dcc"
train_data="/Youtube-8M/distillation/frame/train/*.tfrecord"

# NOTE(review): run-cascade-75-chaining-lstm-cnn.sh spells the corresponding
# flag --distillchain_relu_cells; confirm which name train.py defines.
CUDA_VISIBLE_DEVICES=0 python train.py \
  --train_dir="$train_dir" \
  --train_data_pattern="$train_data" \
  --frame_features=True \
  --feature_names="rgb,audio" \
  --feature_sizes="1024,128" \
  --distillation_features=True \
  --distillation_as_input=True \
  --model=DistillchainLstmMemoryDeepCombineChainModel \
  --lstm_layers=1 \
  --lstm_cells=1024 \
  --moe_num_mixtures=4 \
  --distillation_relu_cells=256 \
  --deep_chain_relu_cells=256 \
  --deep_chain_layers=2 \
  --rnn_swap_memory=True \
  --multitask=True \
  --label_loss=MultiTaskCrossEntropyLoss \
  --support_type="label,label" \
  --support_loss_percent=0.1 \
  --num_readers=4 \
  --batch_size=128 \
  --num_epochs=3 \
  --keep_checkpoint_every_n_hours=1.0 \
  --base_learning_rate=0.001
# Run inference with the LstmMultiscale2Model checkpoint (step 193803) over the
# ensemble_train, ensemble_validate and test splits (frame-level features).
#
# To skip the ensemble_train split, use instead:
#   for part in ensemble_validate test; do

output_name="lstm_gate_multiscale4_moe4"
checkpoint_path="../model/frame_level_lstm_multiscale2_model/model.ckpt-193803"

for part in ensemble_train ensemble_validate test; do
  CUDA_VISIBLE_DEVICES=0 python inference_with_rebuild.py \
    --output_dir="/Youtube-8M/model_predictions/${part}/${output_name}" \
    --model_checkpoint_path="$checkpoint_path" \
    --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \
    --frame_features=True \
    --feature_names="rgb,audio" \
    --feature_sizes="1024,128" \
    --model=LstmMultiscale2Model \
    --video_level_classifier_model=MoeModel \
    --moe_num_extend=4 \
    --moe_method=None \
    --lstm_cells=1024 \
    --moe_num_mixtures=4 \
    --norm=False \
    --train=False \
    --batch_size=32 \
    --file_size=4096
done
# Run inference with the distillchain_v2_lstmparalleloutput checkpoint over the
# ensemble_train, ensemble_validate and test splits (frame-level features),
# passing the ensemble_v2_matrix_model predictions as the distillation data.

model_name="distillchain_v2_lstmparalleloutput"
checkpoint_step=74190
predictions_root="/Youtube-8M/model_predictions"

for part in ensemble_train ensemble_validate test; do
  CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \
    --output_dir="${predictions_root}/${part}/${model_name}" \
    --model_checkpoint_path="../model/${model_name}/model.ckpt-${checkpoint_step}" \
    --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \
    --frame_features=True \
    --feature_names="rgb,audio" \
    --feature_sizes="1024,128" \
    --distill_data_pattern="${predictions_root}/${part}/distillation/ensemble_v2_matrix_model/*.tfrecord" \
    --distillation_features=False \
    --distillation_as_input=True \
    --model=DistillchainLstmParallelFinaloutputModel \
    --rnn_swap_memory=True \
    --lstm_cells="1024,128" \
    --moe_num_mixtures=4 \
    --batch_size=128 \
    --file_size=4096
done
"""Write a random resample of a conf file.

Reads the non-empty lines of --main_conf_file and writes to --sub_conf_file
the same number of lines, each drawn uniformly at random (with replacement)
from the input lines.
"""
import random
from datetime import datetime
import tensorflow as tf
from tensorflow import flags

FLAGS = flags.FLAGS


def _read_methods(path):
    """Return the stripped, non-empty lines of the file at `path`."""
    with open(path) as conf:
        return [line.strip() for line in conf if line.strip()]


def _sample_with_replacement(methods):
    """Return len(methods) entries drawn uniformly from `methods`."""
    # An empty input yields an empty sample; random.choice is never called.
    return [random.choice(methods) for _ in methods]


if __name__ == "__main__":
    flags.DEFINE_string("main_conf_file", "",
                        "The conf file to sample from.")
    flags.DEFINE_string("sub_conf_file", "",
                        "The conf file to randomly generate.")

    methods = _read_methods(FLAGS.main_conf_file)

    # Seed from the current time. The timestamp is passed as a string because
    # random.seed() only accepts None, int, float, str, bytes or bytearray on
    # recent Python versions; a datetime object is rejected.
    random.seed(str(datetime.now()))
    sample_methods = _sample_with_replacement(methods)

    with open(FLAGS.sub_conf_file, "w") as out:
        out.writelines([m + "\n" for m in sample_methods])
# Train DistillchainLstmCnnDeepCombineChainModel on the frame-level
# distillation training records, writing checkpoints to
# ../model/distillchain_lstmcnn_dcc.

train_dir="../model/distillchain_lstmcnn_dcc"
train_data="/Youtube-8M/distillation/frame/train/*.tfrecord"

CUDA_VISIBLE_DEVICES=0 python train.py \
  --train_dir="$train_dir" \
  --train_data_pattern="$train_data" \
  --frame_features=True \
  --feature_names="rgb,audio" \
  --feature_sizes="1024,128" \
  --distillation_features=True \
  --distillation_as_input=True \
  --model=DistillchainLstmCnnDeepCombineChainModel \
  --deep_chain_layers=2 \
  --deep_chain_relu_cells=128 \
  --distillchain_relu_cells=256 \
  --moe_num_mixtures=4 \
  --lstm_layers=1 \
  --lstm_cells="1024,128" \
  --rnn_swap_memory=True \
  --multitask=True \
  --label_loss=MultiTaskCrossEntropyLoss \
  --support_type="label,label" \
  --support_loss_percent=0.1 \
  --num_readers=4 \
  --batch_size=128 \
  --num_epochs=3 \
  --keep_checkpoint_every_n_hours=1.0 \
  --base_learning_rate=0.001
# Train DistillchainDeepCombineChainModel on the video-level distillation
# training records with NoiseAugmenter input noise, writing checkpoints to
# ../model/distillchain_video_dcc.

noise_level=0.15

# The checkpoint-retention flag is spelled --keep_checkpoint_every_n_hours
# (plural), matching the name used by the other training scripts; the
# singular spelling previously used here does not name that flag exactly.
CUDA_VISIBLE_DEVICES=0 python train.py \
  --train_dir="../model/distillchain_video_dcc" \
  --train_data_pattern="/Youtube-8M/distillation/video/train/*.tfrecord" \
  --distillation_features=True \
  --distillation_as_input=True \
  --frame_features=False \
  --feature_names="mean_rgb,mean_audio" \
  --feature_sizes="1024,128" \
  --model=DistillchainDeepCombineChainModel \
  --moe_num_mixtures=4 \
  --deep_chain_layers=4 \
  --deep_chain_relu_cells=256 \
  --data_augmenter=NoiseAugmenter \
  --input_noise_level=$noise_level \
  --multitask=True \
  --label_loss=MultiTaskCrossEntropyLoss \
  --support_type="label,label,label,label" \
  --num_supports=18864 \
  --support_loss_percent=0.05 \
  --base_learning_rate=0.006 \
  --keep_checkpoint_every_n_hours=0.25 \
  --num_readers=5 \
  --num_epochs=6 \
  --batch_size=512
# Run the MatrixRegressionModel ensemble over the predictions of the models
# listed in a conf file.
#
# Positional arguments:
#   $1 model    name under ../model/ and of the output sub-directory
#   $2 conf     file whose whitespace-separated words are prediction directories
#   $3 part     data split directory name
#   $4 postfix  suffix appended to the model_predictions input root
#
# GPU selection: an already-set CUDA_VISIBLE_DEVICES is reused, otherwise
# DEFAULT_GPU_ID is used.

model=$1
conf=$2
part=$3
postfix=$4

DEFAULT_GPU_ID=0
if [ -n "${CUDA_VISIBLE_DEVICES+x}" ]; then
  GPU_ID=$CUDA_VISIBLE_DEVICES
  echo "set CUDA_VISIBLE_DEVICES to external('$GPU_ID')"
else
  GPU_ID=$DEFAULT_GPU_ID
  echo "set CUDA_VISIBLE_DEVICES to default('$GPU_ID')"
fi

test_path=/Youtube-8M/model_predictions${postfix}/${part}

# Build a comma-separated pattern list; each new entry is prepended, so the
# final list is in reverse conf order.
test_data_patterns=""
conf_entries=$(cat $conf)
for d in $conf_entries; do
  pattern="${test_path}/${d}/*.tfrecord"
  if [ -z "$test_data_patterns" ]; then
    test_data_patterns="$pattern"
  else
    test_data_patterns="${pattern},${test_data_patterns}"
  fi
done
echo "$test_data_patterns"

# NOTE(review): inputs are read from model_predictions${postfix} but the output
# directory below does not include the postfix - confirm this is intended.
CUDA_VISIBLE_DEVICES="$GPU_ID" python inference-pre-ensemble.py \
  --output_dir="/Youtube-8M/model_predictions/${part}/${model}" \
  --train_dir="../model/${model}" \
  --input_data_patterns="$test_data_patterns" \
  --model="MatrixRegressionModel" \
  --batch_size=1024 \
  --file_size=4096
# Evaluate saved LstmMemoryModel checkpoints (EngineerTransformer features) on
# the frame-level validation shards.
#
# Usage: eval-lstmmem-feature.sh [start]
#   start  only checkpoints whose step is greater than this value are
#          evaluated (default: 0)
#
# training_utils/select.py is run from inside MODEL_DIR with EVERY as its only
# argument; every word it prints is used as a checkpoint step.

GPU_ID="1"
EVERY=100
MODEL=LstmMemoryModel
MODEL_DIR="../model/lstmmemory_moe8_feature"

# Default to 0 so that running without an argument evaluates every selected
# checkpoint instead of making the numeric test below fail on an empty operand.
start="${1:-0}"
DIR="$(pwd)"

for checkpoint in $(cd "$MODEL_DIR" && python "${DIR}/training_utils/select.py" $EVERY); do
  echo "$checkpoint"
  if [ "$checkpoint" -gt "$start" ]; then
    echo "$checkpoint"
    CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
      --train_dir="$MODEL_DIR" \
      --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
      --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \
      --frame_features=True \
      --feature_names="rgb,audio" \
      --feature_sizes="1024,128" \
      --feature_transformer=EngineerTransformer \
      --engineer_types="identical,avg,std,diff" \
      --batch_size=32 \
      --model=$MODEL \
      --moe_num_mixtures=8 \
      --lstm_layers=2 \
      --rnn_swap_memory=True \
      --run_once=True
  fi
done
# Train DistillchainDeepCombineChainModel on the video-level distillation_v2
# training records with NoiseAugmenter input noise, writing checkpoints to
# ../model/distillchain_v2_video_dcc.

noise_level=0.15

# The checkpoint-retention flag is spelled --keep_checkpoint_every_n_hours
# (plural), matching the name used by the other training scripts; the
# singular spelling previously used here does not name that flag exactly.
CUDA_VISIBLE_DEVICES=0 python train.py \
  --train_dir="../model/distillchain_v2_video_dcc" \
  --train_data_pattern="/Youtube-8M/distillation_v2/video/train/*.tfrecord" \
  --distillation_features=True \
  --distillation_as_input=True \
  --frame_features=False \
  --feature_names="mean_rgb,mean_audio" \
  --feature_sizes="1024,128" \
  --model=DistillchainDeepCombineChainModel \
  --moe_num_mixtures=4 \
  --deep_chain_layers=4 \
  --deep_chain_relu_cells=256 \
  --data_augmenter=NoiseAugmenter \
  --input_noise_level=$noise_level \
  --multitask=True \
  --label_loss=MultiTaskCrossEntropyLoss \
  --support_type="label,label,label,label" \
  --num_supports=18864 \
  --support_loss_percent=0.05 \
  --base_learning_rate=0.007 \
  --keep_checkpoint_every_n_hours=0.25 \
  --num_readers=5 \
  --num_epochs=3 \
  --batch_size=512
"""Rank logged models by performance and print the best ones.

--log_file is read as alternating lines: a model line followed by a
performance line whose score is the text after its last "=". All
(score, model) pairs are written to --sorted_log_file in descending order,
and the models of the first --top_k pairs are printed, one per line.
"""
import sys
from tensorflow import flags

FLAGS = flags.FLAGS


def _rank_models(lines):
    """Return (perf, model) pairs from alternating log lines, best first.

    Even-indexed lines are model names and odd-indexed lines carry the score
    after the last "=". A trailing model line with no score line is dropped
    because zip() stops at the shorter sequence.

    Raises:
        ValueError: if a score line does not end in a parseable float.
    """
    # Lists (not map objects) so the code behaves the same on Python 2 and 3.
    models = [line.strip() for line in lines[::2]]
    perfs = [float(line.strip().split("=")[-1]) for line in lines[1::2]]
    return sorted(zip(perfs, models), reverse=True)


if __name__ == "__main__":
    flags.DEFINE_string("log_file", "",
                        "The file that log models performances.")
    flags.DEFINE_string("sorted_log_file", "",
                        "The file that log models performances (sorted by GAP).")
    flags.DEFINE_integer("top_k", 10,
                         "The number of top models reserved.")

    with open(FLAGS.log_file) as F:
        model_perfs = _rank_models(F.readlines())

    with open(FLAGS.sorted_log_file, "w") as Fo:
        Fo.writelines(["%f\t%s\n" % (x, y) for x, y in model_perfs])

    for perf, model in model_perfs[:FLAGS.top_k]:
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print(model)
# Run inference with the distillchain_video_dcc checkpoint over the
# ensemble_train, ensemble_validate and test splits (video-level features),
# feeding the ensemble_mean_model predictions as distillation input.

model_name="distillchain_video_dcc"
checkpoint_step=19296
predictions_root="/Youtube-8M/model_predictions"

for part in ensemble_train ensemble_validate test; do
  CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \
    --output_dir="${predictions_root}/${part}/${model_name}" \
    --model_checkpoint_path="../model/${model_name}/model.ckpt-${checkpoint_step}" \
    --input_data_pattern="/Youtube-8M/data/video/${part}/*.tfrecord" \
    --frame_features=False \
    --feature_names="mean_rgb,mean_audio" \
    --feature_sizes="1024,128" \
    --distill_data_pattern="${predictions_root}/${part}/distillation/ensemble_mean_model/*.tfrecord" \
    --distillation_features=True \
    --distillation_as_input=True \
    --model=DistillchainDeepCombineChainModel \
    --moe_num_mixtures=4 \
    --deep_chain_layers=4 \
    --deep_chain_relu_cells=256 \
    --batch_size=1024 \
    --file_size=4096
done
# Evaluate saved DeepCombineChainModel checkpoints from the "scene2" noise
# experiment on the video-level validation shards.
#
# Positional arguments:
#   $1 percent  suffix selecting the model directory scene2_percent_<percent>
#
# training_utils/select.py is run from inside MODEL_DIR with EVERY as its only
# argument; every word it prints is used as a checkpoint step. Each step is
# echoed once, and steps greater than `start` are echoed again and evaluated.

percent="$1"
start=0

GPU_ID=0
EVERY=200
MODEL=DeepCombineChainModel
MODEL_DIR="../model/distillation_video_dcc_noise/scene2_percent_${percent}"

DIR="$(pwd)"

eval_data="/Youtube-8M/data/video/validate/validatea*"
checkpoints=$(cd $MODEL_DIR && python ${DIR}/training_utils/select.py $EVERY)

for checkpoint in $checkpoints; do
  echo $checkpoint
  # Guard clause: skip checkpoints at or below the start step.
  [ $checkpoint -gt $start ] || continue

  echo $checkpoint
  CUDA_VISIBLE_DEVICES=$GPU_ID python eval.py \
    --train_dir="$MODEL_DIR" \
    --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
    --eval_data_pattern="$eval_data" \
    --frame_features=False \
    --feature_names="mean_rgb,mean_audio" \
    --feature_sizes="1024,128" \
    --model=$MODEL \
    --deep_chain_relu_cells=256 \
    --deep_chain_layers=4 \
    --moe_num_mixtures=4 \
    --batch_size=1024 \
    --num_readers=1 \
    --run_once=True
done
# Run inference with the distillchain_v2_multiscale_cnnlstm checkpoint over the
# ensemble_train, ensemble_validate and test splits (frame-level features),
# passing the ensemble_v2_matrix_model predictions as the distillation data.

model_name="distillchain_v2_multiscale_cnnlstm"
checkpoint_step=72344
predictions_root="/Youtube-8M/model_predictions"

for part in ensemble_train ensemble_validate test; do
  CUDA_VISIBLE_DEVICES=0 python inference-pre-ensemble.py \
    --output_dir="${predictions_root}/${part}/${model_name}" \
    --model_checkpoint_path="../model/${model_name}/model.ckpt-${checkpoint_step}" \
    --input_data_pattern="/Youtube-8M/data/frame/${part}/*.tfrecord" \
    --distill_data_pattern="${predictions_root}/${part}/distillation/ensemble_v2_matrix_model/*.tfrecord" \
    --frame_features=True \
    --feature_names="rgb,audio" \
    --feature_sizes="1024,128" \
    --distillation_features=False \
    --distillation_as_input=True \
    --model=DistillchainMultiscaleCnnLstmModel \
    --multiscale_cnn_lstm_layers=3 \
    --moe_num_mixtures=4 \
    --rnn_swap_memory=True \
    --is_training=False \
    --batch_size=128 \
    --file_size=4096
done
"""Print every one-model extension of the current top-k ensembles.

--top_k_file holds one ensemble per line as a comma-separated list of model
names; --all_models_conf holds one available model per line. For each
ensemble and each model not already in it, the sorted, comma-joined
combination is printed once.
"""
import sys
from tensorflow import flags

FLAGS = flags.FLAGS


def _read_nonblank_lines(path):
    """Return the stripped, non-empty lines of the file at `path`."""
    # The context manager closes the handle as soon as the lines are read.
    with open(path) as conf:
        return [line.strip() for line in conf if line.strip()]


def _extend_candidates(ensemble_models, all_models):
    """Return the set of ensembles obtained by adding one missing model.

    Args:
        ensemble_models: list of ensembles, each a list of model names.
        all_models: list of every available model name.
    """
    candidates = set()
    for em in ensemble_models:
        for model in all_models:
            if model not in em:
                # Sorting makes the key independent of member order; the set
                # removes duplicates reached from different ensembles.
                candidates.add(",".join(sorted(em + [model])))
    return candidates


if __name__ == "__main__":
    flags.DEFINE_string("top_k_file", "",
                        "The file that contains the top-k ensemble models.")
    flags.DEFINE_string("all_models_conf", "",
                        "The file that contains all available single models.")

    all_models = _read_nonblank_lines(FLAGS.all_models_conf)
    ensemble_models = [line.split(",")
                       for line in _read_nonblank_lines(FLAGS.top_k_file)]

    # Sorted so repeated runs list the candidates in the same order.
    for candidate in sorted(_extend_candidates(ensemble_models, all_models)):
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print(candidate)
# Evaluate saved checkpoints of the multitask LSTM-memory model
# (../model/lstmmemory1024_moe8_multitask_ce) on the frame-level validation
# files matching validatea*.
#
# Usage: bash eval_scripts/eval-multitask-ce.sh <start_step>
#   <start_step>  only checkpoints whose number is strictly greater than this
#                 value are evaluated.
#
# eval.py, training_utils/select.py and resources/vertical.tsv are referenced
# relative to the current working directory.
GPU_ID=1
EVERY=2000
MODEL=LstmMemoryMultitaskModel
MODEL_DIR="../model/lstmmemory1024_moe8_multitask_ce"

# Fail fast when the threshold is missing: with an empty value every
# `[ ... -gt ... ]` test below errors out and no checkpoint is evaluated,
# while the script still exits with status 0.
start="${1:?usage: $0 <start_step>}"
DIR="$(pwd)"

# select.py is run from inside the model directory and receives EVERY as its
# only argument; each word it prints is treated as a checkpoint number.
# NOTE(review): presumably it keeps one checkpoint per EVERY steps - confirm
# against training_utils/select.py.
for checkpoint in $(cd "$MODEL_DIR" && python "${DIR}/training_utils/select.py" "$EVERY"); do
  # First echo lists every candidate; the second marks the ones evaluated.
  echo "$checkpoint"
  if [ "$checkpoint" -gt "$start" ]; then
    echo "$checkpoint"
    CUDA_VISIBLE_DEVICES="$GPU_ID" python eval.py \
      --train_dir="$MODEL_DIR" \
      --model_checkpoint_path="${MODEL_DIR}/model.ckpt-${checkpoint}" \
      --eval_data_pattern="/Youtube-8M/data/frame/validate/validatea*" \
      --frame_features=True \
      --feature_names="rgb,audio" \
      --feature_sizes="1024,128" \
      --batch_size=32 \
      --model="$MODEL" \
      --lstm_layers=2 \
      --lstm_cells=1024 \
      --multitask=True \
      --moe_num_mixtures=8 \
      --num_supports=25 \
      --num_verticals=25 \
      --vertical_file="resources/vertical.tsv" \
      --run_once=True
  fi
done
# Run pre-ensemble inference with the video-level distill-chain model
# (checkpoint 19296) and write its predictions for each listed data part.
#
# Only the "train" part is processed here; the other parts this script has
# been used with are: ensemble_train ensemble_validate test.
model_name="distillation/distillchain_video_dcc"
checkpoint_path="../model/${model_name}/model.ckpt-19296"

# NOTE(review): predictions are written under model_predictions_x32 while the
# distillation inputs are read from model_predictions - confirm this is
# intended.
output_root="/Youtube-8M/model_predictions_x32"
distill_root="/Youtube-8M/model_predictions"
video_root="/Youtube-8M/data/video"

for part in train; do
  output_dir="${output_root}/${part}/${model_name}"
  input_pattern="${video_root}/${part}/*.tfrecord"
  distill_pattern="${distill_root}/${part}/distillation/ensemble_mean_model/*.tfrecord"

  CUDA_VISIBLE_DEVICES=1 python inference-pre-ensemble.py \
    --output_dir="$output_dir" \
    --model_checkpoint_path="$checkpoint_path" \
    --input_data_pattern="$input_pattern" \
    --frame_features=False \
    --feature_names="mean_rgb,mean_audio" \
    --feature_sizes="1024,128" \
    --distill_data_pattern="$distill_pattern" \
    --distillation_features=True \
    --distillation_as_input=True \
    --model=DistillchainDeepCombineChainModel \
    --moe_num_mixtures=4 \
    --deep_chain_layers=4 \
    --deep_chain_relu_cells=256 \
    --batch_size=32 \
    --file_size=4096
done
import math
import models
import tensorflow as tf
import utils
from tensorflow import flags
import tensorflow.contrib.slim as slim
FLAGS = flags.FLAGS

class LogisticModel(models.BaseModel):
  """Single fully connected sigmoid layer with an L2 weight penalty."""

  def create_model(self, model_input, vocab_size, l2_penalty=1e-8, original_input=None, **unused_params):
    """Builds the logistic-regression prediction layer.

    Args:
      model_input: 'batch' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      l2_penalty: Scale handed to the L2 regularizer on the layer weights.
      original_input: Accepted for interface compatibility; not used here.
      **unused_params: Any further keyword arguments are ignored.

    Returns:
      A dictionary whose 'predictions' key holds the sigmoid output of the
      layer, a tensor of dimensions batch_size x num_classes.
    """
    weight_penalty = slim.l2_regularizer(l2_penalty)
    predictions = slim.fully_connected(
        model_input,
        vocab_size,
        activation_fn=tf.nn.sigmoid,
        weights_regularizer=weight_penalty)
    return {"predictions": predictions}