├── CLUEdataset └── emotion │ ├── label_distribution.png │ ├── test.txt │ ├── train.txt │ └── valid.txt ├── README.md └── models ├── albert ├── albert_config │ ├── albert_config_base.json │ ├── albert_config_large.json │ ├── albert_config_tiny.json │ ├── albert_config_xlarge.json │ ├── albert_config_xxlarge.json │ ├── bert_config.json │ └── vocab.txt ├── bert_utils.py ├── create_pretrain_data.sh ├── create_pretraining_data.py ├── modeling.py ├── optimization.py ├── optimization_finetuning.py ├── resources │ ├── add_data_removing_dropout.jpg │ ├── albert_configuration.jpg │ ├── albert_performance.jpg │ ├── create_pretraining_data_roberta.py │ ├── shell_scripts │ │ └── create_pretrain_data_batch_webtext.sh │ └── state_of_the_art.jpg ├── run_classifier.py ├── run_classifier_afqmc.sh ├── run_classifier_cmnli.sh ├── run_classifier_csl.sh ├── run_classifier_iflytek.sh ├── run_classifier_tnews.sh ├── run_classifier_wsc.sh ├── run_pretraining.py ├── test_changes.py ├── tokenization.py └── tpu │ ├── run_classifier_inews.sh │ ├── run_classifier_inews_tiny.sh │ ├── run_classifier_lcqmc.sh │ ├── run_classifier_lcqmc_tiny.sh │ ├── run_classifier_thucnews.sh │ ├── run_classifier_thucnews_tiny.sh │ ├── run_classifier_tnews.sh │ ├── run_classifier_tnews_tiny.sh │ ├── run_classifier_xnli.sh │ └── run_classifier_xnli_tiny.sh ├── bert ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── __init__.py ├── conlleval.py ├── create_pretraining_data.py ├── extract_features.py ├── modeling.py ├── modeling_test.py ├── multilingual.md ├── optimization.py ├── optimization_test.py ├── predicting_movie_reviews_with_bert_on_tf_hub.ipynb ├── requirements.txt ├── run_classifier.py ├── run_classifier_afqmc.sh ├── run_classifier_cmnli.sh ├── run_classifier_csl.sh ├── run_classifier_emotion.sh ├── run_classifier_iflytek.sh ├── run_classifier_tnews.sh ├── run_classifier_with_tfhub.py ├── run_classifier_wsc.sh ├── run_ner.py ├── run_pretraining.py ├── run_squad.py ├── sample_text.txt ├── tf_metrics.py ├── tokenization.py ├── tokenization_test.py └── tpu │ ├── run_classifier_inews.sh │ ├── run_classifier_jdcomment.sh │ ├── run_classifier_lcqmc.sh │ ├── run_classifier_thucnews.sh │ ├── run_classifier_tnews.sh │ └── run_classifier_xnli.sh ├── bert_wwm_ext ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── __init__.py ├── conlleval.py ├── create_pretraining_data.py ├── extract_features.py ├── modeling.py ├── modeling_test.py ├── multilingual.md ├── optimization.py ├── optimization_test.py ├── predicting_movie_reviews_with_bert_on_tf_hub.ipynb ├── requirements.txt ├── run_classifier.py ├── run_classifier_afqmc.sh ├── run_classifier_cmnli.sh ├── run_classifier_csl.sh ├── run_classifier_iflytek.sh ├── run_classifier_tnews.sh ├── run_classifier_with_tfhub.py ├── run_classifier_wsc.sh ├── run_ner.py ├── run_ner_msra.sh ├── run_pretraining.py ├── run_squad.py ├── sample_text.txt ├── tf_metrics.py ├── tokenization.py ├── tokenization_test.py └── tpu │ ├── run_classifier_inews.sh │ ├── run_classifier_lcqmc.sh │ ├── run_classifier_thucnews.sh │ ├── run_classifier_tnews.sh │ └── run_classifier_xnli.sh ├── classifier_utils.py ├── copa_sh ├── convert_test.py ├── copa_eval_dev.sh ├── dev_label.txt └── eval_copa.py ├── ernie ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── __init__.py ├── conlleval.py ├── create_pretraining_data.py ├── extract_features.py ├── modeling.py ├── modeling_test.py ├── multilingual.md ├── optimization.py ├── optimization_test.py ├── predicting_movie_reviews_with_bert_on_tf_hub.ipynb ├── requirements.txt ├── 
run_classifier.py ├── run_classifier_afqmc.sh ├── run_classifier_cmnli.sh ├── run_classifier_csl.sh ├── run_classifier_iflytek.sh ├── run_classifier_tnews.sh ├── run_classifier_with_tfhub.py ├── run_classifier_wsc.sh ├── run_ner.py ├── run_ner_msra.sh ├── run_pretraining.py ├── run_squad.py ├── sample_text.txt ├── tf_metrics.py ├── tokenization.py ├── tokenization_test.py └── tpu │ ├── run_classifier_inews.sh │ ├── run_classifier_lcqmc.sh │ ├── run_classifier_thucnews.sh │ ├── run_classifier_tnews.sh │ └── run_classifier_xnli.sh ├── roberta ├── conlleval.py ├── create_pretrain_data.sh ├── create_pretraining_data.py ├── modeling.py ├── optimization.py ├── optimization_finetuning.py ├── resources │ ├── RoBERTa_zh_Large_Learning_Curve.png │ └── vocab.txt ├── run_classifier.py ├── run_classifier_afqmc.sh ├── run_classifier_cmnli.sh ├── run_classifier_csl.sh ├── run_classifier_iflytek.sh ├── run_classifier_tnews.sh ├── run_classifier_wsc.sh ├── run_ner.py ├── run_ner_msra.sh ├── run_pretraining.py ├── tf_metrics.py ├── tokenization.py └── tpu │ ├── run_classifier_inews.sh │ ├── run_classifier_jdcomment.sh │ ├── run_classifier_lcqmc.sh │ ├── run_classifier_thucnews.sh │ ├── run_classifier_tnews.sh │ └── run_classifier_xnli.sh ├── roberta_wwm_ext ├── CONTRIBUTING.md ├── LICENSE ├── __init__.py ├── conlleval.py ├── create_pretraining_data.py ├── extract_features.py ├── modeling.py ├── modeling_test.py ├── multilingual.md ├── optimization.py ├── optimization_test.py ├── requirements.txt ├── run_classifier.py ├── run_classifier_afqmc.sh ├── run_classifier_cmnli.sh ├── run_classifier_csl.sh ├── run_classifier_iflytek.sh ├── run_classifier_tnews.sh ├── run_classifier_with_tfhub.py ├── run_classifier_wsc.sh ├── run_ner.py ├── run_ner_msra.sh ├── run_pretraining.py ├── run_squad.py ├── tf_metrics.py ├── tokenization.py ├── tokenization_test.py └── tpu │ ├── run_classifier_inews.sh │ ├── run_classifier_jdcomment.sh │ ├── run_classifier_lcqmc.sh │ ├── run_classifier_thucnews.sh │ ├── run_classifier_tnews.sh │ └── run_classifier_xnli.sh ├── roberta_wwm_large_ext ├── CONTRIBUTING.md ├── LICENSE ├── __init__.py ├── conlleval.py ├── create_pretraining_data.py ├── extract_features.py ├── modeling.py ├── modeling_test.py ├── multilingual.md ├── optimization.py ├── optimization_test.py ├── predicting_movie_reviews_with_bert_on_tf_hub.ipynb ├── requirements.txt ├── run_classifier.py ├── run_classifier_afqmc.sh ├── run_classifier_cmnli.sh ├── run_classifier_csl.sh ├── run_classifier_iflytek.sh ├── run_classifier_tnews.sh ├── run_classifier_with_tfhub.py ├── run_classifier_wsc.sh ├── run_ner.py ├── run_ner_msra.sh ├── run_pretraining.py ├── run_squad.py ├── tf_metrics.py ├── tokenization.py ├── tokenization_test.py └── tpu │ ├── run_classifier_inews.sh │ ├── run_classifier_jdcomment.sh │ ├── run_classifier_lcqmc.sh │ ├── run_classifier_thucnews.sh │ ├── run_classifier_tnews.sh │ └── run_classifier_xnli.sh └── xlnet ├── __init__.py ├── cmrc2018_evaluate_drcd.py ├── data_utils.py ├── function_builder.py ├── gpu_utils.py ├── model_utils.py ├── modeling.py ├── prepro_utils.py ├── run_classifier.py ├── run_classifier_afqmc.sh ├── run_classifier_cmnli.sh ├── run_classifier_csl.sh ├── run_classifier_iflytek.sh ├── run_classifier_tnews.sh ├── run_classifier_wsc.sh ├── run_cmrc_drcd.py ├── spiece.model ├── squad_utils.py ├── summary.py ├── temp.sh ├── tpu ├── run_classifier_inews.sh ├── run_classifier_lcqmc.sh ├── run_classifier_tnews.sh └── run_classifier_xnli.sh ├── tpu_estimator.py └── xlnet.py 
/CLUEdataset/emotion/label_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/CLUEdataset/emotion/label_distribution.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CLUEEmotion2020 2 | CLUE Emotion Analysis Dataset 3 | # Data Description 4 | The dataset in the data directory is an emotion analysis corpus in which each sample is annotated with one emotion label. The label set is *like, happiness, sadness, anger, disgust, fear and surprise*. 5 | 6 | The dataset comes from the following paper: 7 | 8 | ```Minglei Li, Yunfei Long, Qin Lu, and Wenjie Li. “Emotion Corpus Construction Based on Selection from Hashtags.” In Proceedings of the International Conference on Language Resources and Evaluation (LREC). Portorož, Slovenia, 2016``` 9 | 10 | The corpus statistics and label distribution are as follows: 11 | 12 | ![label_distribution](CLUEdataset/emotion/label_distribution.png) 13 | 14 | The train, valid and test sets are split in an 8:1:1 ratio and encoded in UTF-8. 15 | 16 | # Baseline results 17 | 18 | Test results of different classification models on this dataset. 19 | 20 | | Models | Accuracy | Parameters | 21 | | --------- | -------- | ---------------------------------- | 22 | | BERT-base | 60.7% | Epoch 3, batch 32, max_seq_len 128 | 23 | 24 | # Reproduce the results 25 | 26 | The code is based on the original [CLUE source code](https://github.com/CLUEbenchmark/CLUE), which in turn is based on the original Google BERT code; the pre-trained language model is the [BERT Base Chinese version](https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip).
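Before fine-tuning, it can help to sanity-check the data files and the label distribution. Below is a minimal sketch for doing so, assuming each line of `train.txt` holds a sentence and its emotion label separated by a tab; the column order and delimiter are assumptions, so verify them against the actual files in `CLUEdataset/emotion/` first.

```python
# Minimal sketch for inspecting the emotion corpus (not part of the original
# training pipeline). Assumption: one sample per line, "text<TAB>label", UTF-8.
from collections import Counter

def load_emotion_split(path):
    samples = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.rstrip("\n")
            if "\t" not in line:
                continue  # skip blank or malformed lines
            text, label = line.rsplit("\t", 1)  # assumed column order
            samples.append((text, label))
    return samples

train = load_emotion_split("CLUEdataset/emotion/train.txt")
print(len(train), "training samples")
print(Counter(label for _, label in train))
```

The printed counts should roughly match the 8:1:1 split and the label-distribution figure above.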
27 | 28 | 29 | 30 | ## Env 31 | 32 | ``` 33 | tensorflow 1.12 34 | ``` 35 | 36 | 37 | 38 | ## Run command 39 | 40 | ``` 41 | cd models/bert 42 | ./run_classifier_emotion.sh 43 | ``` 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /models/albert/albert_config/albert_config_base.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.0, 3 | "directionality": "bidi", 4 | "hidden_act": "gelu", 5 | "hidden_dropout_prob": 0.0, 6 | "hidden_size": 768, 7 | "embedding_size": 128, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 3072 , 10 | "max_position_embeddings": 512, 11 | "num_attention_heads": 12, 12 | "num_hidden_layers": 12, 13 | 14 | "pooler_fc_size": 768, 15 | "pooler_num_attention_heads": 12, 16 | "pooler_num_fc_layers": 3, 17 | "pooler_size_per_head": 128, 18 | "pooler_type": "first_token_transform", 19 | "type_vocab_size": 2, 20 | "vocab_size": 21128, 21 | "ln_type":"postln" 22 | 23 | } 24 | -------------------------------------------------------------------------------- /models/albert/albert_config/albert_config_large.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.0, 3 | "directionality": "bidi", 4 | "hidden_act": "gelu", 5 | "hidden_dropout_prob": 0.0, 6 | "hidden_size": 1024, 7 | "embedding_size": 128, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 4096, 10 | "max_position_embeddings": 512, 11 | "num_attention_heads": 16, 12 | "num_hidden_layers": 24, 13 | 14 | "pooler_fc_size": 768, 15 | "pooler_num_attention_heads": 12, 16 | "pooler_num_fc_layers": 3, 17 | "pooler_size_per_head": 128, 18 | "pooler_type": "first_token_transform", 19 | "type_vocab_size": 2, 20 | "vocab_size": 21128, 21 | "ln_type":"postln" 22 | 23 | } 24 | -------------------------------------------------------------------------------- /models/albert/albert_config/albert_config_tiny.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.0, 3 | "directionality": "bidi", 4 | "hidden_act": "gelu", 5 | "hidden_dropout_prob": 0.0, 6 | "hidden_size": 312, 7 | "embedding_size": 128, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 1248 , 10 | "max_position_embeddings": 512, 11 | "num_attention_heads": 12, 12 | "num_hidden_layers": 4, 13 | 14 | "pooler_fc_size": 768, 15 | "pooler_num_attention_heads": 12, 16 | "pooler_num_fc_layers": 3, 17 | "pooler_size_per_head": 128, 18 | "pooler_type": "first_token_transform", 19 | "type_vocab_size": 2, 20 | "vocab_size": 21128, 21 | "ln_type":"postln" 22 | 23 | } 24 | -------------------------------------------------------------------------------- /models/albert/albert_config/albert_config_xlarge.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.0, 3 | "directionality": "bidi", 4 | "hidden_act": "gelu", 5 | "hidden_dropout_prob": 0.0, 6 | "hidden_size": 2048, 7 | "embedding_size": 128, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 8192, 10 | "max_position_embeddings": 512, 11 | "num_attention_heads": 32, 12 | "num_hidden_layers": 24, 13 | 14 | "pooler_fc_size": 1024, 15 | "pooler_num_attention_heads": 64, 16 | "pooler_num_fc_layers": 3, 17 | "pooler_size_per_head": 128, 18 | "pooler_type": "first_token_transform", 19 | "type_vocab_size": 2, 20 | "vocab_size": 21128, 21 | "ln_type":"preln" 22 
| 23 | } 24 | -------------------------------------------------------------------------------- /models/albert/albert_config/albert_config_xxlarge.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.0, 3 | "directionality": "bidi", 4 | "hidden_act": "gelu", 5 | "hidden_dropout_prob": 0.0, 6 | "hidden_size": 4096, 7 | "embedding_size": 128, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 16384, 10 | "max_position_embeddings": 512, 11 | "num_attention_heads": 64, 12 | "num_hidden_layers": 12, 13 | 14 | "pooler_fc_size": 1024, 15 | "pooler_num_attention_heads": 64, 16 | "pooler_num_fc_layers": 3, 17 | "pooler_size_per_head": 128, 18 | "pooler_type": "first_token_transform", 19 | "type_vocab_size": 2, 20 | "vocab_size": 21128, 21 | "ln_type":"preln" 22 | 23 | } 24 | -------------------------------------------------------------------------------- /models/albert/albert_config/bert_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.0, 3 | "directionality": "bidi", 4 | "hidden_act": "gelu", 5 | "hidden_dropout_prob": 0.0, 6 | "hidden_size": 768, 7 | "initializer_range": 0.02, 8 | "intermediate_size": 3072, 9 | "max_position_embeddings": 512, 10 | "num_attention_heads": 12, 11 | "num_hidden_layers": 12, 12 | "pooler_fc_size": 768, 13 | "pooler_num_attention_heads": 12, 14 | "pooler_num_fc_layers": 3, 15 | "pooler_size_per_head": 128, 16 | "pooler_type": "first_token_transform", 17 | "type_vocab_size": 2, 18 | "vocab_size": 21128 19 | } 20 | -------------------------------------------------------------------------------- /models/albert/create_pretrain_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | BERT_BASE_DIR=./albert_config 4 | python3 create_pretraining_data.py --do_whole_word_mask=True --input_file=data/news_zh_1.txt \ 5 | --output_file=data/tf_news_2016_zh_raw_news2016zh_1.tfrecord --vocab_file=$BERT_BASE_DIR/vocab.txt --do_lower_case=True \ 6 | --max_seq_length=512 --max_predictions_per_seq=51 --masked_lm_prob=0.10 -------------------------------------------------------------------------------- /models/albert/resources/add_data_removing_dropout.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/albert/resources/add_data_removing_dropout.jpg -------------------------------------------------------------------------------- /models/albert/resources/albert_configuration.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/albert/resources/albert_configuration.jpg -------------------------------------------------------------------------------- /models/albert/resources/albert_performance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/albert/resources/albert_performance.jpg -------------------------------------------------------------------------------- /models/albert/resources/shell_scripts/create_pretrain_data_batch_webtext.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo $1,$2 3 | 4 | BERT_BASE_DIR=./bert_config 5 | for((i=$1;i<=$2;i++)); 6 | do 7 | python3 create_pretraining_data.py --do_whole_word_mask=True --input_file=gs://raw_text/web_text_zh_raw/web_text_zh_$i.txt \ 8 | --output_file=gs://albert_zh/tf_records/tf_web_text_zh_$i.tfrecord --vocab_file=$BERT_BASE_DIR/vocab.txt --do_lower_case=True \ 9 | --max_seq_length=512 --max_predictions_per_seq=76 --masked_lm_prob=0.15 10 | done 11 | -------------------------------------------------------------------------------- /models/albert/resources/state_of_the_art.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/albert/resources/state_of_the_art.jpg -------------------------------------------------------------------------------- /models/albert/run_classifier_afqmc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: Li Yudong 3 | # @Date: 2019-11-28 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 10:29:59 6 | 7 | TASK_NAME="afqmc" 8 | MODEL_NAME="albert_xlarge_zh" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config 12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 15 | 16 | # download and unzip dataset 17 | if [ ! -d $GLUE_DATA_DIR ]; then 18 | mkdir -p $GLUE_DATA_DIR 19 | echo "makedir $GLUE_DATA_DIR" 20 | fi 21 | cd $GLUE_DATA_DIR 22 | if [ ! -d $TASK_NAME ]; then 23 | mkdir $TASK_NAME 24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 25 | fi 26 | cd $TASK_NAME 27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 28 | rm * 29 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip 30 | unzip afqmc_public.zip 31 | rm afqmc_public.zip 32 | else 33 | echo "data exists" 34 | fi 35 | echo "Finish download dataset." 36 | 37 | # download model 38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then 39 | mkdir -p $ALBERT_XLARGE_DIR 40 | echo "makedir $ALBERT_XLARGE_DIR" 41 | fi 42 | cd $ALBERT_XLARGE_DIR 43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then 44 | rm * 45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip 46 | unzip albert_xlarge_zh_177k.zip 47 | rm albert_xlarge_zh_177k.zip 48 | else 49 | echo "model exists" 50 | fi 51 | echo "Finish download model." 52 | 53 | # run task 54 | cd $CURRENT_DIR 55 | echo "Start running..." 
56 | if [ $# == 0 ]; then 57 | python run_classifier.py \ 58 | --task_name=$TASK_NAME \ 59 | --do_train=true \ 60 | --do_eval=true \ 61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 65 | --max_seq_length=128 \ 66 | --train_batch_size=16 \ 67 | --learning_rate=3e-5 \ 68 | --num_train_epochs=2.0 \ 69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 70 | elif [ $1 == "predict" ]; then 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=16 \ 82 | --learning_rate=3e-5 \ 83 | --num_train_epochs=2.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/albert/run_classifier_cmnli.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: Li Yudong 3 | # @Date: 2019-11-28 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 10:33:16 6 | 7 | TASK_NAME="cmnli" 8 | MODEL_NAME="albert_xlarge_zh" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config 12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 15 | 16 | # download and unzip dataset 17 | if [ ! -d $GLUE_DATA_DIR ]; then 18 | mkdir -p $GLUE_DATA_DIR 19 | echo "makedir $GLUE_DATA_DIR" 20 | fi 21 | cd $GLUE_DATA_DIR 22 | if [ ! -d $TASK_NAME ]; then 23 | mkdir $TASK_NAME 24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 25 | fi 26 | cd $TASK_NAME 27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 28 | rm * 29 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip 30 | unzip cmnli_public.zip 31 | rm cmnli_public.zip 32 | else 33 | echo "data exists" 34 | fi 35 | echo "Finish download dataset." 36 | 37 | # download model 38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then 39 | mkdir -p $ALBERT_XLARGE_DIR 40 | echo "makedir $ALBERT_XLARGE_DIR" 41 | fi 42 | cd $ALBERT_XLARGE_DIR 43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then 44 | rm * 45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip 46 | unzip albert_xlarge_zh_177k.zip 47 | rm albert_xlarge_zh_177k.zip 48 | else 49 | echo "model exists" 50 | fi 51 | echo "Finish download model." 52 | 53 | # run task 54 | cd $CURRENT_DIR 55 | echo "Start running..." 
56 | if [ $# == 0 ]; then 57 | python run_classifier.py \ 58 | --task_name=$TASK_NAME \ 59 | --do_train=true \ 60 | --do_eval=true \ 61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 65 | --max_seq_length=128 \ 66 | --train_batch_size=16 \ 67 | --learning_rate=3e-5 \ 68 | --num_train_epochs=2.0 \ 69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 70 | elif [ $1 == "predict" ]; then 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=16 \ 82 | --learning_rate=3e-5 \ 83 | --num_train_epochs=2.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/albert/run_classifier_csl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: Li Yudong 3 | # @Date: 2019-11-28 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 10:33:24 6 | 7 | TASK_NAME="csl" 8 | MODEL_NAME="albert_xlarge_zh" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config 12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 15 | 16 | # download and unzip dataset 17 | if [ ! -d $GLUE_DATA_DIR ]; then 18 | mkdir -p $GLUE_DATA_DIR 19 | echo "makedir $GLUE_DATA_DIR" 20 | fi 21 | cd $GLUE_DATA_DIR 22 | if [ ! -d $TASK_NAME ]; then 23 | mkdir $TASK_NAME 24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 25 | fi 26 | cd $TASK_NAME 27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 28 | rm * 29 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip 30 | unzip csl_public.zip 31 | rm csl_public.zip 32 | else 33 | echo "data exists" 34 | fi 35 | echo "Finish download dataset." 36 | 37 | # download model 38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then 39 | mkdir -p $ALBERT_XLARGE_DIR 40 | echo "makedir $ALBERT_XLARGE_DIR" 41 | fi 42 | cd $ALBERT_XLARGE_DIR 43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then 44 | rm * 45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip 46 | unzip albert_xlarge_zh_177k.zip 47 | rm albert_xlarge_zh_177k.zip 48 | else 49 | echo "model exists" 50 | fi 51 | echo "Finish download model." 52 | 53 | # run task 54 | cd $CURRENT_DIR 55 | echo "Start running..." 
56 | if [ $# == 0 ]; then 57 | python run_classifier.py \ 58 | --task_name=$TASK_NAME \ 59 | --do_train=true \ 60 | --do_eval=true \ 61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 65 | --max_seq_length=128 \ 66 | --train_batch_size=16 \ 67 | --learning_rate=3e-5 \ 68 | --num_train_epochs=2.0 \ 69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 70 | elif [ $1 == "predict" ]; then 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=16 \ 82 | --learning_rate=3e-5 \ 83 | --num_train_epochs=2.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | 87 | -------------------------------------------------------------------------------- /models/albert/run_classifier_iflytek.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: Li Yudong 3 | # @Date: 2019-11-28 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 10:33:28 6 | 7 | TASK_NAME="iflytek" 8 | MODEL_NAME="albert_xlarge_zh" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config 12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 15 | 16 | # download and unzip dataset 17 | if [ ! -d $GLUE_DATA_DIR ]; then 18 | mkdir -p $GLUE_DATA_DIR 19 | echo "makedir $GLUE_DATA_DIR" 20 | fi 21 | cd $GLUE_DATA_DIR 22 | if [ ! -d $TASK_NAME ]; then 23 | mkdir $TASK_NAME 24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 25 | fi 26 | cd $TASK_NAME 27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 28 | rm * 29 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip 30 | unzip iflytek_public.zip 31 | rm iflytek_public.zip 32 | else 33 | echo "data exists" 34 | fi 35 | echo "Finish download dataset." 36 | 37 | # download model 38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then 39 | mkdir -p $ALBERT_XLARGE_DIR 40 | echo "makedir $ALBERT_XLARGE_DIR" 41 | fi 42 | cd $ALBERT_XLARGE_DIR 43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then 44 | rm * 45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip 46 | unzip albert_xlarge_zh_177k.zip 47 | rm albert_xlarge_zh_177k.zip 48 | else 49 | echo "model exists" 50 | fi 51 | echo "Finish download model." 52 | 53 | # run task 54 | cd $CURRENT_DIR 55 | echo "Start running..." 
56 | if [ $# == 0 ]; then 57 | python run_classifier.py \ 58 | --task_name=$TASK_NAME \ 59 | --do_train=true \ 60 | --do_eval=true \ 61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 65 | --max_seq_length=128 \ 66 | --train_batch_size=16 \ 67 | --learning_rate=3e-5 \ 68 | --num_train_epochs=2.0 \ 69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 70 | elif [ $1 == "predict" ]; then 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=16 \ 82 | --learning_rate=3e-5 \ 83 | --num_train_epochs=2.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | 87 | -------------------------------------------------------------------------------- /models/albert/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 10:33:33 6 | 7 | TASK_NAME="tnews" 8 | MODEL_NAME="albert_xlarge_zh" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config 12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 15 | 16 | # download and unzip dataset 17 | if [ ! -d $GLUE_DATA_DIR ]; then 18 | mkdir -p $GLUE_DATA_DIR 19 | echo "makedir $GLUE_DATA_DIR" 20 | fi 21 | cd $GLUE_DATA_DIR 22 | if [ ! -d $TASK_NAME ]; then 23 | mkdir $TASK_NAME 24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 25 | fi 26 | cd $TASK_NAME 27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 28 | rm * 29 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip 30 | unzip tnews_public.zip 31 | rm tnews_public.zip 32 | else 33 | echo "data exists" 34 | fi 35 | echo "Finish download dataset." 36 | 37 | # download model 38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then 39 | mkdir -p $ALBERT_XLARGE_DIR 40 | echo "makedir $ALBERT_XLARGE_DIR" 41 | fi 42 | cd $ALBERT_XLARGE_DIR 43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then 44 | rm * 45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip 46 | unzip albert_xlarge_zh_177k.zip 47 | rm albert_xlarge_zh_177k.zip 48 | else 49 | echo "model exists" 50 | fi 51 | echo "Finish download model." 52 | 53 | # run task 54 | cd $CURRENT_DIR 55 | echo "Start running..."
56 | if [ $# == 0 ]; then 57 | python run_classifier.py \ 58 | --task_name=$TASK_NAME \ 59 | --do_train=true \ 60 | --do_eval=true \ 61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 65 | --max_seq_length=128 \ 66 | --train_batch_size=16 \ 67 | --learning_rate=3e-5 \ 68 | --num_train_epochs=2.0 \ 69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 70 | elif [ $1 == "predict" ]; then 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=16 \ 82 | --learning_rate=3e-5 \ 83 | --num_train_epochs=2.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | 87 | -------------------------------------------------------------------------------- /models/albert/run_classifier_wsc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: Li Yudong 3 | # @Date: 2019-11-28 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 10:33:37 6 | 7 | TASK_NAME="wsc" 8 | MODEL_NAME="albert_xlarge_zh" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config 12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 15 | 16 | # download and unzip dataset 17 | if [ ! -d $GLUE_DATA_DIR ]; then 18 | mkdir -p $GLUE_DATA_DIR 19 | echo "makedir $GLUE_DATA_DIR" 20 | fi 21 | cd $GLUE_DATA_DIR 22 | if [ ! -d $TASK_NAME ]; then 23 | mkdir $TASK_NAME 24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 25 | fi 26 | cd $TASK_NAME 27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 28 | rm * 29 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip 30 | unzip wsc_public.zip 31 | rm wsc_public.zip 32 | else 33 | echo "data exists" 34 | fi 35 | echo "Finish download dataset." 36 | 37 | # download model 38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then 39 | mkdir -p $ALBERT_XLARGE_DIR 40 | echo "makedir $ALBERT_XLARGE_DIR" 41 | fi 42 | cd $ALBERT_XLARGE_DIR 43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then 44 | rm * 45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip 46 | unzip albert_xlarge_zh_177k.zip 47 | rm albert_xlarge_zh_177k.zip 48 | else 49 | echo "model exists" 50 | fi 51 | echo "Finish download model." 52 | 53 | # run task 54 | cd $CURRENT_DIR 55 | echo "Start running..." 
56 | if [ $# == 0 ]; then 57 | python run_classifier.py \ 58 | --task_name=$TASK_NAME \ 59 | --do_train=true \ 60 | --do_eval=true \ 61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 65 | --max_seq_length=128 \ 66 | --train_batch_size=16 \ 67 | --learning_rate=3e-5 \ 68 | --num_train_epochs=2.0 \ 69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 70 | elif [ $1 == "predict" ]; then 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \ 79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=16 \ 82 | --learning_rate=3e-5 \ 83 | --num_train_epochs=2.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/albert/tpu/run_classifier_inews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="inews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-xlarge/albert_xlarge_zh_183k 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-xlarge/albert_xlarge_zh_183k/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_xlarge.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/albert/tpu/run_classifier_inews_tiny.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="inews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert_tiny/albert_tiny_207k 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert_tiny/tpu/${TASK_NAME}/$CURRENT_TIME 7 | 8 | python3 $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_tiny.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=6e-5 \ 19 | --num_train_epochs=10.0 \ 20 | --save_checkpoints_steps=600 \ 21 | --output_dir=$OUTPUT_DIR \ 22 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.240.1.2:8470 23 | 
-------------------------------------------------------------------------------- /models/albert/tpu/run_classifier_lcqmc.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="lcqmc" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-xlarge/albert_xlarge_zh_183k 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-xlarge/albert_xlarge_zh_183k/tpu/${TASK_NAME}/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_xlarge.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=64 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.250.1.2:8470 22 | -------------------------------------------------------------------------------- /models/albert/tpu/run_classifier_lcqmc_tiny.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="lcqmc" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-tiny/albert_tiny_489k 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME} 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-tiny/tpu/${TASK_NAME}/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --do_predict=true \ 13 | --data_dir=$DATA_DIR \ 14 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 15 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_tiny.json \ 16 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \ 17 | --max_seq_length=128 \ 18 | --train_batch_size=32 \ 19 | --learning_rate=6e-5 \ 20 | --num_train_epochs=3.0 \ 21 | --output_dir=$OUTPUT_DIR \ 22 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://192.168.0.2:8470 23 | -------------------------------------------------------------------------------- /models/albert/tpu/run_classifier_thucnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="thucnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-xlarge/albert_xlarge_zh_183k 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-xlarge/albert_xlarge_zh_183k/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_xlarge.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 
\ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.2.101.2:8470 22 | -------------------------------------------------------------------------------- /models/albert/tpu/run_classifier_thucnews_tiny.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="thucnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-tiny/albert_tiny_489k 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-tiny/tpu/${TASK_NAME}/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_tiny.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=64 \ 18 | --learning_rate=1e-4 \ 19 | --num_train_epochs=5.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/albert/tpu/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="tnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-xlarge/albert_xlarge_zh_183k 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-xlarge/albert_xlarge_zh_183k/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_xlarge.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=6e-5 \ 19 | --num_train_epochs=9.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.240.1.2:8470 22 | -------------------------------------------------------------------------------- /models/albert/tpu/run_classifier_tnews_tiny.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="tnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-tiny/albert_tiny_489k 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-tiny/tpu/${TASK_NAME}/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --do_predict=true \ 13 | --data_dir=$DATA_DIR \ 14 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 15 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_tiny.json \ 16 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \ 17 | --max_seq_length=128 \ 18 | 
--train_batch_size=32 \ 19 | --learning_rate=6e-5 \ 20 | --num_train_epochs=3.0 \ 21 | --output_dir=$OUTPUT_DIR \ 22 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.20.0.2:8470 23 | -------------------------------------------------------------------------------- /models/albert/tpu/run_classifier_xnli.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="xnli" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-xlarge/albert_xlarge_zh_183k 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-xlarge/albert_xlarge_zh_183k/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_xlarge.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=64 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.2.101.2:8470 22 | -------------------------------------------------------------------------------- /models/albert/tpu/run_classifier_xnli_tiny.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="xnli" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-tiny/albert_tiny_207k 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-tiny/albert_tiny_207k/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_tiny.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=64 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.2.101.2:8470 22 | -------------------------------------------------------------------------------- /models/bert/.gitignore: -------------------------------------------------------------------------------- 1 | # Initially taken from Github's Python gitignore file 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | .dmypy.json 113 | dmypy.json 114 | 115 | # Pyre type checker 116 | .pyre/ 117 | -------------------------------------------------------------------------------- /models/bert/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | BERT needs to maintain permanent compatibility with the pre-trained model files, 4 | so we do not plan to make any major changes to this library (other than what was 5 | promised in the README). However, we can accept small patches related to 6 | re-factoring and documentation. To submit contributions, there are just a few 7 | small guidelines you need to follow. 8 | 9 | ## Contributor License Agreement 10 | 11 | Contributions to this project must be accompanied by a Contributor License 12 | Agreement. You (or your employer) retain the copyright to your contribution; 13 | this simply gives us permission to use and redistribute your contributions as 14 | part of the project. Head over to https://cla.developers.google.com/ to see 15 | your current agreements on file or to sign a new one. 16 | 17 | You generally only need to submit a CLA once, so if you've already submitted one 18 | (even if it was for a different project), you probably don't need to do it 19 | again. 20 | 21 | ## Code reviews 22 | 23 | All submissions, including submissions by project members, require review. We 24 | use GitHub pull requests for this purpose. Consult 25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 26 | information on using pull requests. 27 | 28 | ## Community Guidelines 29 | 30 | This project follows 31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 32 | -------------------------------------------------------------------------------- /models/bert/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /models/bert/optimization_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import optimization 20 | import tensorflow as tf 21 | 22 | 23 | class OptimizationTest(tf.test.TestCase): 24 | 25 | def test_adam(self): 26 | with self.test_session() as sess: 27 | w = tf.get_variable( 28 | "w", 29 | shape=[3], 30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1])) 31 | x = tf.constant([0.4, 0.2, -0.5]) 32 | loss = tf.reduce_mean(tf.square(x - w)) 33 | tvars = tf.trainable_variables() 34 | grads = tf.gradients(loss, tvars) 35 | global_step = tf.train.get_or_create_global_step() 36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2) 37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step) 38 | init_op = tf.group(tf.global_variables_initializer(), 39 | tf.local_variables_initializer()) 40 | sess.run(init_op) 41 | for _ in range(100): 42 | sess.run(train_op) 43 | w_np = sess.run(w) 44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2) 45 | 46 | 47 | if __name__ == "__main__": 48 | tf.test.main() 49 | -------------------------------------------------------------------------------- /models/bert/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow. 2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow. 3 | -------------------------------------------------------------------------------- /models/bert/run_classifier_csl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: Li Yudong 3 | # @Date: 2019-11-28 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:00:57 6 | 7 | TASK_NAME="csl" 8 | MODEL_NAME="chinese_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export BERT_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! 
-d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip 29 | unzip csl_public.zip 30 | rm csl_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $BERT_PRETRAINED_MODELS_DIR ]; then 38 | mkdir -p $BERT_PRETRAINED_MODELS_DIR 39 | echo "makedir $BERT_PRETRAINED_MODELS_DIR" 40 | fi 41 | cd $BERT_PRETRAINED_MODELS_DIR 42 | if [ ! -d $MODEL_NAME ]; then 43 | wget https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip 44 | unzip chinese_L-12_H-768_A-12.zip 45 | rm chinese_L-12_H-768_A-12.zip 46 | else 47 | cd $MODEL_NAME 48 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 49 | cd .. 50 | rm -rf $MODEL_NAME 51 | wget https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip 52 | unzip chinese_L-12_H-768_A-12.zip 53 | rm chinese_L-12_H-768_A-12.zip 54 | else 55 | echo "model exists" 56 | fi 57 | fi 58 | echo "Finish download model." 59 | 60 | # run task 61 | cd $CURRENT_DIR 62 | echo "Start running..." 63 | if [ $# == 0 ]; then 64 | python run_classifier.py \ 65 | --task_name=$TASK_NAME \ 66 | --do_train=true \ 67 | --do_eval=true \ 68 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 69 | --vocab_file=$BERT_BASE_DIR/vocab.txt \ 70 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \ 71 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \ 72 | --max_seq_length=128 \ 73 | --train_batch_size=32 \ 74 | --learning_rate=2e-5 \ 75 | --num_train_epochs=3.0 \ 76 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 77 | elif [ $1 == "predict" ]; then 78 | echo "Start predict..." 79 | python run_classifier.py \ 80 | --task_name=$TASK_NAME \ 81 | --do_train=false \ 82 | --do_eval=false \ 83 | --do_predict=true \ 84 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 85 | --vocab_file=$BERT_BASE_DIR/vocab.txt \ 86 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \ 87 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \ 88 | --max_seq_length=128 \ 89 | --train_batch_size=32 \ 90 | --learning_rate=2e-5 \ 91 | --num_train_epochs=3.0 \ 92 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 93 | fi 94 | -------------------------------------------------------------------------------- /models/bert/run_classifier_emotion.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:01:06 6 | 7 | TASK_NAME="emotion" 8 | MODEL_NAME="chinese_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export BERT_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! 
-d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.txt" ] || [ ! -f "valid.txt" ] || [ ! -f "test.txt" ]; then 27 | rm * 28 | echo "Please download the dataset with the source code from https://github.com/CLUEbenchmark/CLUEmotionAnalysis2020" 29 | else 30 | echo "data exists" 31 | fi 32 | echo "Finish download dataset." 33 | 34 | # download model 35 | if [ ! -d $BERT_PRETRAINED_MODELS_DIR ]; then 36 | mkdir -p $BERT_PRETRAINED_MODELS_DIR 37 | echo "makedir $BERT_PRETRAINED_MODELS_DIR" 38 | fi 39 | cd $BERT_PRETRAINED_MODELS_DIR 40 | if [ ! -d $MODEL_NAME ]; then 41 | wget https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip 42 | unzip chinese_L-12_H-768_A-12.zip 43 | rm chinese_L-12_H-768_A-12.zip 44 | else 45 | cd $MODEL_NAME 46 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 47 | cd .. 48 | rm -rf $MODEL_NAME 49 | wget https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip 50 | unzip chinese_L-12_H-768_A-12.zip 51 | rm chinese_L-12_H-768_A-12.zip 52 | else 53 | echo "model exists" 54 | fi 55 | fi 56 | echo "Finish download model." 57 | 58 | # run task 59 | cd $CURRENT_DIR 60 | echo "Start running..." 61 | if [ $# == 0 ]; then 62 | python run_classifier.py \ 63 | --task_name=$TASK_NAME \ 64 | --do_train=true \ 65 | --do_eval=true \ 66 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 67 | --vocab_file=$BERT_BASE_DIR/vocab.txt \ 68 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \ 69 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \ 70 | --max_seq_length=128 \ 71 | --train_batch_size=32 \ 72 | --learning_rate=2e-5 \ 73 | --num_train_epochs=3.0 \ 74 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 75 | elif [ $1 == "predict" ]; then 76 | echo "Start predict..." 
77 | python run_classifier.py \ 78 | --task_name=$TASK_NAME \ 79 | --do_train=false \ 80 | --do_eval=false \ 81 | --do_predict=true \ 82 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 83 | --vocab_file=$BERT_BASE_DIR/vocab.txt \ 84 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \ 85 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \ 86 | --max_seq_length=128 \ 87 | --train_batch_size=32 \ 88 | --learning_rate=2e-5 \ 89 | --num_train_epochs=3.0 \ 90 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 91 | fi 92 | -------------------------------------------------------------------------------- /models/bert/tpu/run_classifier_inews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="inews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/al/bert-base/chinese_L-12_H-768_A-12/ 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/bert/tpu/run_classifier_jdcomment.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="jdcomment" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-base/chinese_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME} 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | echo $DATA_DIR 8 | python3 $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470 22 | -------------------------------------------------------------------------------- /models/bert/tpu/run_classifier_lcqmc.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="lcqmc" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-base/chinese_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_$TASK_NAME 6 | export 
OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.20.0.2:8470 22 | -------------------------------------------------------------------------------- /models/bert/tpu/run_classifier_thucnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="thucnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/al/bert-base/chinese_L-12_H-768_A-12/ 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/bert/tpu/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="tnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-base/chinese_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://192.168.0.2:8470 22 | -------------------------------------------------------------------------------- /models/bert/tpu/run_classifier_xnli.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="xnli" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/al/bert-base/chinese_L-12_H-768_A-12/ 5 | export 
DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/.gitignore: -------------------------------------------------------------------------------- 1 | # Initially taken from Github's Python gitignore file 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | .dmypy.json 113 | dmypy.json 114 | 115 | # Pyre type checker 116 | .pyre/ 117 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | BERT needs to maintain permanent compatibility with the pre-trained model files, 4 | so we do not plan to make any major changes to this library (other than what was 5 | promised in the README). However, we can accept small patches related to 6 | re-factoring and documentation. To submit contributions, there are just a few 7 | small guidelines you need to follow. 
8 | 9 | ## Contributor License Agreement 10 | 11 | Contributions to this project must be accompanied by a Contributor License 12 | Agreement. You (or your employer) retain the copyright to your contribution; 13 | this simply gives us permission to use and redistribute your contributions as 14 | part of the project. Head over to https://cla.developers.google.com/ to see 15 | your current agreements on file or to sign a new one. 16 | 17 | You generally only need to submit a CLA once, so if you've already submitted one 18 | (even if it was for a different project), you probably don't need to do it 19 | again. 20 | 21 | ## Code reviews 22 | 23 | All submissions, including submissions by project members, require review. We 24 | use GitHub pull requests for this purpose. Consult 25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 26 | information on using pull requests. 27 | 28 | ## Community Guidelines 29 | 30 | This project follows 31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 32 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/optimization_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
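# [Editorial annotation, not part of the original file] The test below is a
# convergence sanity check for AdamWeightDecayOptimizer: it minimizes the
# quadratic loss mean((x - w)^2), whose unique minimum is w = x, for 100 steps
# starting from w = [0.1, -0.2, -0.1], and asserts that w reaches
# x = [0.4, 0.2, -0.5] within a tolerance of 1e-2. The same test appears
# verbatim in the bert, bert_wwm_ext and ernie directories.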
15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import optimization 20 | import tensorflow as tf 21 | 22 | 23 | class OptimizationTest(tf.test.TestCase): 24 | 25 | def test_adam(self): 26 | with self.test_session() as sess: 27 | w = tf.get_variable( 28 | "w", 29 | shape=[3], 30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1])) 31 | x = tf.constant([0.4, 0.2, -0.5]) 32 | loss = tf.reduce_mean(tf.square(x - w)) 33 | tvars = tf.trainable_variables() 34 | grads = tf.gradients(loss, tvars) 35 | global_step = tf.train.get_or_create_global_step() 36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2) 37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step) 38 | init_op = tf.group(tf.global_variables_initializer(), 39 | tf.local_variables_initializer()) 40 | sess.run(init_op) 41 | for _ in range(100): 42 | sess.run(train_op) 43 | w_np = sess.run(w) 44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2) 45 | 46 | 47 | if __name__ == "__main__": 48 | tf.test.main() 49 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow. 2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow. 3 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/run_classifier_afqmc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:02:12 6 | 7 | TASK_NAME="afqmc" 8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip 29 | unzip afqmc_public.zip 30 | rm afqmc_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then 38 | mkdir -p $BERT_WWM_BASE_DIR 39 | echo "makedir $BERT_WWM_BASE_DIR" 40 | fi 41 | cd $BERT_WWM_BASE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
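# [Editorial annotation, not part of the original script] Every
# run_classifier_*.sh in this repo ends with the same dispatch as the block
# below: invoked with no arguments the script fine-tunes and evaluates on the
# dev split; invoked with the single argument "predict" it only writes
# test-set predictions. For example:
#   bash run_classifier_afqmc.sh            # train + eval
#   bash run_classifier_afqmc.sh predict    # inference on the test set only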
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | 87 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/run_classifier_cmnli.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:02:51 6 | 7 | TASK_NAME="cmnli" 8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip 29 | unzip cmnli_public.zip 30 | rm cmnli_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then 38 | mkdir -p $BERT_WWM_BASE_DIR 39 | echo "makedir $BERT_WWM_BASE_DIR" 40 | fi 41 | cd $BERT_WWM_BASE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | 87 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/run_classifier_csl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: Li Yudong 3 | # @Date: 2019-11-28 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:02:59 6 | 7 | TASK_NAME="csl" 8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip 29 | unzip csl_public.zip 30 | rm csl_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then 38 | mkdir -p $BERT_WWM_BASE_DIR 39 | echo "makedir $BERT_WWM_BASE_DIR" 40 | fi 41 | cd $BERT_WWM_BASE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/run_classifier_iflytek.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:03:03 6 | 7 | TASK_NAME="iflytek" 8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip 29 | unzip iflytek_public.zip 30 | rm iflytek_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then 38 | mkdir -p $BERT_WWM_BASE_DIR 39 | echo "makedir $BERT_WWM_BASE_DIR" 40 | fi 41 | cd $BERT_WWM_BASE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:03:08 6 | 7 | TASK_NAME="tnews" 8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip 29 | unzip tnews_public.zip 30 | rm tnews_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then 38 | mkdir -p $BERT_WWM_BASE_DIR 39 | echo "makedir $BERT_WWM_BASE_DIR" 40 | fi 41 | cd $BERT_WWM_BASE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/run_classifier_wsc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:03:13 6 | 7 | TASK_NAME="wsc" 8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip 29 | unzip wsc_public.zip 30 | rm wsc_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then 38 | mkdir -p $BERT_WWM_BASE_DIR 39 | echo "makedir $BERT_WWM_BASE_DIR" 40 | fi 41 | cd $BERT_WWM_BASE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \ 78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \ 79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/run_ner_msra.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | export CUDA_VISIBLE_DEVICES="0" 3 | export BERT_BASE_DIR=$CURRENT_DIR/prev_trained_model/chinese_wwm_ext_L-12_H-768_A-12 4 | export GLUE_DIR=$CURRENT_DIR/../../glue/chineseGLUEdatasets/ 5 | TASK_NAME="msraner" 6 | 7 | python run_ner.py \ 8 | --task_name=$TASK_NAME \ 9 | --do_train=true \ 10 | --do_eval=false \ 11 | --do_predict=true \ 12 | --data_dir=$GLUE_DIR/$TASK_NAME \ 13 | --vocab_file=$BERT_BASE_DIR/vocab.txt \ 14 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \ 15 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \ 16 | --max_seq_length=256 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=5.0 \ 20 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 21 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/tpu/run_classifier_inews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="inews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/tpu/run_classifier_lcqmc.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P 
-- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="lcqmc" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://192.168.0.2:8470 22 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/tpu/run_classifier_thucnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="inews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/bert_wwm_ext/tpu/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="tnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.230.1.2:8470 22 | 
-------------------------------------------------------------------------------- /models/bert_wwm_ext/tpu/run_classifier_xnli.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="xnli" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/copa_sh/convert_test.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import sys 3 | import json 4 | 5 | test_file=sys.argv[1] 6 | predict_label = [] 7 | tmp = [] 8 | for line in open(test_file, 'r').readlines(): 9 | ss = line.strip().split('\t') 10 | if len(ss) == 2: 11 | tmp.append(ss[1]) 12 | else: 13 | print ('wrong format!!!: ' + line.strip()) 14 | 15 | i = 0 16 | while(i < len(tmp)-1): 17 | if float(tmp[i]) >= float(tmp[i+1]):  # compare scores numerically, not as strings 18 | predict_label.append(str(0)) 19 | else: 20 | predict_label.append(str(1)) 21 | i += 2 22 | print ("predict_label size: " + str(len(predict_label))) 23 | 24 | res = {} 25 | for idx, label in enumerate(predict_label): 26 | res['id'] = idx 27 | res['label'] = label 28 | print(json.dumps(res, ensure_ascii=False)) 29 | 30 | 31 | -------------------------------------------------------------------------------- /models/copa_sh/copa_eval_dev.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ######################################################################### 3 | # File Name: copa_eval_dev.sh 4 | # Author: Junyi Li 5 | # Personal page: dukeenglish.github.io 6 | # Created Time: 21:39:07 2019-12-02 7 | ######################################################################### 8 | # Because the COPA task is special, an extra evaluation script is provided 9 | # here. To reproduce the dev results published online, run prediction with 10 | # dev.json used as test.json, then score the predictions with the first 11 | # command below. 12 | # If you skip evaluation, the second command turns the test predictions 13 | # into the submit format. 14 | python eval_copa.py copa_output/test_result.tsv 15 | python convert_test.py copa_output/test_result.tsv 16 | 17 | -------------------------------------------------------------------------------- /models/copa_sh/dev_label.txt: -------------------------------------------------------------------------------- 1 | 0 1 2 | 1 1 3 | 2 0 4 | 3 1 5 | 4 1 6 | 5 0 7 | 6 0 8 | 7 0 9 | 8 0 10 | 9 0 11 | 10 0 12 | 11 1 13 | 12 1 14 | 13 1 15 | 14 0 16 | 15 0 17 | 16 1 18 | 17 0 19 | 18 1 20 | 19 0 21 | 20 0 22 | 21 1 23 | 22 0 24 | 23 0 25 | 24 0 26 | 25 1 27 | 26 1 28 | 27 1 29 | 28 1 30 | 29 1 31 | 30 1 32 | 31 1 33 | 32 0 34 | 33 0 35 | 34 0 36 | 35 1 37 | 36 0 38 | 37 1 39 | 38 1 40 | 39 0 41 | 40 0 42 | 41 0 43 | 42 1 44 | 43 0 45 | 
44 1 46 | 45 1 47 | 46 0 48 | 47 0 49 | 48 1 50 | 49 0 51 | 50 0 52 | 51 1 53 | 52 1 54 | 53 0 55 | 54 0 56 | 55 1 57 | 56 0 58 | 57 0 59 | 58 1 60 | 59 0 61 | 60 1 62 | 61 0 63 | 62 0 64 | 63 1 65 | 64 0 66 | 65 0 67 | 66 0 68 | 67 1 69 | 68 0 70 | 69 0 71 | 70 0 72 | 71 0 73 | 72 1 74 | 73 1 75 | 74 1 76 | 75 1 77 | 76 1 78 | 77 0 79 | 78 1 80 | 79 0 81 | 80 0 82 | 81 0 83 | 82 0 84 | 83 1 85 | 84 1 86 | 85 0 87 | 86 0 88 | 87 0 89 | 88 0 90 | 89 0 91 | 90 0 92 | 91 1 93 | 92 1 94 | 93 0 95 | 94 0 96 | 95 1 97 | 96 1 98 | 97 0 99 | 98 1 100 | 99 1 101 | -------------------------------------------------------------------------------- /models/copa_sh/eval_copa.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import sys 3 | 4 | test_file=sys.argv[1] 5 | predict_label = [] 6 | tmp = [] 7 | for line in open(test_file, 'r').readlines(): 8 | ss = line.strip().split('\t') 9 | if len(ss) == 2: 10 | tmp.append(ss[1]) 11 | else: 12 | print ('wrong format!!!: ' + line.strip()) 13 | 14 | i = 0 15 | while(i < len(tmp)-1): 16 | if float(tmp[i]) >= float(tmp[i+1]):  # compare scores numerically, not as strings 17 | predict_label.append(str(0)) 18 | else: 19 | predict_label.append(str(1)) 20 | i += 2 21 | print ("predict_label size: " + str(len(predict_label))) 22 | 23 | 24 | golden_file = 'dev_label.txt' 25 | golden_label=[] 26 | for line in open(golden_file, 'r').readlines(): 27 | ss = line.strip().split('\t') 28 | if len(ss) == 2: 29 | golden_label.append(ss[1]) 30 | else: 31 | print ('wrong format!!!: ' + line.strip()) 32 | 33 | print ('golden_label size: ' + str(len(golden_label))) 34 | correct_count = 0 35 | wrong_count = 0 36 | for i in range(0, len(golden_label)): 37 | if golden_label[i] == predict_label[i]: 38 | correct_count += 1 39 | else: 40 | wrong_count += 1 41 | print ("correct_count: " + str(correct_count)) 42 | print ("wrong_count: " + str(wrong_count)) 43 | print ("accuracy: " + str( correct_count * 1.0 / len(golden_label))) 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /models/ernie/.gitignore: -------------------------------------------------------------------------------- 1 | # Initially taken from Github's Python gitignore file 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | .dmypy.json 113 | dmypy.json 114 | 115 | # Pyre type checker 116 | .pyre/ 117 | -------------------------------------------------------------------------------- /models/ernie/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | BERT needs to maintain permanent compatibility with the pre-trained model files, 4 | so we do not plan to make any major changes to this library (other than what was 5 | promised in the README). However, we can accept small patches related to 6 | re-factoring and documentation. To submit contributions, there are just a few 7 | small guidelines you need to follow. 8 | 9 | ## Contributor License Agreement 10 | 11 | Contributions to this project must be accompanied by a Contributor License 12 | Agreement. You (or your employer) retain the copyright to your contribution; 13 | this simply gives us permission to use and redistribute your contributions as 14 | part of the project. Head over to https://cla.developers.google.com/ to see 15 | your current agreements on file or to sign a new one. 16 | 17 | You generally only need to submit a CLA once, so if you've already submitted one 18 | (even if it was for a different project), you probably don't need to do it 19 | again. 20 | 21 | ## Code reviews 22 | 23 | All submissions, including submissions by project members, require review. We 24 | use GitHub pull requests for this purpose. Consult 25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 26 | information on using pull requests. 27 | 28 | ## Community Guidelines 29 | 30 | This project follows 31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 32 | -------------------------------------------------------------------------------- /models/ernie/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /models/ernie/optimization_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import optimization 20 | import tensorflow as tf 21 | 22 | 23 | class OptimizationTest(tf.test.TestCase): 24 | 25 | def test_adam(self): 26 | with self.test_session() as sess: 27 | w = tf.get_variable( 28 | "w", 29 | shape=[3], 30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1])) 31 | x = tf.constant([0.4, 0.2, -0.5]) 32 | loss = tf.reduce_mean(tf.square(x - w)) 33 | tvars = tf.trainable_variables() 34 | grads = tf.gradients(loss, tvars) 35 | global_step = tf.train.get_or_create_global_step() 36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2) 37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step) 38 | init_op = tf.group(tf.global_variables_initializer(), 39 | tf.local_variables_initializer()) 40 | sess.run(init_op) 41 | for _ in range(100): 42 | sess.run(train_op) 43 | w_np = sess.run(w) 44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2) 45 | 46 | 47 | if __name__ == "__main__": 48 | tf.test.main() 49 | -------------------------------------------------------------------------------- /models/ernie/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow. 2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow. 3 | -------------------------------------------------------------------------------- /models/ernie/run_classifier_afqmc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:04:18 6 | 7 | TASK_NAME="afqmc" 8 | MODEL_NAME="baidu_ernie" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! 
-d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip 29 | unzip afqmc_public.zip 30 | rm afqmc_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ERNIE_DIR ]; then 38 | mkdir -p $ERNIE_DIR 39 | echo "makedir $ERNIE_DIR" 40 | fi 41 | cd $ERNIE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip 45 | unzip baidu_ernie.zip 46 | rm baidu_ernie.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ERNIE_DIR/vocab.txt \ 62 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ERNIE_DIR/vocab.txt \ 78 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/ernie/run_classifier_cmnli.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:04:49 6 | 7 | TASK_NAME="cmnli" 8 | MODEL_NAME="baidu_ernie" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip 29 | unzip cmnli_public.zip 30 | rm cmnli_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 
35 | 36 | # download model 37 | if [ ! -d $ERNIE_DIR ]; then 38 | mkdir -p $ERNIE_DIR 39 | echo "makedir $ERNIE_DIR" 40 | fi 41 | cd $ERNIE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip 45 | unzip baidu_ernie.zip 46 | rm baidu_ernie.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ERNIE_DIR/vocab.txt \ 62 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ERNIE_DIR/vocab.txt \ 78 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/ernie/run_classifier_csl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: Li Yudong 3 | # @Date: 2019-11-28 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:04:58 6 | 7 | TASK_NAME="csl" 8 | MODEL_NAME="baidu_ernie" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip 29 | unzip csl_public.zip 30 | rm csl_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ERNIE_DIR ]; then 38 | mkdir -p $ERNIE_DIR 39 | echo "makedir $ERNIE_DIR" 40 | fi 41 | cd $ERNIE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip 45 | unzip baidu_ernie.zip 46 | rm baidu_ernie.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 
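# The file checks above cover everything a TF1 BERT-style model directory
# needs: bert_config.json (model hyperparameters), vocab.txt (the WordPiece
# vocabulary), and the three bert_model.ckpt.* files (index, meta graph, and
# variable data) that TensorFlow resolves from the single prefix
# "bert_model.ckpt" passed to --init_checkpoint below.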
51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ERNIE_DIR/vocab.txt \ 62 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ERNIE_DIR/vocab.txt \ 78 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/ernie/run_classifier_iflytek.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:05:02 6 | 7 | TASK_NAME="iflytek" 8 | MODEL_NAME="baidu_ernie" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip 29 | unzip iflytek_public.zip 30 | rm iflytek_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ERNIE_DIR ]; then 38 | mkdir -p $ERNIE_DIR 39 | echo "makedir $ERNIE_DIR" 40 | fi 41 | cd $ERNIE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip 45 | unzip baidu_ernie.zip 46 | rm baidu_ernie.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
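# Usage, as implied by the argument handling below:
#   bash run_classifier_iflytek.sh           # fine-tune, then evaluate on dev
#   bash run_classifier_iflytek.sh predict   # write predictions for test.json
# Any other argument matches neither branch and the script does nothing.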
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ERNIE_DIR/vocab.txt \ 62 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ERNIE_DIR/vocab.txt \ 78 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/ernie/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:05:06 6 | 7 | TASK_NAME="tnews" 8 | MODEL_NAME="baidu_ernie" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip 29 | unzip tnews_public.zip 30 | rm tnews_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ERNIE_DIR ]; then 38 | mkdir -p $ERNIE_DIR 39 | echo "makedir $ERNIE_DIR" 40 | fi 41 | cd $ERNIE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip 45 | unzip baidu_ernie.zip 46 | rm baidu_ernie.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ERNIE_DIR/vocab.txt \ 62 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 
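# In predict mode --do_train and --do_eval are false, so the training flags
# below (--train_batch_size, --learning_rate, --num_train_epochs) are parsed
# but unused; predictions land under ${TASK_NAME}_output/, which in the
# upstream BERT run_classifier convention is a test_results.tsv of per-class
# probabilities (assuming this fork keeps that output format).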
71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ERNIE_DIR/vocab.txt \ 78 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/ernie/run_classifier_wsc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:05:13 6 | 7 | TASK_NAME="wsc" 8 | MODEL_NAME="baidu_ernie" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip 29 | unzip wsc_public.zip 30 | rm wsc_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ERNIE_DIR ]; then 38 | mkdir -p $ERNIE_DIR 39 | echo "makedir $ERNIE_DIR" 40 | fi 41 | cd $ERNIE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip 45 | unzip baidu_ernie.zip 46 | rm baidu_ernie.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ERNIE_DIR/vocab.txt \ 62 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 
71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ERNIE_DIR/vocab.txt \ 78 | --bert_config_file=$ERNIE_DIR/bert_config.json \ 79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/ernie/run_ner_msra.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | export CUDA_VISIBLE_DEVICES="0" 3 | export BERT_BASE_DIR=$CURRENT_DIR/prev_trained_model/tensorflow 4 | export GLUE_DIR=$CURRENT_DIR/../../glue/chineseGLUEdatasets/ 5 | TASK_NAME="msraner" 6 | 7 | python run_ner.py \ 8 | --task_name=$TASK_NAME \ 9 | --do_train=true \ 10 | --do_eval=false \ 11 | --do_predict=true \ 12 | --data_dir=$GLUE_DIR/$TASK_NAME \ 13 | --vocab_file=$BERT_BASE_DIR/vocab.txt \ 14 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \ 15 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \ 16 | --max_seq_length=256 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=5.0 \ 20 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 21 | -------------------------------------------------------------------------------- /models/ernie/tpu/run_classifier_inews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="inews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/ernie1.0-base/baidu_ernie 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/ernie1.0-base/baidu_ernie/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/ernie/tpu/run_classifier_lcqmc.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="lcqmc" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/ernie1.0-base/baidu_ernie 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/ernie1.0-base/baidu_ernie/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | 
--init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470 22 | -------------------------------------------------------------------------------- /models/ernie/tpu/run_classifier_thucnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="thucnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/ernie1.0-base/baidu_ernie 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/ernie1.0-base/baidu_ernie/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/ernie/tpu/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="tnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/ernie1.0-base/baidu_ernie 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/ernie1.0-base/baidu_ernie/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.16.0.2:8470 22 | -------------------------------------------------------------------------------- /models/ernie/tpu/run_classifier_xnli.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="xnli" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/ernie1.0-base/baidu_ernie 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/ernie1.0-base/baidu_ernie/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta/create_pretrain_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo $1,$2 3 | 4 | for((i=$1;i<=$2;i++)); 5 | do 6 | python3 create_pretraining_data.py --do_whole_word_mask=True --input_file=./raw_text/news2016zh_$i.txt \ 7 | --output_file=./tf_records_all/tf_news2016zh_$i.tfrecord --vocab_file=./resources/vocab.txt \ 8 | --do_lower_case=True --max_seq_length=256 --max_predictions_per_seq=23 --masked_lm_prob=0.10 --random_seed=12345 --dupe_factor=5 9 | done 10 | -------------------------------------------------------------------------------- /models/roberta/resources/RoBERTa_zh_Large_Learning_Curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/roberta/resources/RoBERTa_zh_Large_Learning_Curve.png -------------------------------------------------------------------------------- /models/roberta/run_classifier_afqmc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:19:53 6 | 7 | TASK_NAME="afqmc" 8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip 29 | unzip afqmc_public.zip 30 | rm afqmc_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then 38 | mkdir -p $ROBERTA_LARGE_DIR 39 | echo "makedir $ROBERTA_LARGE_DIR" 40 | fi 41 | cd $ROBERTA_LARGE_DIR 42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip 45 | unzip roeberta_zh_L-24_H-1024_A-16.zip 46 | rm roeberta_zh_L-24_H-1024_A-16.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
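# Unlike the BERT-style directories above, this RoBERTa-large release ships
# its config as bert_config_large.json and its weights under the prefix
# roberta_zh_large_model.ckpt, so the flags below name them explicitly
# ("roeberta" is the published archive's own spelling and is kept as-is).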
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/roberta/run_classifier_cmnli.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:20:14 6 | 7 | TASK_NAME="cmnli" 8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip 29 | unzip cmnli_public.zip 30 | rm cmnli_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then 38 | mkdir -p $ROBERTA_LARGE_DIR 39 | echo "makedir $ROBERTA_LARGE_DIR" 40 | fi 41 | cd $ROBERTA_LARGE_DIR 42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip 45 | unzip roeberta_zh_L-24_H-1024_A-16.zip 46 | rm roeberta_zh_L-24_H-1024_A-16.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi -------------------------------------------------------------------------------- /models/roberta/run_classifier_csl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: Li Yudong 3 | # @Date: 2019-11-28 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:20:24 6 | 7 | TASK_NAME="csl" 8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip 29 | unzip csl_public.zip 30 | rm csl_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then 38 | mkdir -p $ROBERTA_LARGE_DIR 39 | echo "makedir $ROBERTA_LARGE_DIR" 40 | fi 41 | cd $ROBERTA_LARGE_DIR 42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip 45 | unzip roeberta_zh_L-24_H-1024_A-16.zip 46 | rm roeberta_zh_L-24_H-1024_A-16.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/roberta/run_classifier_iflytek.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:20:28 6 | 7 | TASK_NAME="iflytek" 8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip 29 | unzip iflytek_public.zip 30 | rm iflytek_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then 38 | mkdir -p $ROBERTA_LARGE_DIR 39 | echo "makedir $ROBERTA_LARGE_DIR" 40 | fi 41 | cd $ROBERTA_LARGE_DIR 42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip 45 | unzip roeberta_zh_L-24_H-1024_A-16.zip 46 | rm roeberta_zh_L-24_H-1024_A-16.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
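# To sweep several CLUE tasks with this layout, a minimal sketch (assuming the
# sibling scripts in this directory all follow the same naming scheme):
#   for t in afqmc cmnli csl iflytek tnews wsc; do
#     bash run_classifier_${t}.sh
#   done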
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/roberta/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:20:34 6 | 7 | TASK_NAME="tnews" 8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip 29 | unzip tnews_public.zip 30 | rm tnews_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then 38 | mkdir -p $ROBERTA_LARGE_DIR 39 | echo "makedir $ROBERTA_LARGE_DIR" 40 | fi 41 | cd $ROBERTA_LARGE_DIR 42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip 45 | unzip roeberta_zh_L-24_H-1024_A-16.zip 46 | rm roeberta_zh_L-24_H-1024_A-16.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/roberta/run_classifier_wsc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:20:39 6 | 7 | TASK_NAME="wsc" 8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip 29 | unzip wsc_public.zip 30 | rm wsc_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then 38 | mkdir -p $ROBERTA_LARGE_DIR 39 | echo "makedir $ROBERTA_LARGE_DIR" 40 | fi 41 | cd $ROBERTA_LARGE_DIR 42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip 45 | unzip roeberta_zh_L-24_H-1024_A-16.zip 46 | rm roeberta_zh_L-24_H-1024_A-16.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \ 79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/roberta/run_ner_msra.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | export CUDA_VISIBLE_DEVICES="0" 3 | export BERT_LARGE_DIR=$CURRENT_DIR/prev_trained_model/roberta_zh_L-24_H-1024_A-16 4 | export GLUE_DIR=$CURRENT_DIR/../../glue/chineseGLUEdatasets/ 5 | TASK_NAME="msraner" 6 | 7 | python run_ner.py \ 8 | --task_name=$TASK_NAME \ 9 | --do_train=true \ 10 | --do_eval=false \ 11 | --do_predict=true \ 12 | --data_dir=$GLUE_DIR/$TASK_NAME \ 13 | --vocab_file=$BERT_LARGE_DIR/vocab.txt \ 14 | --bert_config_file=$BERT_LARGE_DIR/bert_config_large.json \ 15 | --init_checkpoint=$BERT_LARGE_DIR/roberta_zh_large_model.ckpt \ 16 | --max_seq_length=256 \ 17 | --train_batch_size=8 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=5.0 \ 20 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 21 | -------------------------------------------------------------------------------- /models/roberta/tpu/run_classifier_inews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="inews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/oberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta/tpu/run_classifier_jdcomment.sh: 
-------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="jdcomment" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME} 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.230.1.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta/tpu/run_classifier_lcqmc.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="lcqmc" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-large/roeberta_zh_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME} 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-large/roeberta_zh_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config_large.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/roberta_zh_large_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.16.0.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta/tpu/run_classifier_thucnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="thucnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/oberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | 
--num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta/tpu/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="tnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-large/roeberta_zh_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-large/roeberta_zh_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config_large.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/roberta_zh_large_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta/tpu/run_classifier_xnli.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="xnli" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/oberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | BERT needs to maintain permanent compatibility with the pre-trained model files, 4 | so we do not plan to make any major changes to this library (other than what was 5 | promised in the README). However, we can accept small patches related to 6 | re-factoring and documentation. To submit contributions, there are just a few 7 | small guidelines you need to follow. 8 | 9 | ## Contributor License Agreement 10 | 11 | Contributions to this project must be accompanied by a Contributor License 12 | Agreement.
You (or your employer) retain the copyright to your contribution; 13 | this simply gives us permission to use and redistribute your contributions as 14 | part of the project. Head over to <https://cla.developers.google.com/> to see 15 | your current agreements on file or to sign a new one. 16 | 17 | You generally only need to submit a CLA once, so if you've already submitted one 18 | (even if it was for a different project), you probably don't need to do it 19 | again. 20 | 21 | ## Code reviews 22 | 23 | All submissions, including submissions by project members, require review. We 24 | use GitHub pull requests for this purpose. Consult 25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 26 | information on using pull requests. 27 | 28 | ## Community Guidelines 29 | 30 | This project follows 31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 32 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/optimization_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
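# Smoke test for optimization.py: drives AdamWeightDecayOptimizer for 100
# steps on a 3-dimensional least-squares problem (loss = mean((x - w)^2)) and
# asserts that the weights converge to the target [0.4, 0.2, -0.5] within a
# 1e-2 tolerance, running in TF1 graph mode via tf.test.TestCase.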
15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import optimization 20 | import tensorflow as tf 21 | 22 | 23 | class OptimizationTest(tf.test.TestCase): 24 | 25 | def test_adam(self): 26 | with self.test_session() as sess: 27 | w = tf.get_variable( 28 | "w", 29 | shape=[3], 30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1])) 31 | x = tf.constant([0.4, 0.2, -0.5]) 32 | loss = tf.reduce_mean(tf.square(x - w)) 33 | tvars = tf.trainable_variables() 34 | grads = tf.gradients(loss, tvars) 35 | global_step = tf.train.get_or_create_global_step() 36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2) 37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step) 38 | init_op = tf.group(tf.global_variables_initializer(), 39 | tf.local_variables_initializer()) 40 | sess.run(init_op) 41 | for _ in range(100): 42 | sess.run(train_op) 43 | w_np = sess.run(w) 44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2) 45 | 46 | 47 | if __name__ == "__main__": 48 | tf.test.main() 49 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow. 2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow. 3 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/run_classifier_afqmc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:21:27 6 | 7 | TASK_NAME="afqmc" 8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip 29 | unzip afqmc_public.zip 30 | rm afqmc_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_WWM_DIR ]; then 38 | mkdir -p $ROBERTA_WWM_DIR 39 | echo "makedir $ROBERTA_WWM_DIR" 40 | fi 41 | cd $ROBERTA_WWM_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/run_classifier_cmnli.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:21:50 6 | 7 | TASK_NAME="cmnli" 8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip 29 | unzip cmnli_public.zip 30 | rm cmnli_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_WWM_DIR ]; then 38 | mkdir -p $ROBERTA_WWM_DIR 39 | echo "makedir $ROBERTA_WWM_DIR" 40 | fi 41 | cd $ROBERTA_WWM_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
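# GPU selection is fixed by the `export CUDA_VISIBLE_DEVICES="0"` line near
# the top of this script; because the script re-exports it, an override at
# invocation time (e.g. CUDA_VISIBLE_DEVICES=1 bash run_classifier_cmnli.sh)
# is clobbered, so edit or remove that line to target a different device.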
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/run_classifier_csl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: Li Yudong 3 | # @Date: 2019-11-28 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:21:58 6 | 7 | TASK_NAME="csl" 8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip 29 | unzip csl_public.zip 30 | rm csl_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_WWM_DIR ]; then 38 | mkdir -p $ROBERTA_WWM_DIR 39 | echo "makedir $ROBERTA_WWM_DIR" 40 | fi 41 | cd $ROBERTA_WWM_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/run_classifier_iflytek.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:22:02 6 | 7 | TASK_NAME="iflytek" 8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip 29 | unzip iflytek_public.zip 30 | rm iflytek_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_WWM_DIR ]; then 38 | mkdir -p $ROBERTA_WWM_DIR 39 | echo "makedir $ROBERTA_WWM_DIR" 40 | fi 41 | cd $ROBERTA_WWM_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:22:06 6 | 7 | TASK_NAME="tnews" 8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip 29 | unzip tnews_public.zip 30 | rm tnews_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_WWM_DIR ]; then 38 | mkdir -p $ROBERTA_WWM_DIR 39 | echo "makedir $ROBERTA_WWM_DIR" 40 | fi 41 | cd $ROBERTA_WWM_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | 87 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/run_classifier_wsc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:22:11 6 | 7 | TASK_NAME="wsc" 8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip 29 | unzip wsc_public.zip 30 | rm wsc_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_WWM_DIR ]; then 38 | mkdir -p $ROBERTA_WWM_DIR 39 | echo "makedir $ROBERTA_WWM_DIR" 40 | fi 41 | cd $ROBERTA_WWM_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \ 79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | 87 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/run_ner_msra.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | export CUDA_VISIBLE_DEVICES="0" 3 | export BERT_BASE_DIR=$CURRENT_DIR/prev_trained_model/chinese_roberta_wwm_ext_L-12_H-768_A-12 4 | export GLUE_DIR=$CURRENT_DIR/../../glue/chineseGLUEdatasets/ 5 | TASK_NAME="msraner" 6 | 7 | python run_ner.py \ 8 | --task_name=$TASK_NAME \ 9 | --do_train=true \ 10 | --do_eval=false \ 11 | --do_predict=true \ 12 | --data_dir=$GLUE_DIR/$TASK_NAME \ 13 | --vocab_file=$BERT_BASE_DIR/vocab.txt \ 14 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \ 15 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \ 16 | --max_seq_length=256 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=5.0 \ 20 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 21 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/tpu/run_classifier_inews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="inews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/oberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/tpu/run_classifier_jdcomment.sh: 
-------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="jdcomment" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME} 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.230.1.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/tpu/run_classifier_lcqmc.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="lcqmc" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-base/chinese_roberta_wwm_ext_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME} 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-base/chinese_roberta_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.16.0.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/tpu/run_classifier_thucnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="thucnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/oberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=32 \ 18 | 
--learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/tpu/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="tnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-base/chinese_roberta_wwm_ext_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-base/chinese_roberta_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta_wwm_ext/tpu/run_classifier_xnli.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="xnli" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/oberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | BERT needs to maintain permanent compatibility with the pre-trained model files, 4 | so we do not plan to make any major changes to this library (other than what was 5 | promised in the README). However, we can accept small patches related to 6 | re-factoring and documentation. To submit contributions, there are just a few 7 | small guidelines you need to follow. 8 | 9 | ## Contributor License Agreement 10 | 11 | Contributions to this project must be accompanied by a Contributor License 12 | Agreement.
You (or your employer) retain the copyright to your contribution; 13 | this simply gives us permission to use and redistribute your contributions as 14 | part of the project. Head over to https://cla.developers.google.com/ to see 15 | your current agreements on file or to sign a new one. 16 | 17 | You generally only need to submit a CLA once, so if you've already submitted one 18 | (even if it was for a different project), you probably don't need to do it 19 | again. 20 | 21 | ## Code reviews 22 | 23 | All submissions, including submissions by project members, require review. We 24 | use GitHub pull requests for this purpose. Consult 25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 26 | information on using pull requests. 27 | 28 | ## Community Guidelines 29 | 30 | This project follows 31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 32 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/optimization_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import optimization 20 | import tensorflow as tf 21 | 22 | 23 | class OptimizationTest(tf.test.TestCase): 24 | 25 | def test_adam(self): 26 | with self.test_session() as sess: 27 | w = tf.get_variable( 28 | "w", 29 | shape=[3], 30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1])) 31 | x = tf.constant([0.4, 0.2, -0.5]) 32 | loss = tf.reduce_mean(tf.square(x - w)) 33 | tvars = tf.trainable_variables() 34 | grads = tf.gradients(loss, tvars) 35 | global_step = tf.train.get_or_create_global_step() 36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2) 37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step) 38 | init_op = tf.group(tf.global_variables_initializer(), 39 | tf.local_variables_initializer()) 40 | sess.run(init_op) 41 | for _ in range(100): 42 | sess.run(train_op) 43 | w_np = sess.run(w) 44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2) 45 | 46 | 47 | if __name__ == "__main__": 48 | tf.test.main() 49 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow. 2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow. 3 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/run_classifier_afqmc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:23:18 6 | 7 | TASK_NAME="afqmc" 8 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip 29 | unzip afqmc_public.zip 30 | rm afqmc_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then 38 | mkdir -p $ROBERTA_WWM_LARGE_DIR 39 | echo "makedir $ROBERTA_WWM_LARGE_DIR" 40 | fi 41 | cd $ROBERTA_WWM_LARGE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 45 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 46 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 
51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..." 55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 63 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 79 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/run_classifier_cmnli.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:23:30 6 | 7 | TASK_NAME="cmnli" 8 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip 29 | unzip cmnli_public.zip 30 | rm cmnli_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then 38 | mkdir -p $ROBERTA_WWM_LARGE_DIR 39 | echo "makedir $ROBERTA_WWM_LARGE_DIR" 40 | fi 41 | cd $ROBERTA_WWM_LARGE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 45 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 46 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..."
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 63 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 79 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/run_classifier_csl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # @Author: bo.shi 3 | # @Date: 2019-11-04 09:56:36 4 | # @Last Modified by: bo.shi 5 | # @Last Modified time: 2019-12-05 11:23:41 6 | 7 | TASK_NAME="csl" 8 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16" 9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 10 | export CUDA_VISIBLE_DEVICES="0" 11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 12 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 14 | 15 | # download and unzip dataset 16 | if [ ! -d $GLUE_DATA_DIR ]; then 17 | mkdir -p $GLUE_DATA_DIR 18 | echo "makedir $GLUE_DATA_DIR" 19 | fi 20 | cd $GLUE_DATA_DIR 21 | if [ ! -d $TASK_NAME ]; then 22 | mkdir $TASK_NAME 23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 24 | fi 25 | cd $TASK_NAME 26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 27 | rm * 28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip 29 | unzip csl_public.zip 30 | rm csl_public.zip 31 | else 32 | echo "data exists" 33 | fi 34 | echo "Finish download dataset." 35 | 36 | # download model 37 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then 38 | mkdir -p $ROBERTA_WWM_LARGE_DIR 39 | echo "makedir $ROBERTA_WWM_LARGE_DIR" 40 | fi 41 | cd $ROBERTA_WWM_LARGE_DIR 42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 43 | rm * 44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 45 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 46 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 47 | else 48 | echo "model exists" 49 | fi 50 | echo "Finish download model." 51 | 52 | # run task 53 | cd $CURRENT_DIR 54 | echo "Start running..."
55 | if [ $# == 0 ]; then 56 | python run_classifier.py \ 57 | --task_name=$TASK_NAME \ 58 | --do_train=true \ 59 | --do_eval=true \ 60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 61 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 62 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 63 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 64 | --max_seq_length=128 \ 65 | --train_batch_size=32 \ 66 | --learning_rate=2e-5 \ 67 | --num_train_epochs=3.0 \ 68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 69 | elif [ $1 == "predict" ]; then 70 | echo "Start predict..." 71 | python run_classifier.py \ 72 | --task_name=$TASK_NAME \ 73 | --do_train=false \ 74 | --do_eval=false \ 75 | --do_predict=true \ 76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 77 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 78 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 79 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 80 | --max_seq_length=128 \ 81 | --train_batch_size=32 \ 82 | --learning_rate=2e-5 \ 83 | --num_train_epochs=3.0 \ 84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 85 | fi 86 | 87 | 88 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/run_classifier_iflytek.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/usr/bin/env bash 3 | # @Author: bo.shi 4 | # @Date: 2019-11-04 09:56:36 5 | # @Last Modified by: bo.shi 6 | # @Last Modified time: 2019-12-05 11:23:45 7 | 8 | TASK_NAME="iflytek" 9 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16" 10 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 11 | export CUDA_VISIBLE_DEVICES="0" 12 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 13 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 15 | 16 | # download and unzip dataset 17 | if [ ! -d $GLUE_DATA_DIR ]; then 18 | mkdir -p $GLUE_DATA_DIR 19 | echo "makedir $GLUE_DATA_DIR" 20 | fi 21 | cd $GLUE_DATA_DIR 22 | if [ ! -d $TASK_NAME ]; then 23 | mkdir $TASK_NAME 24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 25 | fi 26 | cd $TASK_NAME 27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 28 | rm * 29 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip 30 | unzip iflytek_public.zip 31 | rm iflytek_public.zip 32 | else 33 | echo "data exists" 34 | fi 35 | echo "Finish download dataset." 36 | 37 | # download model 38 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then 39 | mkdir -p $ROBERTA_WWM_LARGE_DIR 40 | echo "makedir $ROBERTA_WWM_LARGE_DIR" 41 | fi 42 | cd $ROBERTA_WWM_LARGE_DIR 43 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 44 | rm * 45 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 46 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 47 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 48 | else 49 | echo "model exists" 50 | fi 51 | echo "Finish download model." 52 | 53 | # run task 54 | cd $CURRENT_DIR 55 | echo "Start running..."
56 | if [ $# == 0 ]; then 57 | python run_classifier.py \ 58 | --task_name=$TASK_NAME \ 59 | --do_train=true \ 60 | --do_eval=true \ 61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 62 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 63 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 64 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 65 | --max_seq_length=128 \ 66 | --train_batch_size=32 \ 67 | --learning_rate=2e-5 \ 68 | --num_train_epochs=3.0 \ 69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 70 | elif [ $1 == "predict" ]; then 71 | echo "Start predict..." 72 | python run_classifier.py \ 73 | --task_name=$TASK_NAME \ 74 | --do_train=false \ 75 | --do_eval=false \ 76 | --do_predict=true \ 77 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 78 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 79 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 80 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 81 | --max_seq_length=128 \ 82 | --train_batch_size=32 \ 83 | --learning_rate=2e-5 \ 84 | --num_train_epochs=3.0 \ 85 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 86 | fi 87 | 88 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/usr/bin/env bash 3 | # @Author: bo.shi 4 | # @Date: 2019-11-04 09:56:36 5 | # @Last Modified by: bo.shi 6 | # @Last Modified time: 2019-12-05 11:23:49 7 | 8 | TASK_NAME="tnews" 9 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16" 10 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 11 | export CUDA_VISIBLE_DEVICES="0" 12 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 13 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 15 | 16 | # download and unzip dataset 17 | if [ ! -d $GLUE_DATA_DIR ]; then 18 | mkdir -p $GLUE_DATA_DIR 19 | echo "makedir $GLUE_DATA_DIR" 20 | fi 21 | cd $GLUE_DATA_DIR 22 | if [ ! -d $TASK_NAME ]; then 23 | mkdir $TASK_NAME 24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 25 | fi 26 | cd $TASK_NAME 27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 28 | rm * 29 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip 30 | unzip tnews_public.zip 31 | rm tnews_public.zip 32 | else 33 | echo "data exists" 34 | fi 35 | echo "Finish download dataset." 36 | 37 | # download model 38 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then 39 | mkdir -p $ROBERTA_WWM_LARGE_DIR 40 | echo "makedir $ROBERTA_WWM_LARGE_DIR" 41 | fi 42 | cd $ROBERTA_WWM_LARGE_DIR 43 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 44 | rm * 45 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 46 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 47 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 48 | else 49 | echo "model exists" 50 | fi 51 | echo "Finish download model." 52 | 53 | # run task 54 | cd $CURRENT_DIR 55 | echo "Start running..."
56 | if [ $# == 0 ]; then 57 | python run_classifier.py \ 58 | --task_name=$TASK_NAME \ 59 | --do_train=true \ 60 | --do_eval=true \ 61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 62 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 63 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 64 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 65 | --max_seq_length=128 \ 66 | --train_batch_size=32 \ 67 | --learning_rate=2e-5 \ 68 | --num_train_epochs=3.0 \ 69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 70 | elif [ $1 == "predict" ]; then 71 | echo "Start predict..." 72 | python run_classifier.py \ 73 | --task_name=$TASK_NAME \ 74 | --do_train=false \ 75 | --do_eval=false \ 76 | --do_predict=true \ 77 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 78 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 79 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 80 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 81 | --max_seq_length=128 \ 82 | --train_batch_size=32 \ 83 | --learning_rate=2e-5 \ 84 | --num_train_epochs=3.0 \ 85 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 86 | fi 87 | 88 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/run_classifier_wsc.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/usr/bin/env bash 3 | # @Author: bo.shi 4 | # @Date: 2019-11-04 09:56:36 5 | # @Last Modified by: bo.shi 6 | # @Last Modified time: 2019-12-05 11:23:54 7 | 8 | TASK_NAME="wsc" 9 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16" 10 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 11 | export CUDA_VISIBLE_DEVICES="0" 12 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model 13 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME 14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset 15 | 16 | # download and unzip dataset 17 | if [ ! -d $GLUE_DATA_DIR ]; then 18 | mkdir -p $GLUE_DATA_DIR 19 | echo "makedir $GLUE_DATA_DIR" 20 | fi 21 | cd $GLUE_DATA_DIR 22 | if [ ! -d $TASK_NAME ]; then 23 | mkdir $TASK_NAME 24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME" 25 | fi 26 | cd $TASK_NAME 27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then 28 | rm * 29 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip 30 | unzip wsc_public.zip 31 | rm wsc_public.zip 32 | else 33 | echo "data exists" 34 | fi 35 | echo "Finish download dataset." 36 | 37 | # download model 38 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then 39 | mkdir -p $ROBERTA_WWM_LARGE_DIR 40 | echo "makedir $ROBERTA_WWM_LARGE_DIR" 41 | fi 42 | cd $ROBERTA_WWM_LARGE_DIR 43 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then 44 | rm * 45 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 46 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 47 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip 48 | else 49 | echo "model exists" 50 | fi 51 | echo "Finish download model." 52 | 53 | # run task 54 | cd $CURRENT_DIR 55 | echo "Start running..."
56 | if [ $# == 0 ]; then 57 | python run_classifier.py \ 58 | --task_name=$TASK_NAME \ 59 | --do_train=true \ 60 | --do_eval=true \ 61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 62 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 63 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 64 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 65 | --max_seq_length=128 \ 66 | --train_batch_size=32 \ 67 | --learning_rate=2e-5 \ 68 | --num_train_epochs=3.0 \ 69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 70 | elif [ $1 == "predict" ]; then 71 | echo "Start predict..." 72 | python run_classifier.py \ 73 | --task_name=$TASK_NAME \ 74 | --do_train=false \ 75 | --do_eval=false \ 76 | --do_predict=true \ 77 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \ 78 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \ 79 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \ 80 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \ 81 | --max_seq_length=128 \ 82 | --train_batch_size=32 \ 83 | --learning_rate=2e-5 \ 84 | --num_train_epochs=3.0 \ 85 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 86 | fi 87 | 88 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/run_ner_msra.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | export CUDA_VISIBLE_DEVICES="0" 3 | export BERT_BASE_DIR=$CURRENT_DIR/prev_trained_model/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 4 | export GLUE_DIR=$CURRENT_DIR/../../glue/chineseGLUEdatasets/ 5 | TASK_NAME="msraner" 6 | 7 | python run_ner.py \ 8 | --task_name=$TASK_NAME \ 9 | --do_train=true \ 10 | --do_eval=false \ 11 | --do_predict=true \ 12 | --data_dir=$GLUE_DIR/$TASK_NAME \ 13 | --vocab_file=$BERT_BASE_DIR/vocab.txt \ 14 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \ 15 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \ 16 | --max_seq_length=256 \ 17 | --train_batch_size=8 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=5.0 \ 20 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/ 21 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/tpu/run_classifier_inews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="inews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/oberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | --------------------------------------------------------------------------------
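The run_classifier_*.sh fine-tuning scripts above all share the same command-line contract: invoked with no arguments, they download the task data and the pretrained model if missing, then fine-tune and evaluate; invoked with the single argument `predict`, they reuse the fine-tuned checkpoint in $CURRENT_DIR/${TASK_NAME}_output/ to write test-set predictions. A minimal usage sketch, assuming a machine with wget, unzip, a CUDA GPU, and TensorFlow 1.x installed:

    bash run_classifier_wsc.sh            # first run: download data and model, then train and evaluate
    bash run_classifier_wsc.sh predict    # afterwards: write test-set predictions to wsc_output/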
/models/roberta_wwm_large_ext/tpu/run_classifier_jdcomment.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="jdcomment" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME} 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.230.1.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/tpu/run_classifier_lcqmc.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="lcqmc" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/oberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/tpu/run_classifier_thucnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="thucnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/oberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | 
--init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/tpu/run_classifier_tnews.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="tnews" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=128 \ 17 | --train_batch_size=16 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=3.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470 22 | -------------------------------------------------------------------------------- /models/roberta_wwm_large_ext/tpu/run_classifier_xnli.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="xnli" 4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/oberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --task_name=$TASK_NAME \ 10 | --do_train=true \ 11 | --do_eval=true \ 12 | --data_dir=$DATA_DIR \ 13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \ 14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \ 15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \ 16 | --max_seq_length=512 \ 17 | --train_batch_size=32 \ 18 | --learning_rate=2e-5 \ 19 | --num_train_epochs=8.0 \ 20 | --output_dir=$OUTPUT_DIR \ 21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470 22 | -------------------------------------------------------------------------------- /models/xlnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/xlnet/__init__.py -------------------------------------------------------------------------------- /models/xlnet/gpu_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import tensorflow as tf 7 | 8 | def 
assign_to_gpu(gpu=0, ps_dev="/device:CPU:0"): 9 | def _assign(op): 10 | node_def = op if isinstance(op, tf.NodeDef) else op.node_def 11 | if node_def.op == "Variable": 12 | return ps_dev 13 | else: 14 | return "/gpu:%d" % gpu 15 | return _assign 16 | 17 | 18 | def average_grads_and_vars(tower_grads_and_vars): 19 | def average_dense(grad_and_vars): 20 | if len(grad_and_vars) == 1: 21 | return grad_and_vars[0][0] 22 | 23 | grad = grad_and_vars[0][0] 24 | for g, _ in grad_and_vars[1:]: 25 | grad += g 26 | return grad / len(grad_and_vars) 27 | 28 | def average_sparse(grad_and_vars): 29 | if len(grad_and_vars) == 1: 30 | return grad_and_vars[0][0] 31 | 32 | indices = [] 33 | values = [] 34 | for g, _ in grad_and_vars: 35 | indices += [g.indices] 36 | values += [g.values] 37 | indices = tf.concat(indices, 0) 38 | values = tf.concat(values, 0) / len(grad_and_vars) 39 | return tf.IndexedSlices(values, indices, grad_and_vars[0][0].dense_shape) 40 | 41 | average_grads_and_vars = [] 42 | for grad_and_vars in zip(*tower_grads_and_vars): 43 | if grad_and_vars[0][0] is None: 44 | grad = None 45 | elif isinstance(grad_and_vars[0][0], tf.IndexedSlices): 46 | grad = average_sparse(grad_and_vars) 47 | else: 48 | grad = average_dense(grad_and_vars) 49 | # Keep in mind that the Variables are redundant because they are shared 50 | # across towers. So .. we will just return the first tower's pointer to 51 | # the Variable. 52 | v = grad_and_vars[0][1] 53 | grad_and_var = (grad, v) 54 | average_grads_and_vars.append(grad_and_var) 55 | return average_grads_and_vars 56 | 57 | 58 | def load_from_checkpoint(saver, logdir): 59 | sess = tf.get_default_session() 60 | ckpt = tf.train.get_checkpoint_state(logdir) 61 | if ckpt and ckpt.model_checkpoint_path: 62 | if os.path.isabs(ckpt.model_checkpoint_path): 63 | # Restores from checkpoint with absolute path. 64 | saver.restore(sess, ckpt.model_checkpoint_path) 65 | else: 66 | # Restores from checkpoint with relative path. 
--------------------------------------------------------------------------------
/models/xlnet/spiece.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/xlnet/spiece.model
--------------------------------------------------------------------------------
/models/xlnet/temp.sh:
--------------------------------------------------------------------------------
a=`pwd`
echo $a
--------------------------------------------------------------------------------
/models/xlnet/tpu/run_classifier_inews.sh:
--------------------------------------------------------------------------------
CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
TASK_NAME="inews"
export XLNET_DIR=gs://models_zxw/prev_trained_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12
export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME

python $CURRENT_DIR/../run_classifier.py \
  --spiece_model_file=${CURRENT_DIR}/../spiece.model \
  --model_config_path=${XLNET_DIR}/xlnet_config.json \
  --init_checkpoint=${XLNET_DIR}/xlnet_model.ckpt \
  --task_name=$TASK_NAME \
  --do_train=True \
  --do_eval=True \
  --eval_all_ckpt=False \
  --uncased=False \
  --data_dir=$DATA_DIR \
  --output_dir=${OUTPUT_DIR} \
  --model_dir=${OUTPUT_DIR} \
  --train_batch_size=32 \
  --eval_batch_size=8 \
  --num_hosts=1 \
  --num_core_per_host=8 \
  --num_train_epochs=3 \
  --max_seq_length=128 \
  --learning_rate=2e-5 \
  --save_steps=1000 \
  --use_tpu=True
--------------------------------------------------------------------------------
/models/xlnet/tpu/run_classifier_lcqmc.sh:
--------------------------------------------------------------------------------
CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
TASK_NAME="lcqmc"
export XLNET_DIR=gs://models_zxw/prev_trained_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12
export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME

python $CURRENT_DIR/../run_classifier.py \
  --spiece_model_file=${CURRENT_DIR}/../spiece.model \
  --model_config_path=${XLNET_DIR}/xlnet_config.json \
  --init_checkpoint=${XLNET_DIR}/xlnet_model.ckpt \
  --task_name=$TASK_NAME \
  --do_train=True \
  --do_eval=True \
  --eval_all_ckpt=False \
  --uncased=False \
  --data_dir=$DATA_DIR \
  --output_dir=${OUTPUT_DIR} \
  --model_dir=${OUTPUT_DIR} \
  --train_batch_size=32 \
  --eval_batch_size=8 \
  --num_hosts=1 \
  --num_core_per_host=8 \
  --num_train_epochs=3 \
  --max_seq_length=128 \
  --learning_rate=2e-5 \
  --save_steps=1000 \
  --use_tpu=True
"$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="tnews" 4 | export XLNET_DIR=gs://models_zxw/prev_trained_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --spiece_model_file=${CURRENT_DIR}/../spiece.model \ 10 | --model_config_path=${XLNET_DIR}/xlnet_config.json \ 11 | --init_checkpoint=${XLNET_DIR}/xlnet_model.ckpt \ 12 | --task_name=$TASK_NAME \ 13 | --do_train=True \ 14 | --do_eval=True \ 15 | --eval_all_ckpt=True \ 16 | --uncased=False \ 17 | --data_dir=$DATA_DIR \ 18 | --output_dir=${OUTPUT_DIR} \ 19 | --model_dir=${OUTPUT_DIR} \ 20 | --train_batch_size=16 \ 21 | --eval_batch_size=8 \ 22 | --num_hosts=1 \ 23 | --num_core_per_host=8 \ 24 | --num_train_epochs=3 \ 25 | --max_seq_length=128 \ 26 | --learning_rate=1e-5 \ 27 | --save_steps=1000 \ 28 | --use_tpu=True --tpu=grpc://192.168.0.2:8470 29 | -------------------------------------------------------------------------------- /models/xlnet/tpu/run_classifier_xnli.sh: -------------------------------------------------------------------------------- 1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S") 3 | TASK_NAME="xnli" 4 | export XLNET_DIR=gs://models_zxw/prev_trained_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12 5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME 6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME 7 | 8 | python $CURRENT_DIR/../run_classifier.py \ 9 | --spiece_model_file=${CURRENT_DIR}/../spiece.model \ 10 | --model_config_path=${XLNET_DIR}/xlnet_config.json \ 11 | --init_checkpoint=${XLNET_DIR}/xlnet_model.ckpt \ 12 | --task_name=$TASK_NAME \ 13 | --do_train=True \ 14 | --do_eval=True \ 15 | --eval_all_ckpt=False \ 16 | --uncased=False \ 17 | --data_dir=$DATA_DIR \ 18 | --output_dir=${OUTPUT_DIR} \ 19 | --model_dir=${OUTPUT_DIR} \ 20 | --train_batch_size=32 \ 21 | --eval_batch_size=8 \ 22 | --num_hosts=1 \ 23 | --num_core_per_host=8 \ 24 | --num_train_epochs=3 \ 25 | --max_seq_length=128 \ 26 | --learning_rate=2e-5 \ 27 | --save_steps=1000 \ 28 | --use_tpu=True 29 | --------------------------------------------------------------------------------