.
├── CLUEdataset
│   └── emotion
│       ├── label_distribution.png
│       ├── test.txt
│       ├── train.txt
│       └── valid.txt
├── README.md
└── models
    ├── albert
    │   ├── albert_config
    │   │   ├── albert_config_base.json
    │   │   ├── albert_config_large.json
    │   │   ├── albert_config_tiny.json
    │   │   ├── albert_config_xlarge.json
    │   │   ├── albert_config_xxlarge.json
    │   │   ├── bert_config.json
    │   │   └── vocab.txt
    │   ├── bert_utils.py
    │   ├── create_pretrain_data.sh
    │   ├── create_pretraining_data.py
    │   ├── modeling.py
    │   ├── optimization.py
    │   ├── optimization_finetuning.py
    │   ├── resources
    │   │   ├── add_data_removing_dropout.jpg
    │   │   ├── albert_configuration.jpg
    │   │   ├── albert_performance.jpg
    │   │   ├── create_pretraining_data_roberta.py
    │   │   ├── shell_scripts
    │   │   │   └── create_pretrain_data_batch_webtext.sh
    │   │   └── state_of_the_art.jpg
    │   ├── run_classifier.py
    │   ├── run_classifier_afqmc.sh
    │   ├── run_classifier_cmnli.sh
    │   ├── run_classifier_csl.sh
    │   ├── run_classifier_iflytek.sh
    │   ├── run_classifier_tnews.sh
    │   ├── run_classifier_wsc.sh
    │   ├── run_pretraining.py
    │   ├── test_changes.py
    │   ├── tokenization.py
    │   └── tpu
    │       ├── run_classifier_inews.sh
    │       ├── run_classifier_inews_tiny.sh
    │       ├── run_classifier_lcqmc.sh
    │       ├── run_classifier_lcqmc_tiny.sh
    │       ├── run_classifier_thucnews.sh
    │       ├── run_classifier_thucnews_tiny.sh
    │       ├── run_classifier_tnews.sh
    │       ├── run_classifier_tnews_tiny.sh
    │       ├── run_classifier_xnli.sh
    │       └── run_classifier_xnli_tiny.sh
    ├── bert
    │   ├── .gitignore
    │   ├── CONTRIBUTING.md
    │   ├── LICENSE
    │   ├── __init__.py
    │   ├── conlleval.py
    │   ├── create_pretraining_data.py
    │   ├── extract_features.py
    │   ├── modeling.py
    │   ├── modeling_test.py
    │   ├── multilingual.md
    │   ├── optimization.py
    │   ├── optimization_test.py
    │   ├── predicting_movie_reviews_with_bert_on_tf_hub.ipynb
    │   ├── requirements.txt
    │   ├── run_classifier.py
    │   ├── run_classifier_afqmc.sh
    │   ├── run_classifier_cmnli.sh
    │   ├── run_classifier_csl.sh
    │   ├── run_classifier_emotion.sh
    │   ├── run_classifier_iflytek.sh
    │   ├── run_classifier_tnews.sh
    │   ├── run_classifier_with_tfhub.py
    │   ├── run_classifier_wsc.sh
    │   ├── run_ner.py
    │   ├── run_pretraining.py
    │   ├── run_squad.py
    │   ├── sample_text.txt
    │   ├── tf_metrics.py
    │   ├── tokenization.py
    │   ├── tokenization_test.py
    │   └── tpu
    │       ├── run_classifier_inews.sh
    │       ├── run_classifier_jdcomment.sh
    │       ├── run_classifier_lcqmc.sh
    │       ├── run_classifier_thucnews.sh
    │       ├── run_classifier_tnews.sh
    │       └── run_classifier_xnli.sh
    ├── bert_wwm_ext
    │   ├── .gitignore
    │   ├── CONTRIBUTING.md
    │   ├── LICENSE
    │   ├── __init__.py
    │   ├── conlleval.py
    │   ├── create_pretraining_data.py
    │   ├── extract_features.py
    │   ├── modeling.py
    │   ├── modeling_test.py
    │   ├── multilingual.md
    │   ├── optimization.py
    │   ├── optimization_test.py
    │   ├── predicting_movie_reviews_with_bert_on_tf_hub.ipynb
    │   ├── requirements.txt
    │   ├── run_classifier.py
    │   ├── run_classifier_afqmc.sh
    │   ├── run_classifier_cmnli.sh
    │   ├── run_classifier_csl.sh
    │   ├── run_classifier_iflytek.sh
    │   ├── run_classifier_tnews.sh
    │   ├── run_classifier_with_tfhub.py
    │   ├── run_classifier_wsc.sh
    │   ├── run_ner.py
    │   ├── run_ner_msra.sh
    │   ├── run_pretraining.py
    │   ├── run_squad.py
    │   ├── sample_text.txt
    │   ├── tf_metrics.py
    │   ├── tokenization.py
    │   ├── tokenization_test.py
    │   └── tpu
    │       ├── run_classifier_inews.sh
    │       ├── run_classifier_lcqmc.sh
    │       ├── run_classifier_thucnews.sh
    │       ├── run_classifier_tnews.sh
    │       └── run_classifier_xnli.sh
    ├── classifier_utils.py
    ├── copa_sh
    │   ├── convert_test.py
    │   ├── copa_eval_dev.sh
    │   ├── dev_label.txt
    │   └── eval_copa.py
    ├── ernie
    │   ├── .gitignore
    │   ├── CONTRIBUTING.md
    │   ├── LICENSE
    │   ├── __init__.py
    │   ├── conlleval.py
    │   ├── create_pretraining_data.py
    │   ├── extract_features.py
    │   ├── modeling.py
    │   ├── modeling_test.py
    │   ├── multilingual.md
    │   ├── optimization.py
    │   ├── optimization_test.py
    │   ├── predicting_movie_reviews_with_bert_on_tf_hub.ipynb
    │   ├── requirements.txt
    │   ├── run_classifier.py
    │   ├── run_classifier_afqmc.sh
    │   ├── run_classifier_cmnli.sh
    │   ├── run_classifier_csl.sh
    │   ├── run_classifier_iflytek.sh
    │   ├── run_classifier_tnews.sh
    │   ├── run_classifier_with_tfhub.py
    │   ├── run_classifier_wsc.sh
    │   ├── run_ner.py
    │   ├── run_ner_msra.sh
    │   ├── run_pretraining.py
    │   ├── run_squad.py
    │   ├── sample_text.txt
    │   ├── tf_metrics.py
    │   ├── tokenization.py
    │   ├── tokenization_test.py
    │   └── tpu
    │       ├── run_classifier_inews.sh
    │       ├── run_classifier_lcqmc.sh
    │       ├── run_classifier_thucnews.sh
    │       ├── run_classifier_tnews.sh
    │       └── run_classifier_xnli.sh
    ├── roberta
    │   ├── conlleval.py
    │   ├── create_pretrain_data.sh
    │   ├── create_pretraining_data.py
    │   ├── modeling.py
    │   ├── optimization.py
    │   ├── optimization_finetuning.py
    │   ├── resources
    │   │   ├── RoBERTa_zh_Large_Learning_Curve.png
    │   │   └── vocab.txt
    │   ├── run_classifier.py
    │   ├── run_classifier_afqmc.sh
    │   ├── run_classifier_cmnli.sh
    │   ├── run_classifier_csl.sh
    │   ├── run_classifier_iflytek.sh
    │   ├── run_classifier_tnews.sh
    │   ├── run_classifier_wsc.sh
    │   ├── run_ner.py
    │   ├── run_ner_msra.sh
    │   ├── run_pretraining.py
    │   ├── tf_metrics.py
    │   ├── tokenization.py
    │   └── tpu
    │       ├── run_classifier_inews.sh
    │       ├── run_classifier_jdcomment.sh
    │       ├── run_classifier_lcqmc.sh
    │       ├── run_classifier_thucnews.sh
    │       ├── run_classifier_tnews.sh
    │       └── run_classifier_xnli.sh
    ├── roberta_wwm_ext
    │   ├── CONTRIBUTING.md
    │   ├── LICENSE
    │   ├── __init__.py
    │   ├── conlleval.py
    │   ├── create_pretraining_data.py
    │   ├── extract_features.py
    │   ├── modeling.py
    │   ├── modeling_test.py
    │   ├── multilingual.md
    │   ├── optimization.py
    │   ├── optimization_test.py
    │   ├── requirements.txt
    │   ├── run_classifier.py
    │   ├── run_classifier_afqmc.sh
    │   ├── run_classifier_cmnli.sh
    │   ├── run_classifier_csl.sh
    │   ├── run_classifier_iflytek.sh
    │   ├── run_classifier_tnews.sh
    │   ├── run_classifier_with_tfhub.py
    │   ├── run_classifier_wsc.sh
    │   ├── run_ner.py
    │   ├── run_ner_msra.sh
    │   ├── run_pretraining.py
    │   ├── run_squad.py
    │   ├── tf_metrics.py
    │   ├── tokenization.py
    │   ├── tokenization_test.py
    │   └── tpu
    │       ├── run_classifier_inews.sh
    │       ├── run_classifier_jdcomment.sh
    │       ├── run_classifier_lcqmc.sh
    │       ├── run_classifier_thucnews.sh
    │       ├── run_classifier_tnews.sh
    │       └── run_classifier_xnli.sh
    ├── roberta_wwm_large_ext
    │   ├── CONTRIBUTING.md
    │   ├── LICENSE
    │   ├── __init__.py
    │   ├── conlleval.py
    │   ├── create_pretraining_data.py
    │   ├── extract_features.py
    │   ├── modeling.py
    │   ├── modeling_test.py
    │   ├── multilingual.md
    │   ├── optimization.py
    │   ├── optimization_test.py
    │   ├── predicting_movie_reviews_with_bert_on_tf_hub.ipynb
    │   ├── requirements.txt
    │   ├── run_classifier.py
    │   ├── run_classifier_afqmc.sh
    │   ├── run_classifier_cmnli.sh
    │   ├── run_classifier_csl.sh
    │   ├── run_classifier_iflytek.sh
    │   ├── run_classifier_tnews.sh
    │   ├── run_classifier_with_tfhub.py
    │   ├── run_classifier_wsc.sh
    │   ├── run_ner.py
    │   ├── run_ner_msra.sh
    │   ├── run_pretraining.py
    │   ├── run_squad.py
    │   ├── tf_metrics.py
    │   ├── tokenization.py
    │   ├── tokenization_test.py
    │   └── tpu
    │       ├── run_classifier_inews.sh
    │       ├── run_classifier_jdcomment.sh
    │       ├── run_classifier_lcqmc.sh
    │       ├── run_classifier_thucnews.sh
    │       ├── run_classifier_tnews.sh
    │       └── run_classifier_xnli.sh
    └── xlnet
        ├── __init__.py
        ├── cmrc2018_evaluate_drcd.py
        ├── data_utils.py
        ├── function_builder.py
        ├── gpu_utils.py
        ├── model_utils.py
        ├── modeling.py
        ├── prepro_utils.py
        ├── run_classifier.py
        ├── run_classifier_afqmc.sh
        ├── run_classifier_cmnli.sh
        ├── run_classifier_csl.sh
        ├── run_classifier_iflytek.sh
        ├── run_classifier_tnews.sh
        ├── run_classifier_wsc.sh
        ├── run_cmrc_drcd.py
        ├── spiece.model
        ├── squad_utils.py
        ├── summary.py
        ├── temp.sh
        ├── tpu
        │   ├── run_classifier_inews.sh
        │   ├── run_classifier_lcqmc.sh
        │   ├── run_classifier_tnews.sh
        │   └── run_classifier_xnli.sh
        ├── tpu_estimator.py
        └── xlnet.py
/CLUEdataset/emotion/label_distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/CLUEdataset/emotion/label_distribution.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CLUEEmotion2020
2 | CLUE Emotion Analysis Dataset (情感分析数据集)
3 | # Data Description
4 | The dataset in the data directory is an emotion analysis corpus in which each sample is annotated with one emotion label. The label set is *like, happiness, sadness, anger, disgust, fear and surprise*.
5 |
6 | This dataset is from the following paper:
7 |
8 | ```Minglei Li, Yunfei Long, Qin Lu, and Wenjie Li. “Emotion Corpus Construction Based on Selection from Hashtags.” In Proceedings of International Conference on Language Resources and Evaluation (LREC). Portorož, Slovenia, 2016```
9 |
10 | The corpus statistics and label distribution are as follows:
11 |
12 | ![label distribution](CLUEdataset/emotion/label_distribution.png)
13 |
14 | The train, valid and test sets are split in an 8:1:1 ratio and encoded in UTF-8.
15 |
16 | # Baseline results
17 |
18 | Test results of different classification models on this dataset.
19 |
20 | | Models | Accuracy | Parameters |
21 | | --------- | -------- | ---------------------------------- |
22 | | BERT-base | 60.7% | Epoch 3, batch 32, max_seq_len 128 |
23 |
24 | # Reproduce the results
25 |
26 | The code is based on the [CLUE source code](https://github.com/CLUEbenchmark/CLUE), which in turn builds on Google's original BERT code; the pre-trained language model is the [BERT-Base Chinese version](https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip).
27 |
28 |
29 |
30 | ## Env
31 |
32 | ```
33 | tensorflow 1.12
34 | ```
35 |
36 |
37 |
38 | ## Run command
39 |
40 | ```
41 | cd models/bert
42 | ./run_classifier_emotion.sh
43 | ```
44 |
45 |
46 |
47 |
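48 | ## Inspecting the dataset
49 |
50 | A minimal sketch for sanity-checking the split sizes and label distribution. It assumes each line of the data files is a tab-separated `text<TAB>label` pair; adjust the parsing if the actual format differs:
51 |
52 | ```
53 | from collections import Counter
54 |
55 | def label_counts(path):
56 |     # Count labels, assuming one text<TAB>label sample per line.
57 |     counts = Counter()
58 |     with open(path, encoding="utf-8") as f:
59 |         for line in f:
60 |             parts = line.rstrip("\n").split("\t")
61 |             if len(parts) == 2:
62 |                 counts[parts[1]] += 1
63 |     return counts
64 |
65 | for split in ("train", "valid", "test"):
66 |     print(split, label_counts("CLUEdataset/emotion/%s.txt" % split))
67 | ```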
--------------------------------------------------------------------------------
/models/albert/albert_config/albert_config_base.json:
--------------------------------------------------------------------------------
1 | {
2 | "attention_probs_dropout_prob": 0.0,
3 | "directionality": "bidi",
4 | "hidden_act": "gelu",
5 | "hidden_dropout_prob": 0.0,
6 | "hidden_size": 768,
7 | "embedding_size": 128,
8 | "initializer_range": 0.02,
9 | "intermediate_size": 3072 ,
10 | "max_position_embeddings": 512,
11 | "num_attention_heads": 12,
12 | "num_hidden_layers": 12,
13 |
14 | "pooler_fc_size": 768,
15 | "pooler_num_attention_heads": 12,
16 | "pooler_num_fc_layers": 3,
17 | "pooler_size_per_head": 128,
18 | "pooler_type": "first_token_transform",
19 | "type_vocab_size": 2,
20 | "vocab_size": 21128,
21 | "ln_type":"postln"
22 |
23 | }
24 |
--------------------------------------------------------------------------------
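Note: in every ALBERT config in this directory, "embedding_size" (128) is kept separate from "hidden_size"; this is ALBERT's factorized embedding parameterization. A rough, standalone Python sketch (not part of the repo) of the saving it implies for the base config above:

```python
# Embedding parameter count with and without ALBERT's factorization, using
# vocab_size=21128, embedding_size=128, hidden_size=768 from albert_config_base.json.
V, E, H = 21128, 128, 768

tied = V * H                 # BERT-style lookup table: V x H
factorized = V * E + E * H   # ALBERT: V x E lookup plus an E -> H projection

print("tied:       %s" % format(tied, ","))        # 16,226,304
print("factorized: %s" % format(factorized, ","))  # 2,802,688, roughly 5.8x smaller
```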
/models/albert/albert_config/albert_config_large.json:
--------------------------------------------------------------------------------
1 | {
2 | "attention_probs_dropout_prob": 0.0,
3 | "directionality": "bidi",
4 | "hidden_act": "gelu",
5 | "hidden_dropout_prob": 0.0,
6 | "hidden_size": 1024,
7 | "embedding_size": 128,
8 | "initializer_range": 0.02,
9 | "intermediate_size": 4096,
10 | "max_position_embeddings": 512,
11 | "num_attention_heads": 16,
12 | "num_hidden_layers": 24,
13 |
14 | "pooler_fc_size": 768,
15 | "pooler_num_attention_heads": 12,
16 | "pooler_num_fc_layers": 3,
17 | "pooler_size_per_head": 128,
18 | "pooler_type": "first_token_transform",
19 | "type_vocab_size": 2,
20 | "vocab_size": 21128,
21 | "ln_type":"postln"
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/models/albert/albert_config/albert_config_tiny.json:
--------------------------------------------------------------------------------
1 | {
2 | "attention_probs_dropout_prob": 0.0,
3 | "directionality": "bidi",
4 | "hidden_act": "gelu",
5 | "hidden_dropout_prob": 0.0,
6 | "hidden_size": 312,
7 | "embedding_size": 128,
8 | "initializer_range": 0.02,
9 | "intermediate_size": 1248 ,
10 | "max_position_embeddings": 512,
11 | "num_attention_heads": 12,
12 | "num_hidden_layers": 4,
13 |
14 | "pooler_fc_size": 768,
15 | "pooler_num_attention_heads": 12,
16 | "pooler_num_fc_layers": 3,
17 | "pooler_size_per_head": 128,
18 | "pooler_type": "first_token_transform",
19 | "type_vocab_size": 2,
20 | "vocab_size": 21128,
21 | "ln_type":"postln"
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/models/albert/albert_config/albert_config_xlarge.json:
--------------------------------------------------------------------------------
1 | {
2 | "attention_probs_dropout_prob": 0.0,
3 | "directionality": "bidi",
4 | "hidden_act": "gelu",
5 | "hidden_dropout_prob": 0.0,
6 | "hidden_size": 2048,
7 | "embedding_size": 128,
8 | "initializer_range": 0.02,
9 | "intermediate_size": 8192,
10 | "max_position_embeddings": 512,
11 | "num_attention_heads": 32,
12 | "num_hidden_layers": 24,
13 |
14 | "pooler_fc_size": 1024,
15 | "pooler_num_attention_heads": 64,
16 | "pooler_num_fc_layers": 3,
17 | "pooler_size_per_head": 128,
18 | "pooler_type": "first_token_transform",
19 | "type_vocab_size": 2,
20 | "vocab_size": 21128,
21 | "ln_type":"preln"
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/models/albert/albert_config/albert_config_xxlarge.json:
--------------------------------------------------------------------------------
1 | {
2 | "attention_probs_dropout_prob": 0.0,
3 | "directionality": "bidi",
4 | "hidden_act": "gelu",
5 | "hidden_dropout_prob": 0.0,
6 | "hidden_size": 4096,
7 | "embedding_size": 128,
8 | "initializer_range": 0.02,
9 | "intermediate_size": 16384,
10 | "max_position_embeddings": 512,
11 | "num_attention_heads": 64,
12 | "num_hidden_layers": 12,
13 |
14 | "pooler_fc_size": 1024,
15 | "pooler_num_attention_heads": 64,
16 | "pooler_num_fc_layers": 3,
17 | "pooler_size_per_head": 128,
18 | "pooler_type": "first_token_transform",
19 | "type_vocab_size": 2,
20 | "vocab_size": 21128,
21 | "ln_type":"preln"
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/models/albert/albert_config/bert_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "attention_probs_dropout_prob": 0.0,
3 | "directionality": "bidi",
4 | "hidden_act": "gelu",
5 | "hidden_dropout_prob": 0.0,
6 | "hidden_size": 768,
7 | "initializer_range": 0.02,
8 | "intermediate_size": 3072,
9 | "max_position_embeddings": 512,
10 | "num_attention_heads": 12,
11 | "num_hidden_layers": 12,
12 | "pooler_fc_size": 768,
13 | "pooler_num_attention_heads": 12,
14 | "pooler_num_fc_layers": 3,
15 | "pooler_size_per_head": 128,
16 | "pooler_type": "first_token_transform",
17 | "type_vocab_size": 2,
18 | "vocab_size": 21128
19 | }
20 |
--------------------------------------------------------------------------------
/models/albert/create_pretrain_data.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | BERT_BASE_DIR=./albert_config
4 | python3 create_pretraining_data.py --do_whole_word_mask=True --input_file=data/news_zh_1.txt \
5 | --output_file=data/tf_news_2016_zh_raw_news2016zh_1.tfrecord --vocab_file=$BERT_BASE_DIR/vocab.txt --do_lower_case=True \
6 | --max_seq_length=512 --max_predictions_per_seq=51 --masked_lm_prob=0.10
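7 | # Note: --max_predictions_per_seq tracks max_seq_length * masked_lm_prob (512 * 0.10 ≈ 51), the rule of thumb recommended in the original BERT pretraining-data instructions.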
--------------------------------------------------------------------------------
/models/albert/resources/add_data_removing_dropout.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/albert/resources/add_data_removing_dropout.jpg
--------------------------------------------------------------------------------
/models/albert/resources/albert_configuration.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/albert/resources/albert_configuration.jpg
--------------------------------------------------------------------------------
/models/albert/resources/albert_performance.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/albert/resources/albert_performance.jpg
--------------------------------------------------------------------------------
/models/albert/resources/shell_scripts/create_pretrain_data_batch_webtext.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | echo $1,$2
3 |
4 | BERT_BASE_DIR=./bert_config
5 | for((i=$1;i<=$2;i++));
6 | do
7 | python3 create_pretraining_data.py --do_whole_word_mask=True --input_file=gs://raw_text/web_text_zh_raw/web_text_zh_$i.txt \
8 | --output_file=gs://albert_zh/tf_records/tf_web_text_zh_$i.tfrecord --vocab_file=$BERT_BASE_DIR/vocab.txt --do_lower_case=True \
9 | --max_seq_length=512 --max_predictions_per_seq=76 --masked_lm_prob=0.15
10 | done
11 |
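12 | # Note: each iteration converts one shard (web_text_zh_$i.txt), so separate invocations with disjoint
13 | # [$1,$2] ranges can run in parallel; 76 again follows max_seq_length * masked_lm_prob (512 * 0.15 ≈ 76).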
--------------------------------------------------------------------------------
/models/albert/resources/state_of_the_art.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/albert/resources/state_of_the_art.jpg
--------------------------------------------------------------------------------
/models/albert/run_classifier_afqmc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: Li Yudong
3 | # @Date: 2019-11-28
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 10:29:59
6 |
7 | TASK_NAME="afqmc"
8 | MODEL_NAME="albert_xlarge_zh"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config
12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
15 |
16 | # download and unzip dataset
17 | if [ ! -d $GLUE_DATA_DIR ]; then
18 | mkdir -p $GLUE_DATA_DIR
19 | echo "makedir $GLUE_DATA_DIR"
20 | fi
21 | cd $GLUE_DATA_DIR
22 | if [ ! -d $TASK_NAME ]; then
23 | mkdir $TASK_NAME
24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
25 | fi
26 | cd $TASK_NAME
27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
28 | rm *
29 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip
30 | unzip afqmc_public.zip
31 | rm afqmc_public.zip
32 | else
33 | echo "data exists"
34 | fi
35 | echo "Finish download dataset."
36 |
37 | # download model
38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then
39 | mkdir -p $ALBERT_XLARGE_DIR
40 | echo "makedir $ALBERT_XLARGE_DIR"
41 | fi
42 | cd $ALBERT_XLARGE_DIR
43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then
44 | rm *
45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip
46 | unzip albert_xlarge_zh_177k.zip
47 | rm albert_xlarge_zh_177k.zip
48 | else
49 | echo "model exists"
50 | fi
51 | echo "Finish download model."
52 |
53 | # run task
54 | cd $CURRENT_DIR
55 | echo "Start running..."
56 | if [ $# == 0 ]; then
57 | python run_classifier.py \
58 | --task_name=$TASK_NAME \
59 | --do_train=true \
60 | --do_eval=true \
61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
65 | --max_seq_length=128 \
66 | --train_batch_size=16 \
67 | --learning_rate=3e-5 \
68 | --num_train_epochs=2.0 \
69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
70 | elif [ $1 == "predict" ]; then
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=16 \
82 | --learning_rate=3e-5 \
83 | --num_train_epochs=2.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
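87 | # Usage:
88 | #   ./run_classifier_afqmc.sh            # no arguments: fetch data and model, then train + eval
89 | #   ./run_classifier_afqmc.sh predict    # run prediction only (do_train/do_eval disabled)
90 | # The other run_classifier_*.sh scripts in this directory follow the same pattern.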
--------------------------------------------------------------------------------
/models/albert/run_classifier_cmnli.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: Li Yudong
3 | # @Date: 2019-11-28
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 10:33:16
6 |
7 | TASK_NAME="cmnli"
8 | MODEL_NAME="albert_xlarge_zh"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config
12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
15 |
16 | # download and unzip dataset
17 | if [ ! -d $GLUE_DATA_DIR ]; then
18 | mkdir -p $GLUE_DATA_DIR
19 | echo "makedir $GLUE_DATA_DIR"
20 | fi
21 | cd $GLUE_DATA_DIR
22 | if [ ! -d $TASK_NAME ]; then
23 | mkdir $TASK_NAME
24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
25 | fi
26 | cd $TASK_NAME
27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
28 | rm *
29 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip
30 | unzip cmnli_public.zip
31 | rm cmnli_public.zip
32 | else
33 | echo "data exists"
34 | fi
35 | echo "Finish download dataset."
36 |
37 | # download model
38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then
39 | mkdir -p $ALBERT_XLARGE_DIR
40 | echo "makedir $ALBERT_XLARGE_DIR"
41 | fi
42 | cd $ALBERT_XLARGE_DIR
43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then
44 | rm *
45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip
46 | unzip albert_xlarge_zh_177k.zip
47 | rm albert_xlarge_zh_177k.zip
48 | else
49 | echo "model exists"
50 | fi
51 | echo "Finish download model."
52 |
53 | # run task
54 | cd $CURRENT_DIR
55 | echo "Start running..."
56 | if [ $# == 0 ]; then
57 | python run_classifier.py \
58 | --task_name=$TASK_NAME \
59 | --do_train=true \
60 | --do_eval=true \
61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
65 | --max_seq_length=128 \
66 | --train_batch_size=16 \
67 | --learning_rate=3e-5 \
68 | --num_train_epochs=2.0 \
69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
70 | elif [ $1 == "predict" ]; then
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=16 \
82 | --learning_rate=3e-5 \
83 | --num_train_epochs=2.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/albert/run_classifier_csl.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: Li Yudong
3 | # @Date: 2019-11-28
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 10:33:24
6 |
7 | TASK_NAME="csl"
8 | MODEL_NAME="albert_xlarge_zh"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config
12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
15 |
16 | # download and unzip dataset
17 | if [ ! -d $GLUE_DATA_DIR ]; then
18 | mkdir -p $GLUE_DATA_DIR
19 | echo "makedir $GLUE_DATA_DIR"
20 | fi
21 | cd $GLUE_DATA_DIR
22 | if [ ! -d $TASK_NAME ]; then
23 | mkdir $TASK_NAME
24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
25 | fi
26 | cd $TASK_NAME
27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
28 | rm *
29 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip
30 | unzip csl_public.zip
31 | rm csl_public.zip
32 | else
33 | echo "data exists"
34 | fi
35 | echo "Finish download dataset."
36 |
37 | # download model
38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then
39 | mkdir -p $ALBERT_XLARGE_DIR
40 | echo "makedir $ALBERT_XLARGE_DIR"
41 | fi
42 | cd $ALBERT_XLARGE_DIR
43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then
44 | rm *
45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip
46 | unzip albert_xlarge_zh_177k.zip
47 | rm albert_xlarge_zh_177k.zip
48 | else
49 | echo "model exists"
50 | fi
51 | echo "Finish download model."
52 |
53 | # run task
54 | cd $CURRENT_DIR
55 | echo "Start running..."
56 | if [ $# == 0 ]; then
57 | python run_classifier.py \
58 | --task_name=$TASK_NAME \
59 | --do_train=true \
60 | --do_eval=true \
61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
65 | --max_seq_length=128 \
66 | --train_batch_size=16 \
67 | --learning_rate=3e-5 \
68 | --num_train_epochs=2.0 \
69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
70 | elif [ $1 == "predict" ]; then
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=16 \
82 | --learning_rate=3e-5 \
83 | --num_train_epochs=2.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
87 |
--------------------------------------------------------------------------------
/models/albert/run_classifier_iflytek.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: Li Yudong
3 | # @Date: 2019-11-28
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 10:33:28
6 |
7 | TASK_NAME="iflytek"
8 | MODEL_NAME="albert_xlarge_zh"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config
12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
15 |
16 | # download and unzip dataset
17 | if [ ! -d $GLUE_DATA_DIR ]; then
18 | mkdir -p $GLUE_DATA_DIR
19 | echo "makedir $GLUE_DATA_DIR"
20 | fi
21 | cd $GLUE_DATA_DIR
22 | if [ ! -d $TASK_NAME ]; then
23 | mkdir $TASK_NAME
24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
25 | fi
26 | cd $TASK_NAME
27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
28 | rm *
29 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip
30 | unzip iflytek_public.zip
31 | rm iflytek_public.zip
32 | else
33 | echo "data exists"
34 | fi
35 | echo "Finish download dataset."
36 |
37 | # download model
38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then
39 | mkdir -p $ALBERT_XLARGE_DIR
40 | echo "makedir $ALBERT_XLARGE_DIR"
41 | fi
42 | cd $ALBERT_XLARGE_DIR
43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then
44 | rm *
45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip
46 | unzip albert_xlarge_zh_177k.zip
47 | rm albert_xlarge_zh_177k.zip
48 | else
49 | echo "model exists"
50 | fi
51 | echo "Finish download model."
52 |
53 | # run task
54 | cd $CURRENT_DIR
55 | echo "Start running..."
56 | if [ $# == 0 ]; then
57 | python run_classifier.py \
58 | --task_name=$TASK_NAME \
59 | --do_train=true \
60 | --do_eval=true \
61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
65 | --max_seq_length=128 \
66 | --train_batch_size=16 \
67 | --learning_rate=3e-5 \
68 | --num_train_epochs=2.0 \
69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
70 | elif [ $1 == "predict" ]; then
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=16 \
82 | --learning_rate=3e-5 \
83 | --num_train_epochs=2.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
87 |
--------------------------------------------------------------------------------
/models/albert/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 10:33:33
6 |
7 | TASK_NAME="tnews"
8 | MODEL_NAME="albert_xlarge_zh"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config
12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
15 |
16 | # download and unzip dataset
17 | if [ ! -d $GLUE_DATA_DIR ]; then
18 | mkdir -p $GLUE_DATA_DIR
19 | echo "makedir $GLUE_DATA_DIR"
20 | fi
21 | cd $GLUE_DATA_DIR
22 | if [ ! -d $TASK_NAME ]; then
23 | mkdir $TASK_NAME
24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
25 | fi
26 | cd $TASK_NAME
27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
28 | rm *
29 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip
30 | unzip tnews_public.zip
31 | rm tnews_public.zip
32 | else
33 | echo "data exists"
34 | fi
35 | echo "Finish download dataset."
36 |
37 | # download model
38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then
39 | mkdir -p $ALBERT_XLARGE_DIR
40 | echo "makedir $ALBERT_XLARGE_DIR"
41 | fi
42 | cd $ALBERT_XLARGE_DIR
43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then
44 | rm *
45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip
46 | unzip albert_xlarge_zh_177k.zip
47 | rm albert_xlarge_zh_177k.zip
48 | else
49 | echo "model exists"
50 | fi
51 | echo "Finish download model."
52 |
53 | # run task
54 | cd $CURRENT_DIR
55 | echo "Start running..."
56 | if [ $# == 0 ]; then
57 | python run_classifier.py \
58 | --task_name=$TASK_NAME \
59 | --do_train=true \
60 | --do_eval=true \
61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
65 | --max_seq_length=128 \
66 | --train_batch_size=16 \
67 | --learning_rate=3e-5 \
68 | --num_train_epochs=2.0 \
69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
70 | elif [ $1 == "predict" ]; then
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=16 \
82 | --learning_rate=3e-5 \
83 | --num_train_epochs=2.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
87 |
--------------------------------------------------------------------------------
/models/albert/run_classifier_wsc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: Li Yudong
3 | # @Date: 2019-11-28
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 10:33:37
6 |
7 | TASK_NAME="wsc"
8 | MODEL_NAME="albert_xlarge_zh"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export ALBERT_CONFIG_DIR=$CURRENT_DIR/albert_config
12 | export ALBERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
13 | export ALBERT_XLARGE_DIR=$ALBERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
15 |
16 | # download and unzip dataset
17 | if [ ! -d $GLUE_DATA_DIR ]; then
18 | mkdir -p $GLUE_DATA_DIR
19 | echo "makedir $GLUE_DATA_DIR"
20 | fi
21 | cd $GLUE_DATA_DIR
22 | if [ ! -d $TASK_NAME ]; then
23 | mkdir $TASK_NAME
24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
25 | fi
26 | cd $TASK_NAME
27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
28 | rm *
29 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip
30 | unzip wsc_public.zip
31 | rm wsc_public.zip
32 | else
33 | echo "data exists"
34 | fi
35 | echo "Finish download dataset."
36 |
37 | # download model
38 | if [ ! -d $ALBERT_XLARGE_DIR ]; then
39 | mkdir -p $ALBERT_XLARGE_DIR
40 | echo "makedir $ALBERT_XLARGE_DIR"
41 | fi
42 | cd $ALBERT_XLARGE_DIR
43 | if [ ! -f "albert_config_xlarge.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "albert_model.ckpt.index" ] || [ ! -f "albert_model.ckpt.meta" ] || [ ! -f "albert_model.ckpt.data-00000-of-00001" ]; then
44 | rm *
45 | wget https://storage.googleapis.com/albert_zh/albert_xlarge_zh_177k.zip
46 | unzip albert_xlarge_zh_177k.zip
47 | rm albert_xlarge_zh_177k.zip
48 | else
49 | echo "model exists"
50 | fi
51 | echo "Finish download model."
52 |
53 | # run task
54 | cd $CURRENT_DIR
55 | echo "Start running..."
56 | if [ $# == 0 ]; then
57 | python run_classifier.py \
58 | --task_name=$TASK_NAME \
59 | --do_train=true \
60 | --do_eval=true \
61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
62 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
63 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
64 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
65 | --max_seq_length=128 \
66 | --train_batch_size=16 \
67 | --learning_rate=3e-5 \
68 | --num_train_epochs=2.0 \
69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
70 | elif [ $1 == "predict" ]; then
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ALBERT_XLARGE_DIR/vocab.txt \
78 | --bert_config_file=$ALBERT_XLARGE_DIR/albert_config_xlarge.json \
79 | --init_checkpoint=$ALBERT_XLARGE_DIR/albert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=16 \
82 | --learning_rate=3e-5 \
83 | --num_train_epochs=2.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/albert/tpu/run_classifier_inews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="inews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-xlarge/albert_xlarge_zh_183k
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-xlarge/albert_xlarge_zh_183k/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_xlarge.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
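23 | # Note: the gs:// bucket paths and the --tpu_name gRPC address above are specific to the original
24 | # authors' setup; point them at your own GCS bucket and TPU endpoint before running. The same applies
25 | # to the other scripts in this tpu/ directory.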
--------------------------------------------------------------------------------
/models/albert/tpu/run_classifier_inews_tiny.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="inews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert_tiny/albert_tiny_207k
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert_tiny/tpu/${TASK_NAME}/$CURRENT_TIME
7 |
8 | python3 $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_tiny.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=16 \
18 | --learning_rate=6e-5 \
19 | --num_train_epochs=10.0 \
20 | --save_checkpoints_steps=600 \
21 | --output_dir=$OUTPUT_DIR \
22 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.240.1.2:8470
23 |
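24 | # Note: relative to the xlarge run above (learning_rate=2e-5), this albert_tiny run uses a larger
25 | # learning rate (6e-5), more epochs, and more frequent checkpoints, a common adjustment when
26 | # fine-tuning a much smaller model.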
--------------------------------------------------------------------------------
/models/albert/tpu/run_classifier_lcqmc.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="lcqmc"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-xlarge/albert_xlarge_zh_183k
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-xlarge/albert_xlarge_zh_183k/tpu/${TASK_NAME}/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_xlarge.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=64 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.250.1.2:8470
22 |
--------------------------------------------------------------------------------
/models/albert/tpu/run_classifier_lcqmc_tiny.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="lcqmc"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-tiny/albert_tiny_489k
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-tiny/tpu/${TASK_NAME}/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --do_predict=true \
13 | --data_dir=$DATA_DIR \
14 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
15 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_tiny.json \
16 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \
17 | --max_seq_length=128 \
18 | --train_batch_size=32 \
19 | --learning_rate=6e-5 \
20 | --num_train_epochs=3.0 \
21 | --output_dir=$OUTPUT_DIR \
22 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://192.168.0.2:8470
23 |
--------------------------------------------------------------------------------
/models/albert/tpu/run_classifier_thucnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="thucnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-xlarge/albert_xlarge_zh_183k
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-xlarge/albert_xlarge_zh_183k/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_xlarge.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.2.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/albert/tpu/run_classifier_thucnews_tiny.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="thucnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-tiny/albert_tiny_489k
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-tiny/tpu/${TASK_NAME}/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_tiny.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=64 \
18 | --learning_rate=1e-4 \
19 | --num_train_epochs=5.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/albert/tpu/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="tnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-xlarge/albert_xlarge_zh_183k
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-xlarge/albert_xlarge_zh_183k/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_xlarge.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=32 \
18 | --learning_rate=6e-5 \
19 | --num_train_epochs=9.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.240.1.2:8470
22 |
--------------------------------------------------------------------------------
/models/albert/tpu/run_classifier_tnews_tiny.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="tnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-tiny/albert_tiny_489k
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-tiny/tpu/${TASK_NAME}/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --do_predict=true \
13 | --data_dir=$DATA_DIR \
14 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
15 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_tiny.json \
16 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \
17 | --max_seq_length=128 \
18 | --train_batch_size=32 \
19 | --learning_rate=6e-5 \
20 | --num_train_epochs=3.0 \
21 | --output_dir=$OUTPUT_DIR \
22 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.20.0.2:8470
23 |
--------------------------------------------------------------------------------
/models/albert/tpu/run_classifier_xnli.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="xnli"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-xlarge/albert_xlarge_zh_183k
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-xlarge/albert_xlarge_zh_183k/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_xlarge.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=64 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.2.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/albert/tpu/run_classifier_xnli_tiny.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="xnli"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/albert-tiny/albert_tiny_207k
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/albert-tiny/albert_tiny_207k/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/albert_config_tiny.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/albert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=64 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.2.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/bert/.gitignore:
--------------------------------------------------------------------------------
1 | # Initially taken from Github's Python gitignore file
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # IPython
79 | profile_default/
80 | ipython_config.py
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # celery beat schedule file
86 | celerybeat-schedule
87 |
88 | # SageMath parsed files
89 | *.sage.py
90 |
91 | # Environments
92 | .env
93 | .venv
94 | env/
95 | venv/
96 | ENV/
97 | env.bak/
98 | venv.bak/
99 |
100 | # Spyder project settings
101 | .spyderproject
102 | .spyproject
103 |
104 | # Rope project settings
105 | .ropeproject
106 |
107 | # mkdocs documentation
108 | /site
109 |
110 | # mypy
111 | .mypy_cache/
112 | .dmypy.json
113 | dmypy.json
114 |
115 | # Pyre type checker
116 | .pyre/
117 |
--------------------------------------------------------------------------------
/models/bert/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | BERT needs to maintain permanent compatibility with the pre-trained model files,
4 | so we do not plan to make any major changes to this library (other than what was
5 | promised in the README). However, we can accept small patches related to
6 | re-factoring and documentation. To submit contributions, there are just a few
7 | small guidelines you need to follow.
8 |
9 | ## Contributor License Agreement
10 |
11 | Contributions to this project must be accompanied by a Contributor License
12 | Agreement. You (or your employer) retain the copyright to your contribution;
13 | this simply gives us permission to use and redistribute your contributions as
14 | part of the project. Head over to <https://cla.developers.google.com/> to see
15 | your current agreements on file or to sign a new one.
16 |
17 | You generally only need to submit a CLA once, so if you've already submitted one
18 | (even if it was for a different project), you probably don't need to do it
19 | again.
20 |
21 | ## Code reviews
22 |
23 | All submissions, including submissions by project members, require review. We
24 | use GitHub pull requests for this purpose. Consult
25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
26 | information on using pull requests.
27 |
28 | ## Community Guidelines
29 |
30 | This project follows
31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
32 |
--------------------------------------------------------------------------------
/models/bert/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/models/bert/optimization_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import optimization
20 | import tensorflow as tf
21 |
22 |
23 | class OptimizationTest(tf.test.TestCase):
24 |
25 | def test_adam(self):
26 | with self.test_session() as sess:
27 | w = tf.get_variable(
28 | "w",
29 | shape=[3],
30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1]))
31 | x = tf.constant([0.4, 0.2, -0.5])
32 | loss = tf.reduce_mean(tf.square(x - w))
33 | tvars = tf.trainable_variables()
34 | grads = tf.gradients(loss, tvars)
35 | global_step = tf.train.get_or_create_global_step()
36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2)
37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step)
38 | init_op = tf.group(tf.global_variables_initializer(),
39 | tf.local_variables_initializer())
40 | sess.run(init_op)
41 | for _ in range(100):
42 | sess.run(train_op)
43 | w_np = sess.run(w)
44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2)
45 |
46 |
47 | if __name__ == "__main__":
48 | tf.test.main()
49 |
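50 | # The test above drives w from its initializer toward x = [0.4, 0.2, -0.5] with 100 steps of
51 | # AdamWeightDecayOptimizer and asserts convergence to within 1e-2 (TF 1.x API). Run it directly with:
52 | #   python optimization_test.py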
--------------------------------------------------------------------------------
/models/bert/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow.
2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow.
3 |
--------------------------------------------------------------------------------
/models/bert/run_classifier_csl.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: Li Yudong
3 | # @Date: 2019-11-28
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:00:57
6 |
7 | TASK_NAME="csl"
8 | MODEL_NAME="chinese_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export BERT_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip
29 | unzip csl_public.zip
30 | rm csl_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $BERT_PRETRAINED_MODELS_DIR ]; then
38 | mkdir -p $BERT_PRETRAINED_MODELS_DIR
39 | echo "makedir $BERT_PRETRAINED_MODELS_DIR"
40 | fi
41 | cd $BERT_PRETRAINED_MODELS_DIR
42 | if [ ! -d $MODEL_NAME ]; then
43 | wget https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip
44 | unzip chinese_L-12_H-768_A-12.zip
45 | rm chinese_L-12_H-768_A-12.zip
46 | else
47 | cd $MODEL_NAME
48 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
49 | cd ..
50 | rm -rf $MODEL_NAME
51 | wget https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip
52 | unzip chinese_L-12_H-768_A-12.zip
53 | rm chinese_L-12_H-768_A-12.zip
54 | else
55 | echo "model exists"
56 | fi
57 | fi
58 | echo "Finish download model."
59 |
60 | # run task
61 | cd $CURRENT_DIR
62 | echo "Start running..."
63 | if [ $# == 0 ]; then
64 | python run_classifier.py \
65 | --task_name=$TASK_NAME \
66 | --do_train=true \
67 | --do_eval=true \
68 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
69 | --vocab_file=$BERT_BASE_DIR/vocab.txt \
70 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \
71 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
72 | --max_seq_length=128 \
73 | --train_batch_size=32 \
74 | --learning_rate=2e-5 \
75 | --num_train_epochs=3.0 \
76 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
77 | elif [ $1 == "predict" ]; then
78 | echo "Start predict..."
79 | python run_classifier.py \
80 | --task_name=$TASK_NAME \
81 | --do_train=false \
82 | --do_eval=false \
83 | --do_predict=true \
84 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
85 | --vocab_file=$BERT_BASE_DIR/vocab.txt \
86 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \
87 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
88 | --max_seq_length=128 \
89 | --train_batch_size=32 \
90 | --learning_rate=2e-5 \
91 | --num_train_epochs=3.0 \
92 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
93 | fi
94 |
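95 | # Usage (per the argument handling above): run with no arguments to
96 | # fine-tune and evaluate, or pass "predict" to run inference on the
97 | # test set, e.g. `bash run_classifier_csl.sh predict`.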
--------------------------------------------------------------------------------
/models/bert/run_classifier_emotion.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:01:06
6 |
7 | TASK_NAME="emotion"
8 | MODEL_NAME="chinese_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export BERT_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.txt" ] || [ ! -f "valid.txt" ] || [ ! -f "test.txt" ]; then
27 | rm *
28 | echo "Pleae download the dataset with the source code from https://github.com/CLUEbenchmark/CLUEmotionAnalysis2020"
29 | else
30 | echo "data exists"
31 | fi
32 | echo "Finish download dataset."
33 |
34 | # download model
35 | if [ ! -d $BERT_PRETRAINED_MODELS_DIR ]; then
36 | mkdir -p $BERT_PRETRAINED_MODELS_DIR
37 | echo "makedir $BERT_PRETRAINED_MODELS_DIR"
38 | fi
39 | cd $BERT_PRETRAINED_MODELS_DIR
40 | if [ ! -d $MODEL_NAME ]; then
41 | wget https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip
42 | unzip chinese_L-12_H-768_A-12.zip
43 | rm chinese_L-12_H-768_A-12.zip
44 | else
45 | cd $MODEL_NAME
46 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
47 | cd ..
48 | rm -rf $MODEL_NAME
49 | wget https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip
50 | unzip chinese_L-12_H-768_A-12.zip
51 | rm chinese_L-12_H-768_A-12.zip
52 | else
53 | echo "model exists"
54 | fi
55 | fi
56 | echo "Finish download model."
57 |
58 | # run task
59 | cd $CURRENT_DIR
60 | echo "Start running..."
61 | if [ $# == 0 ]; then
62 | python run_classifier.py \
63 | --task_name=$TASK_NAME \
64 | --do_train=true \
65 | --do_eval=true \
66 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
67 | --vocab_file=$BERT_BASE_DIR/vocab.txt \
68 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \
69 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
70 | --max_seq_length=128 \
71 | --train_batch_size=32 \
72 | --learning_rate=2e-5 \
73 | --num_train_epochs=3.0 \
74 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
75 | elif [ $1 == "predict" ]; then
76 | echo "Start predict..."
77 | python run_classifier.py \
78 | --task_name=$TASK_NAME \
79 | --do_train=false \
80 | --do_eval=false \
81 | --do_predict=true \
82 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
83 | --vocab_file=$BERT_BASE_DIR/vocab.txt \
84 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \
85 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
86 | --max_seq_length=128 \
87 | --train_batch_size=32 \
88 | --learning_rate=2e-5 \
89 | --num_train_epochs=3.0 \
90 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
91 | fi
92 |
--------------------------------------------------------------------------------
/models/bert/tpu/run_classifier_inews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="inews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/al/bert-base/chinese_L-12_H-768_A-12/
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
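23 | # Note: the gs:// buckets and the tpu_name gRPC endpoint above are the
24 | # original authors' environment; substitute your own storage paths and
25 | # TPU address before running.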
--------------------------------------------------------------------------------
/models/bert/tpu/run_classifier_jdcomment.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="jdcomment"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-base/chinese_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 | echo $DATA_DIR
8 | python3 $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/bert/tpu/run_classifier_lcqmc.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="lcqmc"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-base/chinese_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.20.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/bert/tpu/run_classifier_thucnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="thucnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/al/bert-base/chinese_L-12_H-768_A-12/
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/bert/tpu/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="tnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-base/chinese_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://192.168.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/bert/tpu/run_classifier_xnli.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="xnli"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/al/bert-base/chinese_L-12_H-768_A-12/
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-base/chinese_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/.gitignore:
--------------------------------------------------------------------------------
1 | # Initially taken from Github's Python gitignore file
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # IPython
79 | profile_default/
80 | ipython_config.py
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # celery beat schedule file
86 | celerybeat-schedule
87 |
88 | # SageMath parsed files
89 | *.sage.py
90 |
91 | # Environments
92 | .env
93 | .venv
94 | env/
95 | venv/
96 | ENV/
97 | env.bak/
98 | venv.bak/
99 |
100 | # Spyder project settings
101 | .spyderproject
102 | .spyproject
103 |
104 | # Rope project settings
105 | .ropeproject
106 |
107 | # mkdocs documentation
108 | /site
109 |
110 | # mypy
111 | .mypy_cache/
112 | .dmypy.json
113 | dmypy.json
114 |
115 | # Pyre type checker
116 | .pyre/
117 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | BERT needs to maintain permanent compatibility with the pre-trained model files,
4 | so we do not plan to make any major changes to this library (other than what was
5 | promised in the README). However, we can accept small patches related to
6 | re-factoring and documentation. To submit contributions, there are just a few
7 | small guidelines you need to follow.
8 |
9 | ## Contributor License Agreement
10 |
11 | Contributions to this project must be accompanied by a Contributor License
12 | Agreement. You (or your employer) retain the copyright to your contribution;
13 | this simply gives us permission to use and redistribute your contributions as
14 | part of the project. Head over to <https://cla.developers.google.com/> to see
15 | your current agreements on file or to sign a new one.
16 |
17 | You generally only need to submit a CLA once, so if you've already submitted one
18 | (even if it was for a different project), you probably don't need to do it
19 | again.
20 |
21 | ## Code reviews
22 |
23 | All submissions, including submissions by project members, require review. We
24 | use GitHub pull requests for this purpose. Consult
25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
26 | information on using pull requests.
27 |
28 | ## Community Guidelines
29 |
30 | This project follows
31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
32 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/optimization_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import optimization
20 | import tensorflow as tf
21 |
22 |
23 | class OptimizationTest(tf.test.TestCase):
24 |
25 | def test_adam(self):
26 | with self.test_session() as sess:
27 | w = tf.get_variable(
28 | "w",
29 | shape=[3],
30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1]))
31 | x = tf.constant([0.4, 0.2, -0.5])
32 | loss = tf.reduce_mean(tf.square(x - w))
33 | tvars = tf.trainable_variables()
34 | grads = tf.gradients(loss, tvars)
35 | global_step = tf.train.get_or_create_global_step()
36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2)
37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step)
38 | init_op = tf.group(tf.global_variables_initializer(),
39 | tf.local_variables_initializer())
40 | sess.run(init_op)
41 | for _ in range(100):
42 | sess.run(train_op)
43 | w_np = sess.run(w)
44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2)
45 |
46 |
47 | if __name__ == "__main__":
48 | tf.test.main()
49 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow.
2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow.
3 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/run_classifier_afqmc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:02:12
6 |
7 | TASK_NAME="afqmc"
8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip
29 | unzip afqmc_public.zip
30 | rm afqmc_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then
38 | mkdir -p $BERT_WWM_BASE_DIR
39 | echo "makedir $BERT_WWM_BASE_DIR"
40 | fi
41 | cd $BERT_WWM_BASE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
87 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/run_classifier_cmnli.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:02:51
6 |
7 | TASK_NAME="cmnli"
8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip
29 | unzip cmnli_public.zip
30 | rm cmnli_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then
38 | mkdir -p $BERT_WWM_BASE_DIR
39 | echo "makedir $BERT_WWM_BASE_DIR"
40 | fi
41 | cd $BERT_WWM_BASE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
87 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/run_classifier_csl.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: Li Yudong
3 | # @Date: 2019-11-28
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:02:59
6 |
7 | TASK_NAME="csl"
8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip
29 | unzip csl_public.zip
30 | rm csl_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then
38 | mkdir -p $BERT_WWM_BASE_DIR
39 | echo "makedir $BERT_WWM_BASE_DIR"
40 | fi
41 | cd $BERT_WWM_BASE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/run_classifier_iflytek.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:03:03
6 |
7 | TASK_NAME="iflytek"
8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip
29 | unzip iflytek_public.zip
30 | rm iflytek_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then
38 | mkdir -p $BERT_WWM_BASE_DIR
39 | echo "makedir $BERT_WWM_BASE_DIR"
40 | fi
41 | cd $BERT_WWM_BASE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:03:08
6 |
7 | TASK_NAME="tnews"
8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip
29 | unzip tnews_public.zip
30 | rm tnews_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then
38 | mkdir -p $BERT_WWM_BASE_DIR
39 | echo "makedir $BERT_WWM_BASE_DIR"
40 | fi
41 | cd $BERT_WWM_BASE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/run_classifier_wsc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:03:13
6 |
7 | TASK_NAME="wsc"
8 | MODEL_NAME="chinese_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export BERT_PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export BERT_WWM_BASE_DIR=$BERT_PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip
29 | unzip wsc_public.zip
30 | rm wsc_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $BERT_WWM_BASE_DIR ]; then
38 | mkdir -p $BERT_WWM_BASE_DIR
39 | echo "makedir $BERT_WWM_BASE_DIR"
40 | fi
41 | cd $BERT_WWM_BASE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
62 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
63 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$BERT_WWM_BASE_DIR/vocab.txt \
78 | --bert_config_file=$BERT_WWM_BASE_DIR/bert_config.json \
79 | --init_checkpoint=$BERT_WWM_BASE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/run_ner_msra.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | export CUDA_VISIBLE_DEVICES="0"
3 | export BERT_BASE_DIR=$CURRENT_DIR/prev_trained_model/chinese_wwm_ext_L-12_H-768_A-12
4 | export GLUE_DIR=$CURRENT_DIR/../../glue/chineseGLUEdatasets/
5 | TASK_NAME="msraner"
6 |
7 | python run_ner.py \
8 | --task_name=$TASK_NAME \
9 | --do_train=true \
10 | --do_eval=false \
11 | --do_predict=true \
12 | --data_dir=$GLUE_DIR/$TASK_NAME \
13 | --vocab_file=$BERT_BASE_DIR/vocab.txt \
14 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \
15 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
16 | --max_seq_length=256 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=5.0 \
20 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
21 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/tpu/run_classifier_inews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="inews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/tpu/run_classifier_lcqmc.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="lcqmc"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://192.168.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/tpu/run_classifier_thucnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="inews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/tpu/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="tnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.230.1.2:8470
22 |
--------------------------------------------------------------------------------
/models/bert_wwm_ext/tpu/run_classifier_xnli.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="xnli"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/bert-wwm-ext-base/chinese_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/copa_sh/convert_test.py:
--------------------------------------------------------------------------------
1 | #coding:utf-8
2 | import sys
3 | import json
4 |
5 | test_file=sys.argv[1]
6 | predict_label = []
7 | tmp = []
8 | for line in open(test_file, 'r').readlines():
9 | ss = line.strip().split('\t')
10 | if len(ss) == 2:
11 |         tmp.append(float(ss[1]))  # parse as float so pair comparison is numeric
12 | else:
13 | print ('wrong format!!!: ' + line.strip())
14 |
15 | i = 0
16 | while(i < len(tmp)-1):
17 | if tmp[i] >= tmp[i+1]:
18 | predict_label.append(str(0))
19 | else:
20 | predict_label.append(str(1))
21 | i += 2
22 | print ("predict_label size: " + str(len(predict_label)))
23 |
24 | # Emit one JSON object per example, matching the submission format of
25 | # one {"id": ..., "label": ...} record per line.
26 | for idx, label in enumerate(predict_label):
27 |     res = {'id': idx, 'label': label}
28 |     print(json.dumps(res, ensure_ascii=False))
29 |
30 |
31 |
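32 | # Note: each COPA example yields two consecutive rows in the input TSV
33 | # (one score per candidate alternative). The loop above predicts
34 | # alternative 0 when the first score of a pair is >= the second,
35 | # otherwise alternative 1.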
--------------------------------------------------------------------------------
/models/copa_sh/copa_eval_dev.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #########################################################################
3 | # File Name: copa_eval_dev.sh
4 | # Author: Junyi Li
5 | # Personal page: dukeenglish.github.io
6 | # Created Time: 21:39:07 2019-12-02
7 | #########################################################################
8 | # Because the COPA task is special, this extra evaluation script is
9 | # provided. To reproduce the dev results published online, predict with
10 | # dev.json used as test.json, then evaluate the predictions with the
11 | # first command below.
12 | # Without evaluation, the second command converts the test predictions
13 | # into the submission format.
14 | python eval_copa.py copa_output/test_result.tsv
15 | python convert_test.py copa_output/test_result.tsv
16 |
17 |
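18 | # Both helper scripts read two tab-separated columns per row and use the
19 | # second column as the candidate's score (one row per candidate).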
--------------------------------------------------------------------------------
/models/copa_sh/dev_label.txt:
--------------------------------------------------------------------------------
1 | 0 1
2 | 1 1
3 | 2 0
4 | 3 1
5 | 4 1
6 | 5 0
7 | 6 0
8 | 7 0
9 | 8 0
10 | 9 0
11 | 10 0
12 | 11 1
13 | 12 1
14 | 13 1
15 | 14 0
16 | 15 0
17 | 16 1
18 | 17 0
19 | 18 1
20 | 19 0
21 | 20 0
22 | 21 1
23 | 22 0
24 | 23 0
25 | 24 0
26 | 25 1
27 | 26 1
28 | 27 1
29 | 28 1
30 | 29 1
31 | 30 1
32 | 31 1
33 | 32 0
34 | 33 0
35 | 34 0
36 | 35 1
37 | 36 0
38 | 37 1
39 | 38 1
40 | 39 0
41 | 40 0
42 | 41 0
43 | 42 1
44 | 43 0
45 | 44 1
46 | 45 1
47 | 46 0
48 | 47 0
49 | 48 1
50 | 49 0
51 | 50 0
52 | 51 1
53 | 52 1
54 | 53 0
55 | 54 0
56 | 55 1
57 | 56 0
58 | 57 0
59 | 58 1
60 | 59 0
61 | 60 1
62 | 61 0
63 | 62 0
64 | 63 1
65 | 64 0
66 | 65 0
67 | 66 0
68 | 67 1
69 | 68 0
70 | 69 0
71 | 70 0
72 | 71 0
73 | 72 1
74 | 73 1
75 | 74 1
76 | 75 1
77 | 76 1
78 | 77 0
79 | 78 1
80 | 79 0
81 | 80 0
82 | 81 0
83 | 82 0
84 | 83 1
85 | 84 1
86 | 85 0
87 | 86 0
88 | 87 0
89 | 88 0
90 | 89 0
91 | 90 0
92 | 91 1
93 | 92 1
94 | 93 0
95 | 94 0
96 | 95 1
97 | 96 1
98 | 97 0
99 | 98 1
100 | 99 1
101 |
--------------------------------------------------------------------------------
/models/copa_sh/eval_copa.py:
--------------------------------------------------------------------------------
1 | #coding:utf-8
2 | import sys
3 |
4 | test_file=sys.argv[1]
5 | predict_label = []
6 | tmp = []
7 | for line in open(test_file, 'r').readlines():
8 | ss = line.strip().split('\t')
9 | if len(ss) == 2:
10 |         tmp.append(float(ss[1]))  # parse as float so pair comparison is numeric
11 | else:
12 | print ('wrong format!!!: ' + line.strip())
13 |
14 | i = 0
15 | while(i < len(tmp)-1):
16 | if tmp[i] >= tmp[i+1]:
17 | predict_label.append(str(0))
18 | else:
19 | predict_label.append(str(1))
20 | i += 2
21 | print ("predict_label size: " + str(len(predict_label)))
22 |
23 |
24 | golden_file = 'dev_label.txt'
25 | golden_label=[]
26 | for line in open(golden_file, 'r').readlines():
27 | ss = line.strip().split('\t')
28 | if len(ss) == 2:
29 | golden_label.append(ss[1])
30 | else:
31 | print ('wrong format!!!: ' + line.strip())
32 |
33 | print ('golden_label size: ' + str(len(golden_label)))
34 | correct_count = 0
35 | wrong_count = 0
36 | for i in range(0, len(golden_label)):
37 | if golden_label[i] == predict_label[i]:
38 | correct_count += 1
39 | else:
40 | wrong_count += 1
41 | print ("correct_count: " + str(correct_count))
42 | print ("wrong_count: " + str(wrong_count))
43 | print ("precision: " + str( correct_count * 1.0 / len(golden_label)))
44 |
45 |
46 |
47 |
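48 | # Note: predictions are derived pairwise (two rows per example, see the
49 | # first loop) and compared against the gold labels in dev_label.txt;
50 | # the final number is plain accuracy over all examples.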
--------------------------------------------------------------------------------
/models/ernie/.gitignore:
--------------------------------------------------------------------------------
1 | # Initially taken from Github's Python gitignore file
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # IPython
79 | profile_default/
80 | ipython_config.py
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # celery beat schedule file
86 | celerybeat-schedule
87 |
88 | # SageMath parsed files
89 | *.sage.py
90 |
91 | # Environments
92 | .env
93 | .venv
94 | env/
95 | venv/
96 | ENV/
97 | env.bak/
98 | venv.bak/
99 |
100 | # Spyder project settings
101 | .spyderproject
102 | .spyproject
103 |
104 | # Rope project settings
105 | .ropeproject
106 |
107 | # mkdocs documentation
108 | /site
109 |
110 | # mypy
111 | .mypy_cache/
112 | .dmypy.json
113 | dmypy.json
114 |
115 | # Pyre type checker
116 | .pyre/
117 |
--------------------------------------------------------------------------------
/models/ernie/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | BERT needs to maintain permanent compatibility with the pre-trained model files,
4 | so we do not plan to make any major changes to this library (other than what was
5 | promised in the README). However, we can accept small patches related to
6 | re-factoring and documentation. To submit contributions, there are just a few
7 | small guidelines you need to follow.
8 |
9 | ## Contributor License Agreement
10 |
11 | Contributions to this project must be accompanied by a Contributor License
12 | Agreement. You (or your employer) retain the copyright to your contribution;
13 | this simply gives us permission to use and redistribute your contributions as
14 | part of the project. Head over to <https://cla.developers.google.com/> to see
15 | your current agreements on file or to sign a new one.
16 |
17 | You generally only need to submit a CLA once, so if you've already submitted one
18 | (even if it was for a different project), you probably don't need to do it
19 | again.
20 |
21 | ## Code reviews
22 |
23 | All submissions, including submissions by project members, require review. We
24 | use GitHub pull requests for this purpose. Consult
25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
26 | information on using pull requests.
27 |
28 | ## Community Guidelines
29 |
30 | This project follows
31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
32 |
--------------------------------------------------------------------------------
/models/ernie/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/models/ernie/optimization_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import optimization
20 | import tensorflow as tf
21 |
22 |
23 | class OptimizationTest(tf.test.TestCase):
24 |
25 | def test_adam(self):
26 | with self.test_session() as sess:
27 | w = tf.get_variable(
28 | "w",
29 | shape=[3],
30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1]))
31 | x = tf.constant([0.4, 0.2, -0.5])
32 | loss = tf.reduce_mean(tf.square(x - w))
33 | tvars = tf.trainable_variables()
34 | grads = tf.gradients(loss, tvars)
35 | global_step = tf.train.get_or_create_global_step()
36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2)
37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step)
38 | init_op = tf.group(tf.global_variables_initializer(),
39 | tf.local_variables_initializer())
40 | sess.run(init_op)
41 | for _ in range(100):
42 | sess.run(train_op)
43 | w_np = sess.run(w)
44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2)
45 |
46 |
47 | if __name__ == "__main__":
48 | tf.test.main()
49 |
--------------------------------------------------------------------------------
/models/ernie/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow.
2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow.
3 |
--------------------------------------------------------------------------------
/models/ernie/run_classifier_afqmc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:04:18
6 |
7 | TASK_NAME="afqmc"
8 | MODEL_NAME="baidu_ernie"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip
29 | unzip afqmc_public.zip
30 | rm afqmc_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ERNIE_DIR ]; then
38 | mkdir -p $ERNIE_DIR
39 | echo "makedir $ERNIE_DIR"
40 | fi
41 | cd $ERNIE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip
45 | unzip baidu_ernie.zip
46 | rm baidu_ernie.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ERNIE_DIR/vocab.txt \
62 | --bert_config_file=$ERNIE_DIR/bert_config.json \
63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ERNIE_DIR/vocab.txt \
78 | --bert_config_file=$ERNIE_DIR/bert_config.json \
79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
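run_classifier_afqmc.sh above and its siblings below all follow the same template: the script bootstraps its own inputs (downloading and unzipping the CLUE task data and the baidu_ernie checkpoint whenever an expected file is missing), then fine-tunes and evaluates; a single `predict` argument skips training and writes test-set predictions instead. Since run_classifier.py is a BERT-style Estimator script, the predict pass loads the newest checkpoint from the task's output_dir, so the usual sequence is train first, then predict (a sketch, assuming the scripts are invoked from models/ernie):

    cd models/ernie
    bash run_classifier_afqmc.sh          # download data + model, fine-tune, eval on dev
    bash run_classifier_afqmc.sh predict  # reuse afqmc_output/ checkpoints on the test set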
/models/ernie/run_classifier_cmnli.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:04:49
6 |
7 | TASK_NAME="cmnli"
8 | MODEL_NAME="baidu_ernie"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip
29 | unzip cmnli_public.zip
30 | rm cmnli_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ERNIE_DIR ]; then
38 | mkdir -p $ERNIE_DIR
39 | echo "makedir $ERNIE_DIR"
40 | fi
41 | cd $ERNIE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip
45 | unzip baidu_ernie.zip
46 | rm baidu_ernie.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ERNIE_DIR/vocab.txt \
62 | --bert_config_file=$ERNIE_DIR/bert_config.json \
63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ERNIE_DIR/vocab.txt \
78 | --bert_config_file=$ERNIE_DIR/bert_config.json \
79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/ernie/run_classifier_csl.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: Li Yudong
3 | # @Date: 2019-11-28
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:04:58
6 |
7 | TASK_NAME="csl"
8 | MODEL_NAME="baidu_ernie"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip
29 | unzip csl_public.zip
30 | rm csl_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ERNIE_DIR ]; then
38 | mkdir -p $ERNIE_DIR
39 | echo "makedir $ERNIE_DIR"
40 | fi
41 | cd $ERNIE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip
45 | unzip baidu_ernie.zip
46 | rm baidu_ernie.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ERNIE_DIR/vocab.txt \
62 | --bert_config_file=$ERNIE_DIR/bert_config.json \
63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ERNIE_DIR/vocab.txt \
78 | --bert_config_file=$ERNIE_DIR/bert_config.json \
79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/ernie/run_classifier_iflytek.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:05:02
6 |
7 | TASK_NAME="iflytek"
8 | MODEL_NAME="baidu_ernie"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip
29 | unzip iflytek_public.zip
30 | rm iflytek_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ERNIE_DIR ]; then
38 | mkdir -p $ERNIE_DIR
39 | echo "makedir $ERNIE_DIR"
40 | fi
41 | cd $ERNIE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip
45 | unzip baidu_ernie.zip
46 | rm baidu_ernie.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ERNIE_DIR/vocab.txt \
62 | --bert_config_file=$ERNIE_DIR/bert_config.json \
63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ERNIE_DIR/vocab.txt \
78 | --bert_config_file=$ERNIE_DIR/bert_config.json \
79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/ernie/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:05:06
6 |
7 | TASK_NAME="tnews"
8 | MODEL_NAME="baidu_ernie"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip
29 | unzip tnews_public.zip
30 | rm tnews_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ERNIE_DIR ]; then
38 | mkdir -p $ERNIE_DIR
39 | echo "makedir $ERNIE_DIR"
40 | fi
41 | cd $ERNIE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip
45 | unzip baidu_ernie.zip
46 | rm baidu_ernie.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ERNIE_DIR/vocab.txt \
62 | --bert_config_file=$ERNIE_DIR/bert_config.json \
63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ERNIE_DIR/vocab.txt \
78 | --bert_config_file=$ERNIE_DIR/bert_config.json \
79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/ernie/run_classifier_wsc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:05:13
6 |
7 | TASK_NAME="wsc"
8 | MODEL_NAME="baidu_ernie"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ERNIE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip
29 | unzip wsc_public.zip
30 | rm wsc_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ERNIE_DIR ]; then
38 | mkdir -p $ERNIE_DIR
39 | echo "makedir $ERNIE_DIR"
40 | fi
41 | cd $ERNIE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget https://storage.googleapis.com/chineseglue/pretrain_models/baidu_ernie.zip
45 | unzip baidu_ernie.zip
46 | rm baidu_ernie.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ERNIE_DIR/vocab.txt \
62 | --bert_config_file=$ERNIE_DIR/bert_config.json \
63 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ERNIE_DIR/vocab.txt \
78 | --bert_config_file=$ERNIE_DIR/bert_config.json \
79 | --init_checkpoint=$ERNIE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/ernie/run_ner_msra.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | export CUDA_VISIBLE_DEVICES="0"
3 | export BERT_BASE_DIR=$CURRENT_DIR/prev_trained_model/tensorflow
4 | export GLUE_DIR=$CURRENT_DIR/../../glue/chineseGLUEdatasets/
5 | TASK_NAME="msraner"
6 |
7 | python run_ner.py \
8 | --task_name=$TASK_NAME \
9 | --do_train=true \
10 | --do_eval=false \
11 | --do_predict=true \
12 | --data_dir=$GLUE_DIR/$TASK_NAME \
13 | --vocab_file=$BERT_BASE_DIR/vocab.txt \
14 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \
15 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
16 | --max_seq_length=256 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=5.0 \
20 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
21 |
--------------------------------------------------------------------------------
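Unlike the classifier scripts above, run_ner_msra.sh downloads nothing: it assumes the MSRA NER data is already under ../../glue/chineseGLUEdatasets/msraner and that a checkpoint has been unpacked into prev_trained_model/tensorflow. The expected file names follow from the flags (a sketch of the assumed layout, inferred from the script rather than verified):

    # expected before running:
    #   prev_trained_model/tensorflow/{vocab.txt,bert_config.json,bert_model.ckpt.*}
    #   ../../glue/chineseGLUEdatasets/msraner/
    cd models/ernie
    bash run_ner_msra.sh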
/models/ernie/tpu/run_classifier_inews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="inews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/ernie1.0-base/baidu_ernie
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/ernie1.0-base/baidu_ernie/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
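The tpu/ variants drop the download logic and talk to a TPU worker addressed directly over gRPC (`--use_tpu=True --tpu_name=grpc://<ip>:8470`). Every path is a gs:// URL for a reason: the TPU host reads the training data and writes checkpoints itself, so local paths will not work here. The hard-coded IPs are specific to the original authors' setup; one way to look up your own TPU's address (the TPU name and zone below are placeholders):

    # 8470 is the standard TPU gRPC port.
    gcloud compute tpus describe my-tpu --zone=us-central1-b \
      --format='value(networkEndpoints[0].ipAddress)'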
/models/ernie/tpu/run_classifier_lcqmc.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="lcqmc"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/ernie1.0-base/baidu_ernie
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/ernie1.0-base/baidu_ernie/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/ernie/tpu/run_classifier_thucnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="thucnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/ernie1.0-base/baidu_ernie
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/ernie1.0-base/baidu_ernie/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/ernie/tpu/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="tnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/ernie1.0-base/baidu_ernie
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/ernie1.0-base/baidu_ernie/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.16.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/ernie/tpu/run_classifier_xnli.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="xnli"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/ernie1.0-base/baidu_ernie
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/ernie1.0-base/baidu_ernie/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta/create_pretrain_data.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | echo $1,$2
3 |
4 | for((i=$1;i<=$2;i++));
5 | do
6 | python3 create_pretraining_data.py --do_whole_word_mask=True --input_file=./raw_text/news2016zh_$i.txt \
7 | --output_file=./tf_records_all/tf_news2016zh_$i.tfrecord --vocab_file=./resources/vocab.txt \
8 | --do_lower_case=True --max_seq_length=256 --max_predictions_per_seq=23 --masked_lm_prob=0.10 --random_seed=12345 --dupe_factor=5
9 | done
10 |
--------------------------------------------------------------------------------
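The loop bounds are inclusive shard indices: `bash create_pretrain_data.sh 0 9` masks and serializes raw_text/news2016zh_0.txt through news2016zh_9.txt into one .tfrecord each. A minimal invocation, assuming the raw shards are already in place (creating the output directory first is safest, since the TFRecord writer will not create it):

    cd models/roberta
    mkdir -p tf_records_all
    bash create_pretrain_data.sh 0 9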
/models/roberta/resources/RoBERTa_zh_Large_Learning_Curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/roberta/resources/RoBERTa_zh_Large_Learning_Curve.png
--------------------------------------------------------------------------------
/models/roberta/run_classifier_afqmc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:19:53
6 |
7 | TASK_NAME="afqmc"
8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16"  # "roeberta" (sic) matches the hosted archive name; do not "fix" it
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip
29 | unzip afqmc_public.zip
30 | rm afqmc_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then
38 | mkdir -p $ROBERTA_LARGE_DIR
39 | echo "makedir $ROBERTA_LARGE_DIR"
40 | fi
41 | cd $ROBERTA_LARGE_DIR
42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip
45 | unzip roeberta_zh_L-24_H-1024_A-16.zip
46 | rm roeberta_zh_L-24_H-1024_A-16.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/roberta/run_classifier_cmnli.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:20:14
6 |
7 | TASK_NAME="cmnli"
8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip
29 | unzip cmnli_public.zip
30 | rm cmnli_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then
38 | mkdir -p $ROBERTA_LARGE_DIR
39 | echo "makedir $ROBERTA_LARGE_DIR"
40 | fi
41 | cd $ROBERTA_LARGE_DIR
42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip
45 | unzip roeberta_zh_L-24_H-1024_A-16.zip
46 | rm roeberta_zh_L-24_H-1024_A-16.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
--------------------------------------------------------------------------------
/models/roberta/run_classifier_csl.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: Li Yudong
3 | # @Date: 2019-11-28
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:20:24
6 |
7 | TASK_NAME="csl"
8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip
29 | unzip csl_public.zip
30 | rm csl_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then
38 | mkdir -p $ROBERTA_LARGE_DIR
39 | echo "makedir $ROBERTA_LARGE_DIR"
40 | fi
41 | cd $ROBERTA_LARGE_DIR
42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip
45 | unzip roeberta_zh_L-24_H-1024_A-16.zip
46 | rm roeberta_zh_L-24_H-1024_A-16.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/roberta/run_classifier_iflytek.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:20:28
6 |
7 | TASK_NAME="iflytek"
8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip
29 | unzip iflytek_public.zip
30 | rm iflytek_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then
38 | mkdir -p $ROBERTA_LARGE_DIR
39 | echo "makedir $ROBERTA_LARGE_DIR"
40 | fi
41 | cd $ROBERTA_LARGE_DIR
42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip
45 | unzip roeberta_zh_L-24_H-1024_A-16.zip
46 | rm roeberta_zh_L-24_H-1024_A-16.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/roberta/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:20:34
6 |
7 | TASK_NAME="tnews"
8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip
29 | unzip tnews_public.zip
30 | rm tnews_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then
38 | mkdir -p $ROBERTA_LARGE_DIR
39 | echo "makedir $ROBERTA_LARGE_DIR"
40 | fi
41 | cd $ROBERTA_LARGE_DIR
42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip
45 | unzip roeberta_zh_L-24_H-1024_A-16.zip
46 | rm roeberta_zh_L-24_H-1024_A-16.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/roberta/run_classifier_wsc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:20:39
6 |
7 | TASK_NAME="wsc"
8 | MODEL_NAME="roeberta_zh_L-24_H-1024_A-16"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip
29 | unzip wsc_public.zip
30 | rm wsc_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_LARGE_DIR ]; then
38 | mkdir -p $ROBERTA_LARGE_DIR
39 | echo "makedir $ROBERTA_LARGE_DIR"
40 | fi
41 | cd $ROBERTA_LARGE_DIR
42 | if [ ! -f "bert_config_large.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "checkpoint" ] || [ ! -f "roberta_zh_large_model.ckpt.index" ] || [ ! -f "roberta_zh_large_model.ckpt.meta" ] || [ ! -f "roberta_zh_large_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/roeberta_zh_L-24_H-1024_A-16.zip
45 | unzip roeberta_zh_L-24_H-1024_A-16.zip
46 | rm roeberta_zh_L-24_H-1024_A-16.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
63 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_LARGE_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_LARGE_DIR/bert_config_large.json \
79 | --init_checkpoint=$ROBERTA_LARGE_DIR/roberta_zh_large_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/roberta/run_ner_msra.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | export CUDA_VISIBLE_DEVICES="0"
3 | export BERT_LARGE_DIR=$CURRENT_DIR/prev_trained_model/roberta_zh_L-24_H-1024_A-16
4 | export GLUE_DIR=$CURRENT_DIR/../../glue/chineseGLUEdatasets/
5 | TASK_NAME="msraner"
6 |
7 | python run_ner.py \
8 | --task_name=$TASK_NAME \
9 | --do_train=true \
10 | --do_eval=false \
11 | --do_predict=true \
12 | --data_dir=$GLUE_DIR/$TASK_NAME \
13 | --vocab_file=$BERT_LARGE_DIR/vocab.txt \
14 | --bert_config_file=$BERT_LARGE_DIR/bert_config_large.json \
15 | --init_checkpoint=$BERT_LARGE_DIR/roberta_zh_large_model.ckpt \
16 | --max_seq_length=256 \
17 | --train_batch_size=8 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=5.0 \
20 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
21 |
--------------------------------------------------------------------------------
/models/roberta/tpu/run_classifier_inews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="inews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta/tpu/run_classifier_jdcomment.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="jdcomment"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.230.1.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta/tpu/run_classifier_lcqmc.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="lcqmc"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-large/roeberta_zh_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-large/roeberta_zh_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config_large.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/roberta_zh_large_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.16.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta/tpu/run_classifier_thucnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="thucnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta/tpu/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="tnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-large/roeberta_zh_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-large/roeberta_zh_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config_large.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/roberta_zh_large_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta/tpu/run_classifier_xnli.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="xnli"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | BERT needs to maintain permanent compatibility with the pre-trained model files,
4 | so we do not plan to make any major changes to this library (other than what was
5 | promised in the README). However, we can accept small patches related to
6 | re-factoring and documentation. To submit contributes, there are just a few
7 | small guidelines you need to follow.
8 |
9 | ## Contributor License Agreement
10 |
11 | Contributions to this project must be accompanied by a Contributor License
12 | Agreement. You (or your employer) retain the copyright to your contribution;
13 | this simply gives us permission to use and redistribute your contributions as
14 | part of the project. Head over to <https://cla.developers.google.com/> to see
15 | your current agreements on file or to sign a new one.
16 |
17 | You generally only need to submit a CLA once, so if you've already submitted one
18 | (even if it was for a different project), you probably don't need to do it
19 | again.
20 |
21 | ## Code reviews
22 |
23 | All submissions, including submissions by project members, require review. We
24 | use GitHub pull requests for this purpose. Consult
25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
26 | information on using pull requests.
27 |
28 | ## Community Guidelines
29 |
30 | This project follows
31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
32 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/optimization_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import optimization
20 | import tensorflow as tf
21 |
22 |
23 | class OptimizationTest(tf.test.TestCase):
24 |
25 | def test_adam(self):
26 | with self.test_session() as sess:
27 | w = tf.get_variable(
28 | "w",
29 | shape=[3],
30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1]))
31 | x = tf.constant([0.4, 0.2, -0.5])
32 | loss = tf.reduce_mean(tf.square(x - w))
33 | tvars = tf.trainable_variables()
34 | grads = tf.gradients(loss, tvars)
35 | global_step = tf.train.get_or_create_global_step()
36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2)
37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step)
38 | init_op = tf.group(tf.global_variables_initializer(),
39 | tf.local_variables_initializer())
40 | sess.run(init_op)
41 | for _ in range(100):
42 | sess.run(train_op)
43 | w_np = sess.run(w)
44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2)
45 |
46 |
47 | if __name__ == "__main__":
48 | tf.test.main()
49 |
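
Note: the test drives w from [0.1, -0.2, -0.1] toward the target x = [0.4, 0.2, -0.5] by taking 100 AdamWeightDecayOptimizer steps on the mean-squared error, then asserts convergence within 1e-2. A minimal way to run it in isolation, assuming a TF 1.x environment with optimization.py in the same directory:

    # Smoke-test the optimizer on its own (TF 1.x assumed):
    python optimization_test.py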
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow.
2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow.
3 |
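
A minimal environment sketch. The upper version bound is an assumption, added because these scripts use TF 1.x-only APIs (tf.get_variable, tf.train.get_or_create_global_step), so TensorFlow 2.x will not work unmodified:

    # CPU install; use tensorflow-gpu instead on CUDA machines.
    # The <2.0 cap is an assumption: the code relies on TF 1.x APIs.
    pip install "tensorflow>=1.11.0,<2.0"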
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/run_classifier_afqmc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:21:27
6 |
7 | TASK_NAME="afqmc"
8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip
29 | unzip afqmc_public.zip
30 | rm afqmc_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_WWM_DIR ]; then
38 | mkdir -p $ROBERTA_WWM_DIR
39 | echo "makedir $ROBERTA_WWM_DIR"
40 | fi
41 | cd $ROBERTA_WWM_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
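
All classifier scripts in this directory share the branch structure above: with no arguments they download the dataset and pretrained model, then fine-tune and evaluate; with a single `predict` argument they run test-set inference. A usage sketch, assuming the script is invoked from models/roberta_wwm_ext:

    # Fine-tune and evaluate on AFQMC:
    bash run_classifier_afqmc.sh
    # Then write test-set predictions under afqmc_output/:
    bash run_classifier_afqmc.sh predict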
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/run_classifier_cmnli.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:21:50
6 |
7 | TASK_NAME="cmnli"
8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip
29 | unzip cmnli_public.zip
30 | rm cmnli_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_WWM_DIR ]; then
38 | mkdir -p $ROBERTA_WWM_DIR
39 | echo "makedir $ROBERTA_WWM_DIR"
40 | fi
41 | cd $ROBERTA_WWM_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/run_classifier_csl.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: Li Yudong
3 | # @Date: 2019-11-28
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:21:58
6 |
7 | TASK_NAME="csl"
8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip
29 | unzip csl_public.zip
30 | rm csl_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_WWM_DIR ]; then
38 | mkdir -p $ROBERTA_WWM_DIR
39 | echo "makedir $ROBERTA_WWM_DIR"
40 | fi
41 | cd $ROBERTA_WWM_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/run_classifier_iflytek.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:22:02
6 |
7 | TASK_NAME="iflytek"
8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip
29 | unzip iflytek_public.zip
30 | rm iflytek_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_WWM_DIR ]; then
38 | mkdir -p $ROBERTA_WWM_DIR
39 | echo "makedir $ROBERTA_WWM_DIR"
40 | fi
41 | cd $ROBERTA_WWM_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:22:06
6 |
7 | TASK_NAME="tnews"
8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip
29 | unzip tnews_public.zip
30 | rm tnews_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_WWM_DIR ]; then
38 | mkdir -p $ROBERTA_WWM_DIR
39 | echo "makedir $ROBERTA_WWM_DIR"
40 | fi
41 | cd $ROBERTA_WWM_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
87 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/run_classifier_wsc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:22:11
6 |
7 | TASK_NAME="wsc"
8 | MODEL_NAME="chinese_roberta_wwm_ext_L-12_H-768_A-12"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_WWM_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip
29 | unzip wsc_public.zip
30 | rm wsc_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_WWM_DIR ]; then
38 | mkdir -p $ROBERTA_WWM_DIR
39 | echo "makedir $ROBERTA_WWM_DIR"
40 | fi
41 | cd $ROBERTA_WWM_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
45 | unzip chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
46 | rm chinese_roberta_wwm_ext_L-12_H-768_A-12.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
63 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_WWM_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_WWM_DIR/bert_config.json \
79 | --init_checkpoint=$ROBERTA_WWM_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
87 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/run_ner_msra.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | export CUDA_VISIBLE_DEVICES="0"
3 | export BERT_BASE_DIR=$CURRENT_DIR/prev_trained_model/chinese_roberta_wwm_ext_L-12_H-768_A-12
4 | export GLUE_DIR=$CURRENT_DIR/../../glue/chineseGLUEdatasets/
5 | TASK_NAME="msraner"
6 |
7 | python run_ner.py \
8 | --task_name=$TASK_NAME \
9 | --do_train=true \
10 | --do_eval=false \
11 | --do_predict=true \
12 | --data_dir=$GLUE_DIR/$TASK_NAME \
13 | --vocab_file=$BERT_BASE_DIR/vocab.txt \
14 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \
15 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
16 | --max_seq_length=256 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=5.0 \
20 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
21 |
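
Unlike the classifier scripts, this NER script has no download step: it assumes the MSRA NER data is already unpacked under ../../glue/chineseGLUEdatasets/msraner and the RoBERTa-wwm-ext model under prev_trained_model/. A usage sketch under those assumptions:

    # Train and predict on MSRA NER (data and model assumed to be in place):
    bash run_ner_msra.sh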
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/tpu/run_classifier_inews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="inews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
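
The TPU scripts hard-code the authors' own GCS buckets (gs://models_zxw, gs://data_zxw) and TPU gRPC address. A sketch of the substitutions a new user would make; the bucket names and address below are placeholders, not real resources:

    # Hypothetical adaptation: point the script at your own resources.
    export PREV_TRAINED_MODEL_DIR=gs://<your-bucket>/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
    export DATA_DIR=gs://<your-bucket>/chineseGLUEdatasets/inews
    export OUTPUT_DIR=gs://<your-bucket>/outputs/inews
    # and pass your own TPU endpoint:
    #   --tpu_name=grpc://<your-tpu-ip>:8470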
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/tpu/run_classifier_jdcomment.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="jdcomment"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.230.1.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/tpu/run_classifier_lcqmc.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="lcqmc"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-base/chinese_roberta_wwm_ext_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-base/chinese_roberta_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.16.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/tpu/run_classifier_thucnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="thucnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/tpu/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="tnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-base/chinese_roberta_wwm_ext_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-base/chinese_roberta_wwm_ext_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta_wwm_ext/tpu/run_classifier_xnli.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="xnli"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | BERT needs to maintain permanent compatibility with the pre-trained model files,
4 | so we do not plan to make any major changes to this library (other than what was
5 | promised in the README). However, we can accept small patches related to
6 | re-factoring and documentation. To submit contributions, there are just a few
7 | small guidelines you need to follow.
8 |
9 | ## Contributor License Agreement
10 |
11 | Contributions to this project must be accompanied by a Contributor License
12 | Agreement. You (or your employer) retain the copyright to your contribution;
13 | this simply gives us permission to use and redistribute your contributions as
14 | part of the project. Head over to <https://cla.developers.google.com/> to see
15 | your current agreements on file or to sign a new one.
16 |
17 | You generally only need to submit a CLA once, so if you've already submitted one
18 | (even if it was for a different project), you probably don't need to do it
19 | again.
20 |
21 | ## Code reviews
22 |
23 | All submissions, including submissions by project members, require review. We
24 | use GitHub pull requests for this purpose. Consult
25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
26 | information on using pull requests.
27 |
28 | ## Community Guidelines
29 |
30 | This project follows
31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
32 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/optimization_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import optimization
20 | import tensorflow as tf
21 |
22 |
23 | class OptimizationTest(tf.test.TestCase):
24 |
25 | def test_adam(self):
26 | with self.test_session() as sess:
27 | w = tf.get_variable(
28 | "w",
29 | shape=[3],
30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1]))
31 | x = tf.constant([0.4, 0.2, -0.5])
32 | loss = tf.reduce_mean(tf.square(x - w))
33 | tvars = tf.trainable_variables()
34 | grads = tf.gradients(loss, tvars)
35 | global_step = tf.train.get_or_create_global_step()
36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2)
37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step)
38 | init_op = tf.group(tf.global_variables_initializer(),
39 | tf.local_variables_initializer())
40 | sess.run(init_op)
41 | for _ in range(100):
42 | sess.run(train_op)
43 | w_np = sess.run(w)
44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2)
45 |
46 |
47 | if __name__ == "__main__":
48 | tf.test.main()
49 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow.
2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow.
3 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/run_classifier_afqmc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:23:18
6 |
7 | TASK_NAME="afqmc"
8 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/afqmc_public.zip
29 | unzip afqmc_public.zip
30 | rm afqmc_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then
38 | mkdir -p $ROBERTA_WWM_LARGE_DIR
39 | echo "makedir $ROBERTA_WWM_LARGE_DIR"
40 | fi
41 | cd $ROBERTA_WWM_LARGE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
45 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
46 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
63 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
79 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/run_classifier_cmnli.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:23:30
6 |
7 | TASK_NAME="cmnli"
8 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/cmnli_public.zip
29 | unzip cmnli_public.zip
30 | rm cmnli_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then
38 | mkdir -p $ROBERTA_WWM_LARGE_DIR
39 | echo "makedir $ROBERTA_WWM_LARGE_DIR"
40 | fi
41 | cd $ROBERTA_WWM_LARGE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
45 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
46 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
63 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
79 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/run_classifier_csl.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # @Author: bo.shi
3 | # @Date: 2019-11-04 09:56:36
4 | # @Last Modified by: bo.shi
5 | # @Last Modified time: 2019-12-05 11:23:41
6 |
7 | TASK_NAME="csl"
8 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16"
9 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
10 | export CUDA_VISIBLE_DEVICES="0"
11 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
12 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
13 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
14 |
15 | # download and unzip dataset
16 | if [ ! -d $GLUE_DATA_DIR ]; then
17 | mkdir -p $GLUE_DATA_DIR
18 | echo "makedir $GLUE_DATA_DIR"
19 | fi
20 | cd $GLUE_DATA_DIR
21 | if [ ! -d $TASK_NAME ]; then
22 | mkdir $TASK_NAME
23 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
24 | fi
25 | cd $TASK_NAME
26 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
27 | rm *
28 | wget https://storage.googleapis.com/cluebenchmark/tasks/csl_public.zip
29 | unzip csl_public.zip
30 | rm csl_public.zip
31 | else
32 | echo "data exists"
33 | fi
34 | echo "Finish download dataset."
35 |
36 | # download model
37 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then
38 | mkdir -p $ROBERTA_WWM_LARGE_DIR
39 | echo "makedir $ROBERTA_WWM_LARGE_DIR"
40 | fi
41 | cd $ROBERTA_WWM_LARGE_DIR
42 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
43 | rm *
44 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
45 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
46 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
47 | else
48 | echo "model exists"
49 | fi
50 | echo "Finish download model."
51 |
52 | # run task
53 | cd $CURRENT_DIR
54 | echo "Start running..."
55 | if [ $# == 0 ]; then
56 | python run_classifier.py \
57 | --task_name=$TASK_NAME \
58 | --do_train=true \
59 | --do_eval=true \
60 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
61 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
62 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
63 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
64 | --max_seq_length=128 \
65 | --train_batch_size=32 \
66 | --learning_rate=2e-5 \
67 | --num_train_epochs=3.0 \
68 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
69 | elif [ $1 == "predict" ]; then
70 | echo "Start predict..."
71 | python run_classifier.py \
72 | --task_name=$TASK_NAME \
73 | --do_train=false \
74 | --do_eval=false \
75 | --do_predict=true \
76 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
77 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
78 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
79 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
80 | --max_seq_length=128 \
81 | --train_batch_size=32 \
82 | --learning_rate=2e-5 \
83 | --num_train_epochs=3.0 \
84 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
85 | fi
86 |
87 |
88 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/run_classifier_iflytek.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # @Author: bo.shi
4 | # @Date: 2019-11-04 09:56:36
5 | # @Last Modified by: bo.shi
6 | # @Last Modified time: 2019-12-05 11:23:45
7 |
8 | TASK_NAME="iflytek"
9 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16"
10 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
11 | export CUDA_VISIBLE_DEVICES="0"
12 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
13 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
15 |
16 | # download and unzip dataset
17 | if [ ! -d $GLUE_DATA_DIR ]; then
18 | mkdir -p $GLUE_DATA_DIR
19 | echo "makedir $GLUE_DATA_DIR"
20 | fi
21 | cd $GLUE_DATA_DIR
22 | if [ ! -d $TASK_NAME ]; then
23 | mkdir $TASK_NAME
24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
25 | fi
26 | cd $TASK_NAME
27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
28 | rm *
29 | wget https://storage.googleapis.com/cluebenchmark/tasks/iflytek_public.zip
30 | unzip iflytek_public.zip
31 | rm iflytek_public.zip
32 | else
33 | echo "data exists"
34 | fi
35 | echo "Finish download dataset."
36 |
37 | # download model
38 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then
39 | mkdir -p $ROBERTA_WWM_LARGE_DIR
40 | echo "makedir $ROBERTA_WWM_LARGE_DIR"
41 | fi
42 | cd $ROBERTA_WWM_LARGE_DIR
43 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
44 | rm *
45 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
46 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
47 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
48 | else
49 | echo "model exists"
50 | fi
51 | echo "Finish download model."
52 |
53 | # run task
54 | cd $CURRENT_DIR
55 | echo "Start running..."
56 | if [ $# == 0 ]; then
57 | python run_classifier.py \
58 | --task_name=$TASK_NAME \
59 | --do_train=true \
60 | --do_eval=true \
61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
62 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
63 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
64 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
65 | --max_seq_length=128 \
66 | --train_batch_size=32 \
67 | --learning_rate=2e-5 \
68 | --num_train_epochs=3.0 \
69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
70 | elif [ $1 == "predict" ]; then
71 | echo "Start predict..."
72 | python run_classifier.py \
73 | --task_name=$TASK_NAME \
74 | --do_train=false \
75 | --do_eval=false \
76 | --do_predict=true \
77 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
78 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
79 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
80 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
81 | --max_seq_length=128 \
82 | --train_batch_size=32 \
83 | --learning_rate=2e-5 \
84 | --num_train_epochs=3.0 \
85 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
86 | fi
87 |
88 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # @Author: bo.shi
4 | # @Date: 2019-11-04 09:56:36
5 | # @Last Modified by: bo.shi
6 | # @Last Modified time: 2019-12-05 11:23:49
7 |
8 | TASK_NAME="tnews"
9 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16"
10 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
11 | export CUDA_VISIBLE_DEVICES="0"
12 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
13 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
15 |
16 | # download and unzip dataset
17 | if [ ! -d $GLUE_DATA_DIR ]; then
18 | mkdir -p $GLUE_DATA_DIR
19 | echo "makedir $GLUE_DATA_DIR"
20 | fi
21 | cd $GLUE_DATA_DIR
22 | if [ ! -d $TASK_NAME ]; then
23 | mkdir $TASK_NAME
24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
25 | fi
26 | cd $TASK_NAME
27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
28 | rm *
29 | wget https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip
30 | unzip tnews_public.zip
31 | rm tnews_public.zip
32 | else
33 | echo "data exists"
34 | fi
35 | echo "Finish download dataset."
36 |
37 | # download model
38 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then
39 | mkdir -p $ROBERTA_WWM_LARGE_DIR
40 | echo "makedir $ROBERTA_WWM_LARGE_DIR"
41 | fi
42 | cd $ROBERTA_WWM_LARGE_DIR
43 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
44 | rm *
45 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
46 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
47 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
48 | else
49 | echo "model exists"
50 | fi
51 | echo "Finish download model."
52 |
53 | # run task
54 | cd $CURRENT_DIR
55 | echo "Start running..."
56 | if [ $# == 0 ]; then
57 | python run_classifier.py \
58 | --task_name=$TASK_NAME \
59 | --do_train=true \
60 | --do_eval=true \
61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
62 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
63 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
64 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
65 | --max_seq_length=128 \
66 | --train_batch_size=32 \
67 | --learning_rate=2e-5 \
68 | --num_train_epochs=3.0 \
69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
70 | elif [ $1 == "predict" ]; then
71 | echo "Start predict..."
72 | python run_classifier.py \
73 | --task_name=$TASK_NAME \
74 | --do_train=false \
75 | --do_eval=false \
76 | --do_predict=true \
77 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
78 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
79 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
80 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
81 | --max_seq_length=128 \
82 | --train_batch_size=32 \
83 | --learning_rate=2e-5 \
84 | --num_train_epochs=3.0 \
85 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
86 | fi
87 |
88 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/run_classifier_wsc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # @Author: bo.shi
4 | # @Date: 2019-11-04 09:56:36
5 | # @Last Modified by: bo.shi
6 | # @Last Modified time: 2019-12-05 11:23:54
7 |
8 | TASK_NAME="wsc"
9 | MODEL_NAME="chinese_roberta_wwm_large_ext_L-24_H-1024_A-16"
10 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
11 | export CUDA_VISIBLE_DEVICES="0"
12 | export PRETRAINED_MODELS_DIR=$CURRENT_DIR/prev_trained_model
13 | export ROBERTA_WWM_LARGE_DIR=$PRETRAINED_MODELS_DIR/$MODEL_NAME
14 | export GLUE_DATA_DIR=$CURRENT_DIR/../../CLUEdataset
15 |
16 | # download and unzip dataset
17 | if [ ! -d $GLUE_DATA_DIR ]; then
18 | mkdir -p $GLUE_DATA_DIR
19 | echo "makedir $GLUE_DATA_DIR"
20 | fi
21 | cd $GLUE_DATA_DIR
22 | if [ ! -d $TASK_NAME ]; then
23 | mkdir $TASK_NAME
24 | echo "makedir $GLUE_DATA_DIR/$TASK_NAME"
25 | fi
26 | cd $TASK_NAME
27 | if [ ! -f "train.json" ] || [ ! -f "dev.json" ] || [ ! -f "test.json" ]; then
28 | rm *
29 | wget https://storage.googleapis.com/cluebenchmark/tasks/wsc_public.zip
30 | unzip wsc_public.zip
31 | rm wsc_public.zip
32 | else
33 | echo "data exists"
34 | fi
35 | echo "Finish download dataset."
36 |
37 | # download model
38 | if [ ! -d $ROBERTA_WWM_LARGE_DIR ]; then
39 | mkdir -p $ROBERTA_WWM_LARGE_DIR
40 | echo "makedir $ROBERTA_WWM_LARGE_DIR"
41 | fi
42 | cd $ROBERTA_WWM_LARGE_DIR
43 | if [ ! -f "bert_config.json" ] || [ ! -f "vocab.txt" ] || [ ! -f "bert_model.ckpt.index" ] || [ ! -f "bert_model.ckpt.meta" ] || [ ! -f "bert_model.ckpt.data-00000-of-00001" ]; then
44 | rm *
45 | wget -c https://storage.googleapis.com/chineseglue/pretrain_models/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
46 | unzip chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
47 | rm chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.zip
48 | else
49 | echo "model exists"
50 | fi
51 | echo "Finish download model."
52 |
53 | # run task
54 | cd $CURRENT_DIR
55 | echo "Start running..."
56 | if [ $# == 0 ]; then
57 | python run_classifier.py \
58 | --task_name=$TASK_NAME \
59 | --do_train=true \
60 | --do_eval=true \
61 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
62 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
63 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
64 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
65 | --max_seq_length=128 \
66 | --train_batch_size=32 \
67 | --learning_rate=2e-5 \
68 | --num_train_epochs=3.0 \
69 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
70 | elif [ $1 == "predict" ]; then
71 | echo "Start predict..."
72 | python run_classifier.py \
73 | --task_name=$TASK_NAME \
74 | --do_train=false \
75 | --do_eval=false \
76 | --do_predict=true \
77 | --data_dir=$GLUE_DATA_DIR/$TASK_NAME \
78 | --vocab_file=$ROBERTA_WWM_LARGE_DIR/vocab.txt \
79 | --bert_config_file=$ROBERTA_WWM_LARGE_DIR/bert_config.json \
80 | --init_checkpoint=$ROBERTA_WWM_LARGE_DIR/bert_model.ckpt \
81 | --max_seq_length=128 \
82 | --train_batch_size=32 \
83 | --learning_rate=2e-5 \
84 | --num_train_epochs=3.0 \
85 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
86 | fi
87 |
88 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/run_ner_msra.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | export CUDA_VISIBLE_DEVICES="0"
3 | export BERT_BASE_DIR=$CURRENT_DIR/prev_trained_model/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
4 | export GLUE_DIR=$CURRENT_DIR/../../glue/chineseGLUEdatasets/
5 | TASK_NAME="msraner"
6 |
7 | python run_ner.py \
8 | --task_name=$TASK_NAME \
9 | --do_train=true \
10 | --do_eval=false \
11 | --do_predict=true \
12 | --data_dir=$GLUE_DIR/$TASK_NAME \
13 | --vocab_file=$BERT_BASE_DIR/vocab.txt \
14 | --bert_config_file=$BERT_BASE_DIR/bert_config.json \
15 | --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
16 | --max_seq_length=256 \
17 | --train_batch_size=8 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=5.0 \
20 | --output_dir=$CURRENT_DIR/${TASK_NAME}_output/
21 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/tpu/run_classifier_inews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="inews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/tpu/run_classifier_jdcomment.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="jdcomment"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.230.1.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/tpu/run_classifier_lcqmc.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="lcqmc"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/tpu/run_classifier_thucnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="thucnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/tpu/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="tnews"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=128 \
17 | --train_batch_size=16 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=3.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://172.18.0.2:8470
22 |
--------------------------------------------------------------------------------
/models/roberta_wwm_large_ext/tpu/run_classifier_xnli.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="xnli"
4 | export PREV_TRAINED_MODEL_DIR=gs://models_zxw/prev_trained_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/roberta-wwm-ext-large/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --task_name=$TASK_NAME \
10 | --do_train=true \
11 | --do_eval=true \
12 | --data_dir=$DATA_DIR \
13 | --vocab_file=$PREV_TRAINED_MODEL_DIR/vocab.txt \
14 | --bert_config_file=$PREV_TRAINED_MODEL_DIR/bert_config.json \
15 | --init_checkpoint=$PREV_TRAINED_MODEL_DIR/bert_model.ckpt \
16 | --max_seq_length=512 \
17 | --train_batch_size=32 \
18 | --learning_rate=2e-5 \
19 | --num_train_epochs=8.0 \
20 | --output_dir=$OUTPUT_DIR \
21 | --num_tpu_cores=8 --use_tpu=True --tpu_name=grpc://10.1.101.2:8470
22 |
--------------------------------------------------------------------------------
/models/xlnet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/xlnet/__init__.py
--------------------------------------------------------------------------------
/models/xlnet/gpu_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import os
6 | import tensorflow as tf
7 |
8 | def assign_to_gpu(gpu=0, ps_dev="/device:CPU:0"):
9 | def _assign(op):
10 | node_def = op if isinstance(op, tf.NodeDef) else op.node_def
11 | if node_def.op == "Variable":
12 | return ps_dev
13 | else:
14 | return "/gpu:%d" % gpu
15 | return _assign
16 |
17 |
18 | def average_grads_and_vars(tower_grads_and_vars):
19 | def average_dense(grad_and_vars):
20 | if len(grad_and_vars) == 1:
21 | return grad_and_vars[0][0]
22 |
23 | grad = grad_and_vars[0][0]
24 | for g, _ in grad_and_vars[1:]:
25 | grad += g
26 | return grad / len(grad_and_vars)
27 |
28 | def average_sparse(grad_and_vars):
29 | if len(grad_and_vars) == 1:
30 | return grad_and_vars[0][0]
31 |
32 | indices = []
33 | values = []
34 | for g, _ in grad_and_vars:
35 | indices += [g.indices]
36 | values += [g.values]
37 | indices = tf.concat(indices, 0)
38 | values = tf.concat(values, 0) / len(grad_and_vars)
39 | return tf.IndexedSlices(values, indices, grad_and_vars[0][0].dense_shape)
40 |
41 | average_grads_and_vars = []
42 | for grad_and_vars in zip(*tower_grads_and_vars):
43 | if grad_and_vars[0][0] is None:
44 | grad = None
45 | elif isinstance(grad_and_vars[0][0], tf.IndexedSlices):
46 | grad = average_sparse(grad_and_vars)
47 | else:
48 | grad = average_dense(grad_and_vars)
49 |         # Keep in mind that the Variables are redundant because they are
50 |         # shared across towers, so we just return the first tower's
51 |         # pointer to the Variable.
52 | v = grad_and_vars[0][1]
53 | grad_and_var = (grad, v)
54 | average_grads_and_vars.append(grad_and_var)
55 | return average_grads_and_vars
56 |
57 |
58 | def load_from_checkpoint(saver, logdir):
59 | sess = tf.get_default_session()
60 | ckpt = tf.train.get_checkpoint_state(logdir)
61 | if ckpt and ckpt.model_checkpoint_path:
62 | if os.path.isabs(ckpt.model_checkpoint_path):
63 | # Restores from checkpoint with absolute path.
64 | saver.restore(sess, ckpt.model_checkpoint_path)
65 | else:
66 | # Restores from checkpoint with relative path.
67 | saver.restore(sess, os.path.join(logdir, ckpt.model_checkpoint_path))
68 | return True
69 | return False
70 |
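
Editor's note: the two helpers above are the standard TF 1.x in-graph data-parallelism pattern. assign_to_gpu returns a device function that pins Variables to a parameter-server device (CPU by default) and all other ops to one GPU, and average_grads_and_vars reduces the per-tower (gradient, variable) lists, handling dense gradients and IndexedSlices separately, into a single list suitable for one apply_gradients call. A minimal usage sketch; the toy loss and two-tower loop are illustrative, not from this repo:

    import tensorflow as tf

    def tower_loss(x):
        # Toy model: one shared scalar weight; any per-tower loss works here.
        w = tf.get_variable("w", shape=[], initializer=tf.ones_initializer())
        return tf.reduce_mean(tf.square(x * w))

    opt = tf.train.AdamOptimizer(1e-3)
    tower_grads_and_vars = []
    for i in range(2):  # one tower per GPU
        with tf.device(assign_to_gpu(i)), \
             tf.variable_scope("model", reuse=tf.AUTO_REUSE):
            loss = tower_loss(tf.random_normal([4]))
            tower_grads_and_vars.append(opt.compute_gradients(loss))

    # Average the per-tower gradients and apply them once.
    train_op = opt.apply_gradients(
        average_grads_and_vars(tower_grads_and_vars))
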
--------------------------------------------------------------------------------
/models/xlnet/spiece.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CLUEbenchmark/CLUEmotionAnalysis2020/ff1bc945fc3a5854e0013de5a2f222dd1da61516/models/xlnet/spiece.model
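
Editor's note: spiece.model is a SentencePiece model; the XLNet runners below point --spiece_model_file at it instead of a WordPiece vocab.txt. A minimal sketch of inspecting it with the sentencepiece package (illustrative; the sample sentence is made up, and XLNet's own pipeline additionally applies its prepro_utils normalization):

    import sentencepiece as spm

    # Load the model the XLNet scripts pass via --spiece_model_file.
    sp = spm.SentencePieceProcessor()
    sp.Load("models/xlnet/spiece.model")

    text = "今天天气不错"               # made-up example sentence
    print(sp.EncodeAsPieces(text))      # subword pieces
    print(sp.EncodeAsIds(text))         # corresponding vocabulary ids
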
--------------------------------------------------------------------------------
/models/xlnet/temp.sh:
--------------------------------------------------------------------------------
1 | a=$(pwd)
2 | echo "$a"
3 |
--------------------------------------------------------------------------------
/models/xlnet/tpu/run_classifier_inews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="inews"
4 | export XLNET_DIR=gs://models_zxw/prev_trained_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --spiece_model_file=${CURRENT_DIR}/../spiece.model \
10 | --model_config_path=${XLNET_DIR}/xlnet_config.json \
11 | --init_checkpoint=${XLNET_DIR}/xlnet_model.ckpt \
12 | --task_name=$TASK_NAME \
13 | --do_train=True \
14 | --do_eval=True \
15 | --eval_all_ckpt=False \
16 | --uncased=False \
17 | --data_dir=$DATA_DIR \
18 | --output_dir=${OUTPUT_DIR} \
19 | --model_dir=${OUTPUT_DIR} \
20 | --train_batch_size=32 \
21 | --eval_batch_size=8 \
22 | --num_hosts=1 \
23 | --num_core_per_host=8 \
24 | --num_train_epochs=3 \
25 | --max_seq_length=128 \
26 | --learning_rate=2e-5 \
27 | --save_steps=1000 \
28 | --use_tpu=True
29 |
--------------------------------------------------------------------------------
/models/xlnet/tpu/run_classifier_lcqmc.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="lcqmc"
4 | export XLNET_DIR=gs://models_zxw/prev_trained_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --spiece_model_file=${CURRENT_DIR}/../spiece.model \
10 | --model_config_path=${XLNET_DIR}/xlnet_config.json \
11 | --init_checkpoint=${XLNET_DIR}/xlnet_model.ckpt \
12 | --task_name=$TASK_NAME \
13 | --do_train=True \
14 | --do_eval=True \
15 | --eval_all_ckpt=False \
16 | --uncased=False \
17 | --data_dir=$DATA_DIR \
18 | --output_dir=${OUTPUT_DIR} \
19 | --model_dir=${OUTPUT_DIR} \
20 | --train_batch_size=32 \
21 | --eval_batch_size=8 \
22 | --num_hosts=1 \
23 | --num_core_per_host=8 \
24 | --num_train_epochs=3 \
25 | --max_seq_length=128 \
26 | --learning_rate=2e-5 \
27 | --save_steps=1000 \
28 | --use_tpu=True
29 |
--------------------------------------------------------------------------------
/models/xlnet/tpu/run_classifier_tnews.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="tnews"
4 | export XLNET_DIR=gs://models_zxw/prev_trained_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/hard_${TASK_NAME}_1
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --spiece_model_file=${CURRENT_DIR}/../spiece.model \
10 | --model_config_path=${XLNET_DIR}/xlnet_config.json \
11 | --init_checkpoint=${XLNET_DIR}/xlnet_model.ckpt \
12 | --task_name=$TASK_NAME \
13 | --do_train=True \
14 | --do_eval=True \
15 | --eval_all_ckpt=True \
16 | --uncased=False \
17 | --data_dir=$DATA_DIR \
18 | --output_dir=${OUTPUT_DIR} \
19 | --model_dir=${OUTPUT_DIR} \
20 | --train_batch_size=16 \
21 | --eval_batch_size=8 \
22 | --num_hosts=1 \
23 | --num_core_per_host=8 \
24 | --num_train_epochs=3 \
25 | --max_seq_length=128 \
26 | --learning_rate=1e-5 \
27 | --save_steps=1000 \
28 | --use_tpu=True --tpu=grpc://192.168.0.2:8470
29 |
--------------------------------------------------------------------------------
/models/xlnet/tpu/run_classifier_xnli.sh:
--------------------------------------------------------------------------------
1 | CURRENT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
2 | CURRENT_TIME=$(date "+%Y%m%d-%H%M%S")
3 | TASK_NAME="xnli"
4 | export XLNET_DIR=gs://models_zxw/prev_trained_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12
5 | export DATA_DIR=gs://data_zxw/nlp/chineseGLUEdatasets.v0.0.1/$TASK_NAME
6 | export OUTPUT_DIR=gs://models_zxw/fine_tuning_models/nlp/xlnet-base/chinese_xlnet_base_L-12_H-768_A-12/tpu/$TASK_NAME/$CURRENT_TIME
7 |
8 | python $CURRENT_DIR/../run_classifier.py \
9 | --spiece_model_file=${CURRENT_DIR}/../spiece.model \
10 | --model_config_path=${XLNET_DIR}/xlnet_config.json \
11 | --init_checkpoint=${XLNET_DIR}/xlnet_model.ckpt \
12 | --task_name=$TASK_NAME \
13 | --do_train=True \
14 | --do_eval=True \
15 | --eval_all_ckpt=False \
16 | --uncased=False \
17 | --data_dir=$DATA_DIR \
18 | --output_dir=${OUTPUT_DIR} \
19 | --model_dir=${OUTPUT_DIR} \
20 | --train_batch_size=32 \
21 | --eval_batch_size=8 \
22 | --num_hosts=1 \
23 | --num_core_per_host=8 \
24 | --num_train_epochs=3 \
25 | --max_seq_length=128 \
26 | --learning_rate=2e-5 \
27 | --save_steps=1000 \
28 | --use_tpu=True
29 |
--------------------------------------------------------------------------------