├── baseline ├── __init__.py └── classifier.py ├── weighting ├── __init__.py ├── image_classifier.py └── classifier.py ├── augmentation ├── __init__.py ├── bert_model.py ├── classifier.py └── generator.py ├── scripts ├── text │ ├── run_baseline_sst5_low.sh │ ├── run_ren_sst2_imb01.sh │ ├── run_ren_sst2_imb002.sh │ ├── run_ren_sst2_imb005.sh │ ├── run_baseline_sst2_imb01.sh │ ├── run_baseline_sst2_imb002.sh │ ├── run_baseline_sst2_imb005.sh │ ├── run_ren_sst5_low.sh │ ├── run_augmentation_sst5_low.sh │ ├── run_weighting_sst2_imb002.sh │ ├── run_weighting_sst2_imb005.sh │ ├── run_weighting_sst2_imb01.sh │ └── run_weighting_sst5_low.sh └── image │ ├── baseline_low_nopretrain.sh │ ├── ren_low_nopretrain.sh │ ├── baseline_low_pretrained.sh │ ├── ren_imb002.sh │ ├── ren_imb005.sh │ ├── ren_imb01.sh │ ├── baseline_imb002.sh │ ├── ren_low_pretrained.sh │ ├── baseline_imb005.sh │ ├── baseline_imb01.sh │ ├── weighting_imb01.sh │ ├── weighting_imb002.sh │ ├── weighting_imb005.sh │ ├── weighting_low_nopretrain.sh │ └── weighting_low_pretrained.sh ├── data_utils ├── data_utils.py ├── download_sst2.py ├── image_data_processors.py └── text_data_processors.py ├── README.md ├── baseline_main.py ├── ren_main.py ├── weighting_main.py ├── magic_module.py ├── augmentation_main.py └── results └── text ├── results_baseline_sst2_imb002.txt ├── results_baseline_sst2_imb005.txt ├── results_baseline_sst2_imb01.txt ├── results_ren_sst2_imb01.txt └── results_ren_sst2_imb005.txt /baseline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /weighting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /scripts/text/run_baseline_sst5_low.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u baseline_main.py \ 4 | --task sst-5 \ 5 | --train_num_per_class 40 \ 6 | --dev_num_per_class 2 \ 7 | --batch_size 8 \ 8 | --epochs 10 9 | done 10 | -------------------------------------------------------------------------------- /scripts/image/baseline_low_nopretrain.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u baseline_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --train_num_per_class 40 \ 8 | --dev_num_per_class 2 \ 9 | --batch_size 10 10 | 11 | done 12 | -------------------------------------------------------------------------------- /scripts/text/run_ren_sst2_imb01.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u ren_main.py \ 4 | --task sst-2 \ 5 | --train_num_per_class 1000 \ 6 | --imbalance_rate 0.1 \ 7 | --dev_num_per_class 10 \ 8 | --epochs 10 \ 9 | --batch_size 25 10 | done 11 | -------------------------------------------------------------------------------- /scripts/text/run_ren_sst2_imb002.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u ren_main.py \ 4 | --task sst-2 \ 5 | --train_num_per_class 1000 \ 6 | --imbalance_rate 0.02 \ 7 | --dev_num_per_class 10 \ 8 | --epochs 10 \ 9 | --batch_size 25 10 | done 11 | -------------------------------------------------------------------------------- /scripts/text/run_ren_sst2_imb005.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u ren_main.py \ 4 | --task sst-2 \ 5 | --train_num_per_class 1000 \ 6 | --imbalance_rate 0.05 \ 7 | 
--dev_num_per_class 10 \ 8 | --epochs 10 \ 9 | --batch_size 25 10 | done 11 | -------------------------------------------------------------------------------- /scripts/image/ren_low_nopretrain.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u ren_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --train_num_per_class 40 \ 8 | --dev_num_per_class 2 \ 9 | --batch_size 10 \ 10 | --pretrain_epochs 5 11 | 12 | done 13 | -------------------------------------------------------------------------------- /scripts/text/run_baseline_sst2_imb01.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u baseline_main.py \ 4 | --task sst-2 \ 5 | --train_num_per_class 1000 \ 6 | --imbalance_rate 0.1 \ 7 | --dev_num_per_class 10 \ 8 | --epochs 10 \ 9 | --batch_size 25 10 | done 11 | -------------------------------------------------------------------------------- /scripts/image/baseline_low_pretrained.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u baseline_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --train_num_per_class 40 \ 8 | --dev_num_per_class 2 \ 9 | --resnet_pretrained \ 10 | --batch_size 10 11 | done 12 | -------------------------------------------------------------------------------- /scripts/text/run_baseline_sst2_imb002.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u baseline_main.py \ 4 | --task sst-2 \ 5 | --train_num_per_class 1000 \ 6 | --imbalance_rate 0.02 \ 7 | --dev_num_per_class 10 \ 8 | --epochs 10 \ 9 | --batch_size 25 10 | done 11 | -------------------------------------------------------------------------------- /scripts/text/run_baseline_sst2_imb005.sh: 
-------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u baseline_main.py \ 4 | --task sst-2 \ 5 | --train_num_per_class 1000 \ 6 | --imbalance_rate 0.05 \ 7 | --dev_num_per_class 10 \ 8 | --epochs 10 \ 9 | --batch_size 25 10 | done 11 | -------------------------------------------------------------------------------- /scripts/text/run_ren_sst5_low.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u ren_main.py \ 4 | --task sst-5 \ 5 | --train_num_per_class 40 \ 6 | --dev_num_per_class 2 \ 7 | --batch_size 8 \ 8 | --epochs 10 \ 9 | --pretrain_epochs 3 \ 10 | --min_epochs 1 11 | done 12 | -------------------------------------------------------------------------------- /scripts/image/ren_imb002.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u ren_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --batch_size 20 \ 8 | --train_num_per_class 1000 \ 9 | --dev_num_per_class 10 \ 10 | --resnet_pretrained \ 11 | --imbalance_rate 0.02 12 | 13 | done 14 | -------------------------------------------------------------------------------- /scripts/image/ren_imb005.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u ren_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --batch_size 20 \ 8 | --train_num_per_class 1000 \ 9 | --dev_num_per_class 10 \ 10 | --resnet_pretrained \ 11 | --imbalance_rate 0.05 12 | 13 | done 14 | -------------------------------------------------------------------------------- /scripts/image/ren_imb01.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u ren_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --batch_size 20 
\ 8 | --train_num_per_class 1000 \ 9 | --dev_num_per_class 10 \ 10 | --resnet_pretrained \ 11 | --imbalance_rate 0.1 12 | 13 | done 14 | -------------------------------------------------------------------------------- /scripts/image/baseline_imb002.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u baseline_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --batch_size 20 \ 8 | --train_num_per_class 1000 \ 9 | --dev_num_per_class 10 \ 10 | --resnet_pretrained \ 11 | --imbalance_rate 0.02 12 | done 13 | -------------------------------------------------------------------------------- /scripts/image/ren_low_pretrained.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u ren_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --train_num_per_class 40 \ 8 | --dev_num_per_class 2 \ 9 | --resnet_pretrained \ 10 | --batch_size 10 \ 11 | --pretrain_epochs 5 12 | 13 | done 14 | -------------------------------------------------------------------------------- /scripts/image/baseline_imb005.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u baseline_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --batch_size 20 \ 8 | --train_num_per_class 1000 \ 9 | --dev_num_per_class 10 \ 10 | --resnet_pretrained \ 11 | --imbalance_rate 0.05 12 | 13 | done 14 | -------------------------------------------------------------------------------- /scripts/image/baseline_imb01.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u baseline_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --batch_size 20 \ 8 | --train_num_per_class 1000 \ 9 | --dev_num_per_class 10 \ 10 | --resnet_pretrained \ 11 | 
--imbalance_rate 0.1 12 | 13 | done 14 | -------------------------------------------------------------------------------- /scripts/image/weighting_imb01.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u weighting_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --batch_size 20 \ 8 | --train_num_per_class 1000 \ 9 | --dev_num_per_class 10 \ 10 | --resnet_pretrained \ 11 | --imbalance_rate 0.1 \ 12 | --w_init 1. \ 13 | --w_decay 2. \ 14 | --norm_fn linear 15 | 16 | done 17 | -------------------------------------------------------------------------------- /scripts/image/weighting_imb002.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u weighting_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --batch_size 20 \ 8 | --train_num_per_class 1000 \ 9 | --dev_num_per_class 10 \ 10 | --resnet_pretrained \ 11 | --imbalance_rate 0.02 \ 12 | --w_init 1. \ 13 | --w_decay 2. \ 14 | --norm_fn linear 15 | 16 | done 17 | -------------------------------------------------------------------------------- /scripts/image/weighting_imb005.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u weighting_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --batch_size 20 \ 8 | --train_num_per_class 1000 \ 9 | --dev_num_per_class 10 \ 10 | --resnet_pretrained \ 11 | --imbalance_rate 0.05 \ 12 | --w_init 1. \ 13 | --w_decay 2. 
\ 14 | --norm_fn linear 15 | 16 | done 17 | -------------------------------------------------------------------------------- /scripts/image/weighting_low_nopretrain.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u weighting_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --train_num_per_class 40 \ 8 | --dev_num_per_class 2 \ 9 | --w_init 1. \ 10 | --w_decay 2. \ 11 | --norm_fn softmax \ 12 | --image_softmax_norm_temp 10. \ 13 | --batch_size 10 \ 14 | --pretrain_epochs 5 15 | 16 | done 17 | -------------------------------------------------------------------------------- /scripts/image/weighting_low_pretrained.sh: -------------------------------------------------------------------------------- 1 | for t in {1..30} 2 | do 3 | python -u weighting_main.py \ 4 | --task cifar-10 \ 5 | --data_seed 159 \ 6 | --epochs 15 \ 7 | --train_num_per_class 40 \ 8 | --dev_num_per_class 2 \ 9 | --resnet_pretrained \ 10 | --w_init 1. \ 11 | --w_decay 5. \ 12 | --norm_fn softmax \ 13 | --image_softmax_norm_temp 10. 
\ 14 | --batch_size 10 \ 15 | --pretrain_epochs 5 16 | 17 | done 18 | -------------------------------------------------------------------------------- /scripts/text/run_augmentation_sst5_low.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python3 -u augmentation_main.py \ 4 | --train_num_per_class 40 \ 5 | --dev_num_per_class 2 \ 6 | --classifier_lr 4e-5 \ 7 | --epochs 5 \ 8 | --min_epochs 0 \ 9 | --batch_size 8 \ 10 | --generator_lr 4e-5 \ 11 | --classifier_pretrain_epochs 3 \ 12 | --generator_pretrain_epochs 60 \ 13 | --n_aug 4 14 | done 15 | -------------------------------------------------------------------------------- /scripts/text/run_weighting_sst2_imb002.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u weighting_main.py \ 4 | --task sst-2 \ 5 | --train_num_per_class 1000 \ 6 | --imbalance_rate 0.02 \ 7 | --dev_num_per_class 10 \ 8 | --epochs 10 \ 9 | --batch_size 25 \ 10 | --w_init 1. \ 11 | --w_decay 5. \ 12 | --norm_fn linear 13 | done 14 | 15 | # (batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, norm_fn='linear', pretrain_epochs=0, task='sst-2', train_num_per_class=1000, w_decay=5.0, w_init=1.0) -------------------------------------------------------------------------------- /scripts/text/run_weighting_sst2_imb005.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u weighting_main.py \ 4 | --task sst-2 \ 5 | --train_num_per_class 1000 \ 6 | --imbalance_rate 0.05 \ 7 | --dev_num_per_class 10 \ 8 | --epochs 10 \ 9 | --batch_size 25 \ 10 | --w_init 1. \ 11 | --w_decay 5. 
\ 12 | --norm_fn linear 13 | done 14 | 15 | # (batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, norm_fn='linear', pretrain_epochs=0, task='sst-2', train_num_per_class=1000, w_decay=5.0, w_init=1.0) -------------------------------------------------------------------------------- /scripts/text/run_weighting_sst2_imb01.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u weighting_main.py \ 4 | --task sst-2 \ 5 | --train_num_per_class 1000 \ 6 | --imbalance_rate 0.1 \ 7 | --dev_num_per_class 10 \ 8 | --epochs 10 \ 9 | --batch_size 25 \ 10 | --w_init 1. \ 11 | --w_decay 10. \ 12 | --norm_fn linear 13 | done 14 | 15 | # (batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, norm_fn='linear', pretrain_epochs=0, task='sst-2', train_num_per_class=1000, w_decay=10.0, w_init=1.0) -------------------------------------------------------------------------------- /scripts/text/run_weighting_sst5_low.sh: -------------------------------------------------------------------------------- 1 | for t in {1..15} 2 | do 3 | python -u weighting_main.py \ 4 | --task sst-5 \ 5 | --train_num_per_class 40 \ 6 | --dev_num_per_class 2 \ 7 | --batch_size 8 \ 8 | --epochs 10 \ 9 | --pretrain_epochs 3 \ 10 | --min_epochs 1 \ 11 | --w_init 1. \ 12 | --w_decay 5. 
\ 13 | --norm_fn softmax 14 | done 15 | 16 | # (batch_size=8, data_seed=159, dev_num_per_class=2, epochs=10, imbalance_rate=1.0, learning_rate=4e-05, min_epochs=1, norm_fn='softmax', pretrain_epochs=3, task='sst-5', train_num_per_class=40, w_decay=5.0, w_init=1.0) -------------------------------------------------------------------------------- /data_utils/data_utils.py: -------------------------------------------------------------------------------- 1 | from .text_data_processors import get_data as get_text_data 2 | from .image_data_processors import get_data as get_image_data 3 | 4 | 5 | def get_data(task, train_num_per_class, dev_num_per_class, imbalance_rate, 6 | data_seed): 7 | if task in ['sst-2', 'sst-5']: 8 | return get_text_data( 9 | task=task, 10 | train_num_per_class=train_num_per_class, 11 | dev_num_per_class=dev_num_per_class, 12 | imbalance_rate=imbalance_rate, 13 | data_seed=data_seed) 14 | 15 | elif task in ['cifar-10']: 16 | return get_image_data( 17 | train_num_per_class=train_num_per_class, 18 | dev_num_per_class=dev_num_per_class, 19 | imbalance_rate=imbalance_rate, 20 | data_seed=data_seed) -------------------------------------------------------------------------------- /data_utils/download_sst2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from io import open # pylint: disable=redefined-builtin 4 | import tensorflow as tf 5 | import texar as tx 6 | 7 | 8 | def clean_sst_text(text): 9 | """Cleans tokens in the SST data, which has already been tokenized. 10 | """ 11 | text = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", text) 12 | text = re.sub(r"\s{2,}", " ", text) 13 | return text.strip().lower() 14 | 15 | 16 | def transform_raw_sst(data_path, raw_fn, new_fn): 17 | """Transforms the raw data format to a new format. 
18 | """ 19 | fout_x_name = os.path.join(data_path, new_fn + '.sentences.txt') 20 | fout_x = open(fout_x_name, 'w', encoding='utf-8') 21 | fout_y_name = os.path.join(data_path, new_fn + '.labels.txt') 22 | fout_y = open(fout_y_name, 'w', encoding='utf-8') 23 | 24 | fin_name = os.path.join(data_path, raw_fn) 25 | with open(fin_name, 'r', encoding='utf-8') as fin: 26 | for line in fin: 27 | parts = line.strip().split() 28 | label = parts[0] 29 | sent = ' '.join(parts[1:]) 30 | sent = clean_sst_text(sent) 31 | fout_x.write(sent + '\n') 32 | fout_y.write(label + '\n') 33 | 34 | return fout_x_name, fout_y_name 35 | 36 | 37 | def prepare_data(data_path): 38 | """Preprocesses SST2 data. 39 | """ 40 | train_path = os.path.join(data_path, "sst.train.sentences.txt") 41 | if not tf.gfile.Exists(train_path): 42 | url = ('https://raw.githubusercontent.com/ZhitingHu/' 43 | 'logicnn/master/data/raw/') 44 | files = ['stsa.binary.phrases.train', 'stsa.binary.dev', 45 | 'stsa.binary.test'] 46 | for fn in files: 47 | tx.data.maybe_download(url + fn, data_path, extract=True) 48 | 49 | fn_train, _ = transform_raw_sst( 50 | data_path, 'stsa.binary.phrases.train', 'sst2.train') 51 | transform_raw_sst(data_path, 'stsa.binary.dev', 'sst2.dev') 52 | transform_raw_sst(data_path, 'stsa.binary.test', 'sst2.test') 53 | 54 | vocab = tx.data.make_vocab(fn_train) 55 | fn_vocab = os.path.join(data_path, 'sst2.vocab') 56 | with open(fn_vocab, 'w', encoding='utf-8') as f_vocab: 57 | for v in vocab: 58 | f_vocab.write(v + '\n') 59 | 60 | tf.logging.info('Preprocessing done: {}'.format(data_path)) 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learning Data Manipulation 2 | 3 | This repo contains preliminary code of the following paper: 4 | 5 | [Learning Data Manipulation for Augmentation and 
Weighting](http://www.cs.cmu.edu/~zhitingh/data/neurips19_data_manip_preprint.pdf) 6 | Zhiting Hu*, Bowen Tan*, Ruslan Salakhutdinov, Tom Mitchell, Eric P. Xing 7 | NeurIPS 2019 (equal contribution) 8 | 9 | ## Requirements 10 | 11 | - `python3.6` 12 | - `pytorch==1.0.1` 13 | - `pytorch_pretrained_bert==0.6.1` 14 | - `torchvision==0.2.2` 15 | 16 | ## Code 17 | * ```baseline_main.py```: Vanilla BERT Classifier. 18 | * ```ren_main.py```: Described in [(Ren et al.)](https://arxiv.org/pdf/1803.09050.pdf). 19 | * ```weighting_main.py```: Our weighting algorithm. 20 | * ```augmentation_main.py```: Our augmentation algorithm. 21 | 22 | 23 | ## Running 24 | Running scripts for experiments are available in [scripts/](scripts/). 25 | 26 | ## Results 27 | 28 | All the detailed training logs are available in [results/](results/). 29 | 30 | *(Note: The result numbers may be slightly different from those in the paper due to slightly different implementation details and random seeds, while the improvements over comparison methods are consistent.)* 31 | 32 | ### low data 33 | 34 | ##### SST-5 35 | |Base Model: BERT|Ren et al.| Weighting | Augmentation | 36 | |:-:|:-:|:-:|:-:| 37 | | 33.32 ± 4.04 | 36.09 ± 2.26 | 36.51 ± 2.54 | 37.55 ± 2.63 | 38 | 39 | ##### CIFAR-10 40 | | | Pretrained | Not Pretrained | 41 | |------------------|----------------|----------------| 42 | |Base Model: ResNet| 34.58 ± 4.13 | 24.68 ± 3.29 | 43 | | Ren et al. 
| 23.29 ± 5.95 | 22.26 ± 2.80 | 44 | | Weighting | 36.75 ± 3.09 | 26.47 ± 1.69 | 45 | 46 | 47 | ### imbalanced data 48 | 49 | ##### SST-2 50 | || 20 : 1000 | 50 : 1000 | 100 : 1000 51 | |:-:|:-:|:-:|:-:| 52 | |Base Model: BERT| 54.91 ± 5.98 | 67.73 ± 9.20 | 75.04 ± 4.51 | 53 | |Ren et al.| 74.61 ± 3.54 | 76.89 ± 5.07 | 80.73 ± 2.19 | 54 | |Weighting| 75.08 ± 4.98 | 79.35 ± 2.59 | 81.82 ± 1.88 | 55 | 56 | ##### CIFAR-10 57 | | | 20 : 1000 | 50 : 1000 | 100 : 1000 | 58 | |------------------|--------------|--------------|--------------| 59 | |Base Model: ResNet| 70.65 ± 4.98 | 79.52 ± 4.81 | 86.12 ± 3.37 | 60 | | Ren et al. | 76.68 ± 5.35 | 77.34 ± 7.38 | 78.57 ± 5.61 | 61 | | Weighting | 79.07 ± 5.02 | 82.65 ± 5.13 | 87.63 ± 3.72 | 62 | -------------------------------------------------------------------------------- /baseline_main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | 4 | from data_utils.data_utils import get_data 5 | from baseline.classifier import Classifier 6 | from weighting.image_classifier import ImageClassifier 7 | 8 | 9 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 10 | print('device:', device) 11 | 12 | parser = argparse.ArgumentParser() 13 | 14 | # data 15 | parser.add_argument('--task', choices=['sst-2', 'sst-5', 'cifar-10']) 16 | parser.add_argument('--train_num_per_class', default=None, type=int) 17 | parser.add_argument('--dev_num_per_class', default=None, type=int) 18 | parser.add_argument('--imbalance_rate', default=1.0, type=float) 19 | parser.add_argument('--data_seed', default=159, type=int) 20 | 21 | # training 22 | parser.add_argument('--epochs', default=10, type=int) 23 | parser.add_argument('--min_epochs', default=0, type=int) 24 | parser.add_argument("--learning_rate", default=4e-5, type=float) 25 | parser.add_argument('--batch_size', default=4, type=int) 26 | 27 | # image 28 | parser.add_argument('--resnet_pretrained', 
default=False, action='store_true') 29 | parser.add_argument('--image_lr', default=1e-3, type=float) 30 | parser.add_argument('--image_momentum', default=0.9, type=float) 31 | parser.add_argument('--image_weight_decay', default=0.01, type=float) 32 | 33 | args = parser.parse_args() 34 | print(args) 35 | 36 | 37 | def main(): 38 | examples, label_list = get_data( 39 | task=args.task, 40 | train_num_per_class=args.train_num_per_class, 41 | dev_num_per_class=args.dev_num_per_class, 42 | imbalance_rate=args.imbalance_rate, 43 | data_seed=args.data_seed) 44 | 45 | if args.task in ['sst-2', 'sst-5']: 46 | classifier = Classifier(label_list=label_list, device=device) 47 | classifier.get_optimizer(learning_rate=args.learning_rate) 48 | 49 | else: 50 | classifier = ImageClassifier( 51 | pretrained=args.resnet_pretrained, baseline=True) 52 | 53 | classifier.get_optimizer( 54 | learning_rate=args.image_lr, 55 | momentum=args.image_momentum, 56 | weight_decay=args.image_weight_decay) 57 | 58 | for split in ['train', 'dev', 'test']: 59 | classifier.load_data( 60 | set_type=split, 61 | examples=examples[split], 62 | batch_size=args.batch_size, 63 | shuffle=(split != 'test')) 64 | 65 | print('=' * 60, '\n', 'Training', '\n', '=' * 60, sep='') 66 | best_dev_acc, final_test_acc = -1., -1. 67 | for epoch in range(args.epochs): 68 | classifier.train_epoch() 69 | dev_acc = classifier.evaluate('dev') 70 | 71 | if epoch >= args.min_epochs: 72 | do_test = (dev_acc > best_dev_acc) 73 | best_dev_acc = max(best_dev_acc, dev_acc) 74 | else: 75 | do_test = False 76 | 77 | print('Epoch {}, Dev Acc: {:.4f}, Best Ever: {:.4f}'.format( 78 | epoch, 100. * dev_acc, 100. * best_dev_acc)) 79 | 80 | if do_test: 81 | final_test_acc = classifier.evaluate('test') 82 | print('Test Acc: {:.4f}'.format(100. * final_test_acc)) 83 | 84 | print('Final Dev Acc: {:.4f}, Final Test Acc: {:.4f}'.format( 85 | 100. * best_dev_acc, 100. 
* final_test_acc)) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /ren_main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | 4 | from data_utils.data_utils import get_data 5 | from weighting.classifier import Classifier 6 | from weighting.image_classifier import ImageClassifier 7 | 8 | 9 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 10 | print('device:', device) 11 | 12 | parser = argparse.ArgumentParser() 13 | 14 | # data 15 | parser.add_argument('--task', choices=['sst-2', 'sst-5', 'cifar-10']) 16 | parser.add_argument('--train_num_per_class', default=None, type=int) 17 | parser.add_argument('--dev_num_per_class', default=None, type=int) 18 | parser.add_argument('--imbalance_rate', default=1.0, type=float) 19 | parser.add_argument('--data_seed', default=159, type=int) 20 | 21 | # training 22 | parser.add_argument('--epochs', default=10, type=int) 23 | parser.add_argument('--min_epochs', default=0, type=int) 24 | parser.add_argument('--pretrain_epochs', default=0, type=int) 25 | parser.add_argument("--learning_rate", default=4e-5, type=float) 26 | parser.add_argument('--batch_size', default=4, type=int) 27 | 28 | # image 29 | parser.add_argument('--resnet_pretrained', default=False, action='store_true') 30 | parser.add_argument('--image_lr', default=1e-3, type=float) 31 | parser.add_argument('--image_momentum', default=0.9, type=float) 32 | parser.add_argument('--image_weight_decay', default=0.01, type=float) 33 | 34 | args = parser.parse_args() 35 | print(args) 36 | 37 | 38 | def main(): 39 | examples, label_list = get_data( 40 | task=args.task, 41 | train_num_per_class=args.train_num_per_class, 42 | dev_num_per_class=args.dev_num_per_class, 43 | imbalance_rate=args.imbalance_rate, 44 | data_seed=args.data_seed) 45 | 46 | if args.task in ['sst-2', 'sst-5']: 47 | 
classifier = Classifier( 48 | label_list=label_list, ren=True, norm_fn='linear', device=device) 49 | classifier.get_optimizer(learning_rate=args.learning_rate) 50 | 51 | else: 52 | classifier = ImageClassifier( 53 | pretrained=args.resnet_pretrained, ren=True) 54 | 55 | classifier.get_optimizer( 56 | learning_rate=args.image_lr, 57 | momentum=args.image_momentum, 58 | weight_decay=args.image_weight_decay) 59 | 60 | for split in ['train', 'dev', 'test']: 61 | classifier.load_data( 62 | set_type=split, 63 | examples=examples[split], 64 | batch_size=args.batch_size, 65 | shuffle=(split != 'test')) 66 | 67 | print('=' * 60, '\n', 'Pre-training', '\n', '=' * 60, sep='') 68 | for epoch in range(args.pretrain_epochs): 69 | classifier.pretrain_epoch() 70 | dev_acc = classifier.evaluate('dev') 71 | 72 | print('Pre-train Epoch {}, Dev Acc: {:.4f}'.format( 73 | epoch, 100. * dev_acc)) 74 | 75 | print('=' * 60, '\n', 'Training', '\n', '=' * 60, sep='') 76 | best_dev_acc, final_test_acc = -1., -1. 77 | for epoch in range(args.epochs): 78 | classifier.train_epoch() 79 | dev_acc = classifier.evaluate('dev') 80 | 81 | if epoch >= args.min_epochs: 82 | do_test = (dev_acc > best_dev_acc) 83 | best_dev_acc = max(best_dev_acc, dev_acc) 84 | else: 85 | do_test = False 86 | 87 | print('Epoch {}, Dev Acc: {:.4f}, Best Ever: {:.4f}'.format( 88 | epoch, 100. * dev_acc, 100. * best_dev_acc)) 89 | 90 | if do_test: 91 | final_test_acc = classifier.evaluate('test') 92 | print('Test Acc: {:.4f}'.format(100. * final_test_acc)) 93 | 94 | print('Final Dev Acc: {:.4f}, Final Test Acc: {:.4f}'.format( 95 | 100. * best_dev_acc, 100. 
* final_test_acc)) 96 | 97 | 98 | if __name__ == '__main__': 99 | main() 100 | -------------------------------------------------------------------------------- /data_utils/image_data_processors.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import torchvision 4 | from torchvision import transforms 5 | 6 | 7 | def get_data(train_num_per_class, dev_num_per_class, imbalance_rate=1., 8 | data_seed=159): 9 | random.seed(data_seed) 10 | processor = CIFAR10Processor() 11 | 12 | # if running imbalance setting, keep only 2 labels. 13 | if imbalance_rate < 1.: 14 | processor.set_labels(list(range(2))) 15 | else: 16 | processor.set_labels(list(range(10))) 17 | 18 | examples = {} 19 | 20 | if train_num_per_class is not None: 21 | train_num_per_class = { 22 | label: train_num_per_class for label in processor.get_labels()} 23 | train_num_per_class[processor.get_labels()[0]] = int( 24 | train_num_per_class[processor.get_labels()[0]] * imbalance_rate) 25 | examples['train'] = processor.get_train_examples(train_num_per_class) 26 | 27 | if dev_num_per_class is not None: 28 | dev_num_per_class = { 29 | label: dev_num_per_class for label in processor.get_labels()} 30 | examples['dev'] = processor.get_dev_examples(dev_num_per_class) 31 | 32 | examples['test'] = processor.get_test_examples() 33 | 34 | for split, examples_split in examples.items(): 35 | print(f'#{split}: {len(examples_split)}') 36 | 37 | return examples, processor.get_labels() 38 | 39 | 40 | class DataExample: 41 | def __init__(self, input, label): 42 | self.input = input 43 | self.label = label 44 | 45 | 46 | class CIFAR10Processor: 47 | def __init__(self): 48 | self._labels = None 49 | 50 | self._transform = transforms.Compose([ 51 | transforms.ToTensor(), 52 | transforms.Normalize( 53 | (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]) 54 | 55 | def set_labels(self, labels): 56 | self._labels = labels 57 | 58 | def get_train_examples(self, 
num_per_class=None): 59 | train_set = torchvision.datasets.CIFAR10( 60 | root='./data', train=True, download=True, 61 | transform=self._transform) 62 | 63 | all_examples = [DataExample(input=t[0], label=t[1]) 64 | for t in train_set][:-500] 65 | 66 | return _subsample_by_classes( 67 | all_examples=all_examples, labels=self.get_labels(), 68 | num_per_class=num_per_class) 69 | 70 | def get_dev_examples(self, num_per_class=None): 71 | dev_set = torchvision.datasets.CIFAR10( 72 | root='./data', train=True, download=True, 73 | transform=self._transform) 74 | 75 | all_examples = [DataExample(input=t[0], label=t[1]) 76 | for t in dev_set][-500:] 77 | 78 | return _subsample_by_classes( 79 | all_examples=all_examples, labels=self.get_labels(), 80 | num_per_class=num_per_class) 81 | 82 | def get_test_examples(self): 83 | test_set = torchvision.datasets.CIFAR10( 84 | root='./data', train=False, download=True, 85 | transform=self._transform) 86 | 87 | return [DataExample(input=t[0], label=t[1]) 88 | for t in test_set if t[1] in self.get_labels()] 89 | 90 | def get_labels(self): 91 | return self._labels 92 | 93 | 94 | def _subsample_by_classes(all_examples, labels, num_per_class=None): 95 | if num_per_class is None: 96 | return all_examples 97 | 98 | examples = {label: [] for label in labels} 99 | for example in all_examples: 100 | if example.label in labels: 101 | examples[example.label].append(example) 102 | 103 | picked_examples = [] 104 | for label in labels: 105 | random.shuffle(examples[label]) 106 | 107 | examples_with_label = examples[label][:num_per_class[label]] 108 | picked_examples.extend(examples_with_label) 109 | 110 | print(f'number of examples with label \'{label}\': ' 111 | f'{len(examples_with_label)}') 112 | 113 | return picked_examples 114 | -------------------------------------------------------------------------------- /weighting_main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 
def _build_classifier(label_list):
    """Create the task-appropriate classifier and attach its optimizer."""
    if args.task in ['sst-2', 'sst-5']:
        clf = Classifier(
            label_list=label_list, ren=False, norm_fn=args.norm_fn,
            device=device)
        clf.get_optimizer(learning_rate=args.learning_rate)
        return clf

    clf = ImageClassifier(
        pretrained=args.resnet_pretrained,
        norm_fn=args.norm_fn,
        softmax_temp=args.image_softmax_norm_temp)
    clf.get_optimizer(
        learning_rate=args.image_lr,
        momentum=args.image_momentum,
        weight_decay=args.image_weight_decay)
    return clf


def main():
    """Train with learned per-example weights; report dev/test accuracy."""
    examples, label_list = get_data(
        task=args.task,
        train_num_per_class=args.train_num_per_class,
        dev_num_per_class=args.dev_num_per_class,
        imbalance_rate=args.imbalance_rate,
        data_seed=args.data_seed)

    classifier = _build_classifier(label_list)

    # Test loader is kept in order; train/dev loaders are shuffled.
    for split in ['train', 'dev', 'test']:
        classifier.load_data(
            set_type=split,
            examples=examples[split],
            batch_size=args.batch_size,
            shuffle=(split != 'test'))

    classifier.init_weights(
        n_examples=len(examples['train']),
        w_init=args.w_init,
        w_decay=args.w_decay)

    print('=' * 60, '\n', 'Pre-training', '\n', '=' * 60, sep='')
    for epoch in range(args.pretrain_epochs):
        classifier.pretrain_epoch()
        dev_acc = classifier.evaluate('dev')

        print('Pre-train Epoch {}, Dev Acc: {:.4f}'.format(
            epoch, 100. * dev_acc))

    print('=' * 60, '\n', 'Training', '\n', '=' * 60, sep='')
    best_dev_acc, final_test_acc = -1., -1.
    for epoch in range(args.epochs):
        classifier.train_epoch()
        dev_acc = classifier.evaluate('dev')

        # Model selection starts only after the min_epochs warm-up.
        do_test = epoch >= args.min_epochs and dev_acc > best_dev_acc
        if epoch >= args.min_epochs:
            best_dev_acc = max(best_dev_acc, dev_acc)

        print('Epoch {}, Dev Acc: {:.4f}, Best Ever: {:.4f}'.format(
            epoch, 100. * dev_acc, 100. * best_dev_acc))

        if do_test:
            final_test_acc = classifier.evaluate('test')
            print('Test Acc: {:.4f}'.format(100. * final_test_acc))

    print('Final Dev Acc: {:.4f}, Final Test Acc: {:.4f}'.format(
        100. * best_dev_acc, 100. * final_test_acc))
    def update_params(self, deltas):
        """Apply additive updates given as a flat ``{dotted_name: delta}`` dict.

        Deltas addressed to this module's own parameters are added to the
        tensors registered as *buffers* in __init__ (buffers, not
        Parameters, so a delta that carries an autograd graph stays
        differentiable); deltas for submodules are grouped by the first
        path component and dispatched recursively.
        """
        sub_params = {}
        for key, delta in deltas.items():
            if not ('.' in key):
                self._buffers[key] = self._buffers[key] + delta
            else:
                attr = key.split('.')[0]
                if not (attr in sub_params):
                    sub_params[attr] = {}
                sub_params[attr]['.'.join(key.split('.')[1:])] = delta
        for key, value in sub_params.items():
            self._modules[key].update_params(value)

    def check_forward_args(self, *args, **kwargs):
        # Only meaningful when wrapping an RNN: delegate the shape/state
        # validation to the real nn.RNNBase implementation.
        assert issubclass(self._type, nn.RNNBase)
        return nn.RNNBase.check_forward_args(self, *args, **kwargs)

    @property
    def _flat_weights(self):
        # nn.RNNBase.forward reads this property; rebuild it from our
        # (possibly delta-updated) weight tensors.
        assert issubclass(self._type, nn.RNNBase)
        return [p for layerparams in self.all_weights for p in layerparams]

    @property
    def all_weights(self):
        # Resolve the wrapped RNN's `_all_weights` name lists against this
        # module, so updated buffers are picked up instead of the originals.
        assert issubclass(self._type, nn.RNNBase)
        return [[getattr(self, weight) for weight in weights] for weights in
                self._all_weights]

    def _get_abs_string_index(self, idx):
        assert issubclass(self._type, nn.ModuleList)
        """Get the absolute index for the list of modules"""
        idx = operator.index(idx)
        if not (-len(self) <= idx < len(self)):
            raise IndexError('index {} is out of range'.format(idx))
        if idx < 0:
            idx += len(self)
        return str(idx)

    def __getitem__(self, idx):
        # Mirror nn.ModuleList indexing (including slices) for wrapped lists.
        assert issubclass(self._type, nn.ModuleList)
        if isinstance(idx, slice):
            return self.__class__(list(self._modules.values())[idx])
        else:
            return self._modules[self._get_abs_string_index(idx)]

    def __len__(self):
        assert issubclass(self._type, nn.ModuleList)
        return len(self._modules)

    def transpose_for_scores(self, x):
        # Re-implementation of BertSelfAttention.transpose_for_scores:
        # (batch, seq, hidden) -> (batch, heads, seq, head_size).
        assert issubclass(self._type, BertSelfAttention)
        new_x_shape = x.size()[:-1] + (
            self.num_attention_heads, self.attention_head_size)
        x = x.view(*new_x_shape)
        return x.permute(0, 2, 1, 3)

    def conv2d_forward(self, input, weight):
        # Copy of nn.Conv2d's forward helper so the wrapped conv runs with
        # our delta-updated `weight` instead of the original Parameter.
        assert issubclass(self._type, nn.Conv2d)

        if self.padding_mode == 'circular':
            expanded_padding = ((self.padding[1] + 1) // 2, self.padding[1] // 2,
                                (self.padding[0] + 1) // 2, self.padding[0] // 2)
            return F.conv2d(F.pad(input, expanded_padding, mode='circular'),
                            weight, self.bias, self.stride,
                            _pair(0), self.dilation, self.groups)
        return F.conv2d(input, weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def _check_input_dim(self, input):
        # BatchNorm2d's sanity check, needed because its forward calls it.
        assert issubclass(self._type, nn.BatchNorm2d)
        if input.dim() != 4:
            raise ValueError('expected 4D input (got {}D input)'
                             .format(input.dim()))
def _pretrain_classifier(classifier, train_examples):
    """Warm up the classifier on un-augmented data before joint training."""
    print('=' * 60, '\n', 'Classifier Pre-training', '\n', '=' * 60, sep='')
    for epoch in range(args.classifier_pretrain_epochs):
        # epoch=-1 so every warm-up pass shuffles with the same seed.
        train_epoch(
            epoch=-1,
            generator=None,
            classifier=classifier,
            train_examples=train_examples,
            do_augment=False)
        dev_acc = classifier.evaluate('dev')

        print('Classifier pretrain Epoch {}, Dev Acc: {:.4f}'.format(
            epoch, 100. * dev_acc))


def _pretrain_generator(generator, train_examples):
    """MLE-pretrain the generator, restoring the best-dev-loss checkpoint."""
    print('=' * 60, '\n', 'Generator Pre-training', '\n', '=' * 60, sep='')
    tag = time.time()
    model_ckpt = '/tmp/generator_model_{}.pt'.format(tag)
    optimizer_ckpt = '/tmp/generator_optimizer_{}.pt'.format(tag)

    best_dev_loss = 1e10
    for epoch in range(args.generator_pretrain_epochs):
        dev_loss = generator.train_epoch()

        if dev_loss < best_dev_loss:
            best_dev_loss = dev_loss
            torch.save(generator.model.state_dict(), model_ckpt)
            torch.save(generator.optimizer.state_dict(), optimizer_ckpt)

        print('Epoch {}, Dev Loss: {:.4f}; Best Dev Loss: {:.4f}'.format(
            epoch, dev_loss, best_dev_loss))

    # Roll model and optimizer back to the best checkpoint, then clean up.
    generator.model.load_state_dict(torch.load(model_ckpt))
    generator.optimizer.load_state_dict(torch.load(optimizer_ckpt))
    os.remove(model_ckpt)
    os.remove(optimizer_ckpt)


def train_epoch(epoch, generator, classifier, train_examples, do_augment):
    """One pass over train_examples, optionally augmenting every batch."""
    # Deterministic but epoch-dependent shuffling.
    random.seed(199 * (epoch + 1))
    random.shuffle(train_examples)

    step = args.batch_size
    for start in trange(0, len(train_examples), step, desc='Training'):
        batch = train_examples[start:start + step]
        if do_augment:
            generator.finetune_batch(
                classifier=classifier,
                examples=batch,
                num_aug=args.n_aug)

            batch = generator.augment_batch(examples=batch, num_aug=1)

        classifier.train_batch(batch, is_augment=do_augment)
class SoftEmbedding(nn.Module):
    """Embedding lookup that also accepts soft (probability) inputs.

    Accepts either hard token ids of shape (batch, seq), or a probability
    distribution over the vocabulary of shape (batch, seq, num_embeddings)
    when ``use_probs=True``; either way the output is ``probs @ weight``
    with shape (batch, seq, embedding_dim).
    """

    def __init__(self, num_embeddings, embedding_dim):
        nn.Module.__init__(self)
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim

        # Same layout as nn.Embedding.weight: one row per vocabulary entry.
        self.weight = nn.Parameter(torch.Tensor(num_embeddings, embedding_dim))

    def forward(self, ids_or_probs, use_probs=False):
        if use_probs:
            probs = ids_or_probs
        else:
            ids = ids_or_probs
            assert len(ids.shape) == 2
            # One-hot encode the ids so both paths reduce to a matmul.
            probs = torch.zeros(
                ids.shape[0], ids.shape[1], self.num_embeddings,
                device=ids_or_probs.device).scatter_(2, ids.unsqueeze(2), 1.)

        batch, seq = probs.shape[0], probs.shape[1]
        flat = probs.view(-1, self.num_embeddings)
        return flat.mm(self.weight).view(batch, seq, self.embedding_dim)
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()
        # SoftEmbedding (instead of nn.Embedding) lets callers feed either
        # hard token ids or soft probability distributions over the vocab.
        self.word_embeddings = SoftEmbedding(
            config.vocab_size, config.hidden_size)
        self.position_embeddings = SoftEmbedding(
            config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = SoftEmbedding(
            config.type_vocab_size, config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model
        # variable names, so TensorFlow checkpoints can still be loaded.
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, input_ids_or_probs, token_type_ids=None,
                use_input_probs=False):
        """Sum word, position and token-type embeddings; LayerNorm + dropout.

        ``input_ids_or_probs`` is (batch, seq) ids, or (batch, seq, vocab)
        probabilities when ``use_input_probs`` is True.
        """
        seq_length = input_ids_or_probs.size(1)
        position_ids = torch.arange(
            seq_length, dtype=torch.long, device=input_ids_or_probs.device)
        # Broadcast one position row per batch element (shape[:2] works for
        # both the 2-D id input and the 3-D probability input).
        position_ids = position_ids.unsqueeze(0).\
            expand(input_ids_or_probs.shape[:2])
        # Unlike stock BERT, token_type_ids must be supplied explicitly:
        # the input may be probabilities, so the usual
        # torch.zeros_like(input_ids) default does not apply here.
        assert token_type_ids is not None
        # if token_type_ids is None:
        #     token_type_ids = torch.zeros_like(input_ids)

        words_embeddings = \
            self.word_embeddings(input_ids_or_probs, use_probs=use_input_probs)
        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        embeddings = \
            words_embeddings + position_embeddings + token_type_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings
class BertForSequenceClassification(BertPreTrainedModel):
    """BERT encoder plus a pooled-output linear head for classification.

    Behaves like the stock pytorch_pretrained_bert head, except the input
    may be soft token probabilities (``use_input_probs=True``), which the
    underlying BertModel forwards to its SoftEmbedding layer. Returns the
    mean cross-entropy loss when ``labels`` is given, raw logits otherwise.
    """

    def __init__(self, config, num_labels):
        super(BertForSequenceClassification, self).__init__(config)
        self.num_labels = num_labels
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, num_labels)
        self.apply(self.init_bert_weights)

    def forward(self, input_ids_or_probs, token_type_ids=None,
                attention_mask=None, labels=None, use_input_probs=False):
        # Only the pooled [CLS] representation feeds the classifier head.
        _, pooled = self.bert(
            input_ids_or_probs, token_type_ids, attention_mask,
            output_all_encoded_layers=False, use_input_probs=use_input_probs)
        logits = self.classifier(self.dropout(pooled))

        if labels is None:
            return logits

        criterion = nn.CrossEntropyLoss()
        return criterion(logits.view(-1, self.num_labels), labels.view(-1))
EPSILON = 1e-5  # floor for learned example weights; keeps them positive


class ImageClassifier:
    """ResNet-34 image classifier with per-example training weights.

    One class backs three training schemes:
      * baseline=True : uniform weights (plain supervised training),
      * ren=True      : weights re-derived from scratch for every batch
                        (Ren et al.-style meta-reweighting),
      * otherwise     : persistent per-example weights carried across
                        epochs (``init_weights`` must be called first).
    """

    def __init__(self, norm_fn='linear', pretrained=True,
                 baseline=False, ren=False, softmax_temp=1.):
        # The ren/baseline variants are only defined for linear
        # weight normalization.
        if ren or baseline:
            assert norm_fn == 'linear'

        # NOTE(review): device is hard-coded to CUDA throughout this class.
        self._model = models.resnet34(pretrained=pretrained).to('cuda')

        self._optimizer = None

        self._dataset = {}      # set_type -> list of examples
        self._data_loader = {}  # set_type -> DataLoader

        self._weights = None    # persistent per-example weights (default mode)
        self._w_decay = None

        self._baseline = baseline
        self._ren = ren

        self._norm_fn = norm_fn
        self._softmax_temp = softmax_temp

    def init_weights(self, n_examples, w_init, w_decay):
        """Allocate one learnable weight per training example."""
        assert self._ren is False and self._baseline is False
        self._weights = torch.tensor(
            [w_init] * n_examples, requires_grad=True).to('cuda')
        self._w_decay = w_decay

    def load_data(self, set_type, examples, batch_size, shuffle):
        """Wrap `examples` in a DataLoader; ids index into self._weights."""
        self._dataset[set_type] = examples

        all_inputs = torch.tensor([t.input.tolist() for t in examples])
        all_labels = torch.tensor([t.label for t in examples])
        all_ids = torch.arange(len(examples))

        self._data_loader[set_type] = DataLoader(
            TensorDataset(all_inputs, all_labels, all_ids),
            batch_size=batch_size, shuffle=shuffle)

    def get_optimizer(self, learning_rate, momentum, weight_decay):
        self._optimizer = optim.SGD(
            self._model.parameters(),
            lr=learning_rate, momentum=momentum, weight_decay=weight_decay)

    def pretrain_epoch(self):
        # Pre-training is ordinary training with uniform weights.
        self.train_epoch(is_pretrain=True)

    def train_epoch(self, is_pretrain=False):
        """Run one epoch of weighted training over the train loader."""
        criterion = nn.CrossEntropyLoss(reduction='none')
        for batch in tqdm(self._data_loader['train'], desc='Training Epoch'):
            inputs, labels, ids = tuple(t.to('cuda') for t in batch)

            if is_pretrain:
                # Uniform weights normalized to sum to one.
                weights = linear_normalize(
                    torch.ones(inputs.shape[0]).to('cuda'))
            else:
                if self._norm_fn == 'softmax':
                    weights = softmax_normalize(
                        self._get_weights(batch),
                        temperature=self._softmax_temp)
                else:
                    weights = linear_normalize(self._get_weights(batch))

            self._model.train()

            self._optimizer.zero_grad()
            logits = self._model(inputs)
            loss = criterion(logits, labels)
            # weights.data: the weighted loss must not backprop into the
            # weights here; they are updated inside _get_weights instead.
            loss = torch.sum(loss * weights.data)
            loss.backward()
            self._optimizer.step()

    def _get_weights(self, batch):
        """Meta-step: estimate/refresh this batch's example weights.

        For every example i the method simulates one SGD step on that
        example alone (using throwaway copies of the model and optimizer),
        scales the resulting parameter delta by weights[i], and folds it
        into a MagicModule. The gradient of the dev-set loss of that
        hypothetical model w.r.t. the weights is the meta-gradient:
        ren mode returns its negation directly; persistent mode takes a
        decayed gradient step on the stored weights and floors them at
        EPSILON.
        """
        self._model.eval()

        inputs, labels, ids = tuple(t.to('cuda') for t in batch)
        batch_size = inputs.shape[0]

        if self._baseline:
            return torch.ones(batch_size).to('cuda')
        elif self._ren:
            weights = torch.zeros(batch_size, requires_grad=True).to('cuda')
        else:
            weights = self._weights[ids]

        magic_model = MagicModule(self._model)
        criterion = nn.CrossEntropyLoss()

        # Scratch copies so the per-example probe steps never touch the
        # real model/optimizer state.
        model_tmp = copy.deepcopy(self._model)
        optimizer_hparams = self._optimizer.state_dict()['param_groups'][0]
        optimizer_tmp = optim.SGD(
            model_tmp.parameters(),
            lr=optimizer_hparams['lr'],
            momentum=optimizer_hparams['momentum'],
            weight_decay=optimizer_hparams['weight_decay'])

        for i in range(batch_size):
            model_tmp.load_state_dict(self._model.state_dict())
            optimizer_tmp.load_state_dict(self._optimizer.state_dict())

            model_tmp.zero_grad()

            # Forward a 2-example slice but keep only example i's logits —
            # presumably so BatchNorm never sees a batch of size 1
            # (TODO confirm).
            if i > 0:
                l, r, t = i - 1, i + 1, 1
            else:
                l, r, t = i, i + 2, 0

            logits = model_tmp(inputs[l:r])[t:t+1]
            loss = criterion(logits, labels[i:i+1])
            loss.backward()
            optimizer_tmp.step()

            deltas = {}
            for (name, param), (name_tmp, param_tmp) in zip(
                    self._model.named_parameters(),
                    model_tmp.named_parameters()):
                assert name == name_tmp
                # Multiplying by weights[i] keeps the accumulated delta
                # differentiable w.r.t. the weights.
                deltas[name] = weights[i] * (param_tmp.data - param.data)

            magic_model.update_params(deltas)

        weights_grad_list = []
        for step, val_batch in enumerate(self._data_loader['dev']):
            val_batch = (t.to('cuda') for t in val_batch)
            val_inputs, val_labels, _ = val_batch
            val_batch_size = val_labels.shape[0]

            if weights.grad is not None:
                weights.grad.zero_()
            val_logits = magic_model(val_inputs)
            val_loss = criterion(val_logits, val_labels)
            # Scale so that summing over dev batches yields the mean dev loss.
            val_loss = val_loss * float(val_batch_size) / float(
                len(self._dataset['dev']))

            weights_grad = torch.autograd.grad(
                val_loss, weights, retain_graph=True)[0]
            weights_grad_list.append(weights_grad)

        weights_grad = sum(weights_grad_list)

        if self._ren:
            # Ren et al.: weight = negative meta-gradient (normalized later).
            return -weights_grad
        else:
            # Decayed gradient step on the persistent weights, floored at
            # EPSILON so every example keeps a positive weight.
            self._weights[ids] = weights.data / self._w_decay - weights_grad
            self._weights[ids] = torch.max(self._weights[ids], torch.ones_like(
                self._weights[ids]).fill_(EPSILON))

            return self._weights[ids].data

    def evaluate(self, set_type):
        """Return accuracy of the current model on `set_type`."""
        self._model.eval()

        preds_all, labels_all = [], []
        for batch in tqdm(self._data_loader[set_type],
                          desc="Evaluating {} set".format(set_type)):
            batch = tuple(t.to('cuda') for t in batch)
            inputs, labels, _ = batch

            with torch.no_grad():
                logits = self._model(inputs)

            preds = torch.argmax(logits, dim=1)
            preds_all.append(preds)
            labels_all.append(labels)

        preds_all = torch.cat(preds_all, dim=0)
        labels_all = torch.cat(labels_all, dim=0)

        return torch.sum(preds_all == labels_all).item() / labels_all.shape[0]


def linear_normalize(weights):
    """Clamp to non-negative and scale to sum 1 (all-zero if the sum is ~0)."""
    weights = torch.max(weights, torch.zeros_like(weights))
    if torch.sum(weights) > 1e-8:
        return weights / torch.sum(weights)
    return torch.zeros_like(weights)


def softmax_normalize(weights, temperature):
    """Temperature-scaled softmax normalization over the batch dimension."""
    return nn.functional.softmax(weights / temperature, dim=0)
BERT_MODEL = 'bert-base-uncased'
MAX_SEQ_LENGTH = 64


class Classifier:
    """Plain BERT sequence classifier — the no-reweighting baseline."""

    def __init__(self, label_list, device):
        self._label_list = label_list
        self._device = device

        self._tokenizer = BertTokenizer.from_pretrained(
            BERT_MODEL, do_lower_case=True)

        self._model = BertForSequenceClassification.from_pretrained(
            BERT_MODEL, num_labels=len(label_list)).to(device)

        self._optimizer = None

        self._dataset = {}
        self._data_loader = {}

    def load_data(self, set_type, examples, batch_size, shuffle):
        """Tokenize `examples` and register a DataLoader under `set_type`."""
        self._dataset[set_type] = examples
        self._data_loader[set_type] = _make_data_loader(
            examples=examples,
            label_list=self._label_list,
            tokenizer=self._tokenizer,
            batch_size=batch_size,
            shuffle=shuffle)

    def get_optimizer(self, learning_rate):
        self._optimizer = _get_optimizer(
            self._model, learning_rate=learning_rate)

    def train_epoch(self):
        """One ordinary supervised pass over the training loader."""
        self._model.train()

        for batch in tqdm(self._data_loader['train'], desc='Training'):
            input_ids, input_mask, segment_ids, label_ids, _ = (
                t.to(self._device) for t in batch)

            self._optimizer.zero_grad()
            loss = self._model(input_ids, segment_ids, input_mask, label_ids)
            loss.backward()
            self._optimizer.step()

    def evaluate(self, set_type):
        """Return accuracy of the current model on `set_type`."""
        self._model.eval()

        n_correct = 0
        n_total = 0
        loader = self._data_loader[set_type]

        for batch in tqdm(loader,
                          desc="Evaluating {} set".format(set_type)):
            batch = tuple(t.to(self._device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch[:4]

            with torch.no_grad():
                logits = self._model(input_ids, segment_ids, input_mask)

            preds = torch.argmax(logits, dim=1)
            n_correct += torch.sum(preds == label_ids).item()
            n_total += label_ids.shape[0]

        return n_correct / n_total


def _get_optimizer(model, learning_rate):
    """Adam with weight decay on everything except biases and LayerNorm."""
    no_decay = ('bias', 'LayerNorm.bias', 'LayerNorm.weight')

    decay_params, plain_params = [], []
    for name, param in model.named_parameters():
        if any(marker in name for marker in no_decay):
            plain_params.append(param)
        else:
            decay_params.append(param)

    grouped_parameters = [
        {'params': decay_params, 'weight_decay': 0.01},
        {'params': plain_params, 'weight_decay': 0.0},
    ]
    return optim.Adam(grouped_parameters, lr=learning_rate)
def _convert_examples_to_features(examples, label_list, max_seq_length,
                                  tokenizer, output_mode):
    """Loads a data file into a list of `InputBatch`s.

    Tokenizes each example, builds the [CLS]/[SEP]-delimited token ids,
    segment ids and attention mask, pads everything to ``max_seq_length``,
    and maps labels to ids via their position in ``label_list``
    (classification) or to floats (regression).
    """

    label_map = {label: i for i, label in enumerate(label_list)}

    features = []
    # NOTE: ex_index is kept for parity with the upstream BERT example code
    # even though it is unused here.
    for (ex_index, example) in enumerate(examples):
        tokens_a = tokenizer.tokenize(example.text_a)

        tokens_b = None
        if example.text_b:
            tokens_b = tokenizer.tokenize(example.text_b)
            # Modifies `tokens_a` and `tokens_b` in place so that the total
            # length is less than the specified length.
            # Account for [CLS], [SEP], [SEP] with "- 3"
            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
        else:
            # Account for [CLS] and [SEP] with "- 2"
            if len(tokens_a) > max_seq_length - 2:
                tokens_a = tokens_a[:(max_seq_length - 2)]

        tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
        segment_ids = [0] * len(tokens)

        if tokens_b:
            tokens += tokens_b + ["[SEP]"]
            segment_ids += [1] * (len(tokens_b) + 1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        # The mask has 1 for real tokens and 0 for padding tokens. Only real
        # tokens are attended to.
        input_mask = [1] * len(input_ids)

        # Zero-pad up to the sequence length.
        padding = [0] * (max_seq_length - len(input_ids))
        input_ids += padding
        input_mask += padding
        segment_ids += padding

        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length

        if output_mode == "classification":
            label_id = label_map[example.label]
        elif output_mode == "regression":
            label_id = float(example.label)
        else:
            raise KeyError(output_mode)

        features.append(
            InputFeatures(input_ids=input_ids,
                          input_mask=input_mask,
                          segment_ids=segment_ids,
                          label_id=label_id))

    return features
189 | while True: 190 | total_length = len(tokens_a) + len(tokens_b) 191 | if total_length <= max_length: 192 | break 193 | if len(tokens_a) > len(tokens_b): 194 | tokens_a.pop() 195 | else: 196 | tokens_b.pop() 197 | -------------------------------------------------------------------------------- /data_utils/text_data_processors.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import torchtext 4 | from data_utils.download_sst2 import prepare_data 5 | 6 | 7 | def _get_processor(task): 8 | if task == 'sst-5': 9 | return SST5Processor() 10 | elif task == 'sst-2': 11 | return SST2Processor() 12 | else: 13 | raise ValueError('Unknown task') 14 | 15 | 16 | def get_data(task, train_num_per_class, dev_num_per_class, imbalance_rate=1., data_seed=159): 17 | random.seed(data_seed) 18 | processor = _get_processor(task) 19 | 20 | examples = dict() 21 | 22 | if train_num_per_class is not None: 23 | train_num_per_class = { 24 | label: train_num_per_class for label in processor.get_labels()} 25 | train_num_per_class[processor.get_labels()[0]] = int( 26 | train_num_per_class[processor.get_labels()[0]] * imbalance_rate) 27 | examples['train'] = processor.get_train_examples(train_num_per_class) 28 | 29 | if dev_num_per_class is not None: 30 | dev_num_per_class = { 31 | label: dev_num_per_class for label in processor.get_labels()} 32 | examples['dev'] = processor.get_dev_examples(dev_num_per_class) 33 | 34 | examples['test'] = processor.get_test_examples() 35 | 36 | for key, value in examples.items(): 37 | print('#{}: {}'.format(key, len(value))) 38 | 39 | return examples, processor.get_labels() 40 | 41 | 42 | def _subsample_by_classes(all_examples, labels, num_per_class=None): 43 | if num_per_class is None: 44 | return all_examples 45 | 46 | examples = {label: [] for label in labels} 47 | for example in all_examples: 48 | examples[example.label].append(example) 49 | 50 | selected_examples = [] 51 | for label in labels: 52 
| random.shuffle(examples[label]) 53 | 54 | num_in_class = num_per_class[label] 55 | selected_examples = selected_examples + examples[label][:num_in_class] 56 | print('number of examples with label \'{}\': {}'.format( 57 | label, num_in_class)) 58 | 59 | return selected_examples 60 | 61 | 62 | def _split_by_classes(all_examples, labels, num_select_per_class): 63 | examples = {label: [] for label in labels} 64 | for example in all_examples: 65 | examples[example.label].append(example) 66 | 67 | selected_examples = [] 68 | remaining_examples = [] 69 | for label in labels: 70 | assert num_select_per_class <= len(examples[label]) 71 | 72 | random.shuffle(examples[label]) 73 | selected_examples = \ 74 | selected_examples + examples[label][:num_select_per_class] 75 | remaining_examples = \ 76 | remaining_examples + examples[label][num_select_per_class:] 77 | 78 | return selected_examples, remaining_examples 79 | 80 | 81 | class InputExample: 82 | """A single training/test example for simple sequence classification.""" 83 | 84 | def __init__(self, guid, text_a, text_b=None, label=None): 85 | self.guid = guid 86 | self.text_a = text_a 87 | self.text_b = text_b 88 | self.label = label 89 | 90 | 91 | class InputFeatures(object): 92 | """A single set of features of data.""" 93 | 94 | def __init__(self, input_ids, input_mask, segment_ids, label_id): 95 | self.input_ids = input_ids 96 | self.input_mask = input_mask 97 | self.segment_ids = segment_ids 98 | self.label_id = label_id 99 | 100 | def __getitem__(self, item): 101 | return [self.input_ids, self.input_mask, 102 | self.segment_ids, self.label_id][item] 103 | 104 | 105 | class DatasetProcessor: 106 | def get_train_examples(self): 107 | raise NotImplementedError 108 | 109 | def get_dev_examples(self): 110 | raise NotImplementedError 111 | 112 | def get_test_examples(self): 113 | raise NotImplementedError 114 | 115 | def get_labels(self): 116 | raise NotImplementedError 117 | 118 | 119 | class 
SST5Processor(DatasetProcessor): 120 | """Processor for the SST-5 data set.""" 121 | 122 | def __init__(self): 123 | TEXT = torchtext.data.Field() 124 | LABEL = torchtext.data.Field(sequential=False) 125 | 126 | self._train_set, self._dev_set, self._test_set = \ 127 | torchtext.datasets.SST.splits( 128 | TEXT, LABEL, fine_grained=True) 129 | 130 | def get_train_examples(self, num_per_class=None, noise_rate=0.): 131 | """See base class.""" 132 | print('getting train examples...') 133 | all_examples = self._create_examples(self._train_set, "train") 134 | 135 | # Add noise 136 | for i, _ in enumerate(all_examples): 137 | if random.random() < noise_rate: 138 | all_examples[i].label = random.choice(self.get_labels()) 139 | 140 | return _subsample_by_classes( 141 | all_examples, self.get_labels(), num_per_class) 142 | 143 | def get_dev_examples(self, num_per_class=None): 144 | """See base class.""" 145 | print('getting dev examples...') 146 | all_examples = self._create_examples(self._dev_set, "dev") 147 | 148 | return _subsample_by_classes( 149 | all_examples, self.get_labels(), num_per_class) 150 | 151 | def get_test_examples(self): 152 | """See base class.""" 153 | print('getting test examples...') 154 | return self._create_examples(self._test_set, "test") 155 | 156 | def get_labels(self): 157 | """See base class.""" 158 | return ['negative', 'very positive', 'neutral', 159 | 'positive', 'very negative'] 160 | 161 | def _create_examples(self, dataset, set_type): 162 | """Creates examples for the training and dev sets.""" 163 | examples = [] 164 | for (i, data) in enumerate(dataset): 165 | guid = "%s-%s" % (set_type, i) 166 | examples.append(InputExample( 167 | guid=guid, 168 | text_a=' '.join(data.text), 169 | text_b=None, 170 | label=data.label)) 171 | return examples 172 | 173 | 174 | class SST2Processor(DatasetProcessor): 175 | """Processor for the SST-2 data set (GLUE version).""" 176 | 177 | def __init__(self): 178 | prepare_data('./data') 179 | 180 | def 
get_train_examples(self, num_per_class=None, noise_rate=0.): 181 | print('getting train examples...') 182 | all_examples = self._create_examples("train") 183 | 184 | # Add noise 185 | for i, _ in enumerate(all_examples): 186 | if random.random() < noise_rate: 187 | all_examples[i].label = random.choice(self.get_labels()) 188 | 189 | return _subsample_by_classes( 190 | all_examples, self.get_labels(), num_per_class) 191 | 192 | def get_dev_examples(self, num_per_class=None): 193 | print('getting dev examples...') 194 | return _subsample_by_classes( 195 | self._create_examples("dev"), self.get_labels(), num_per_class) 196 | 197 | def get_test_examples(self): 198 | print('getting test examples...') 199 | return self._create_examples("test") 200 | 201 | def get_labels(self): 202 | """See base class.""" 203 | return ["0", "1"] 204 | 205 | def _create_examples(self, set_type): 206 | """Creates examples for the training and dev sets.""" 207 | sentence_file = open('data/sst2.{}.sentences.txt'.format(set_type)) 208 | labels_file = open('data/sst2.{}.labels.txt'.format(set_type)) 209 | 210 | examples = [] 211 | for sentence, label in zip( 212 | sentence_file.readlines(), labels_file.readlines()): 213 | label = label.strip('\n') 214 | sentence = sentence.strip('\n') 215 | 216 | if label == '': 217 | break 218 | examples.append(InputExample( 219 | guid=set_type, text_a=sentence, text_b=None, label=label)) 220 | return examples 221 | -------------------------------------------------------------------------------- /augmentation/classifier.py: -------------------------------------------------------------------------------- 1 | from pytorch_pretrained_bert.tokenization import BertTokenizer 2 | from augmentation.bert_model import BertForSequenceClassification 3 | 4 | import torch 5 | from torch import nn 6 | from torch import optim 7 | from torch.utils.data import Dataset, DataLoader 8 | 9 | from magic_module import MagicModule 10 | import math 11 | from tqdm import tqdm 12 | 13 | 
BERT_MODEL = 'bert-base-uncased'
MAX_SEQ_LENGTH = 64


class Classifier:
    """BERT sequence classifier used by the augmentation pipeline.

    Wraps a `BertForSequenceClassification` model and supports training on
    either plain examples or (example, augmented-token-probabilities) pairs.
    """

    def __init__(self, label_list, device):
        self._label_list = label_list

        self._tokenizer = BertTokenizer.from_pretrained(
            BERT_MODEL, do_lower_case=True)

        self._model = BertForSequenceClassification.from_pretrained(
            BERT_MODEL, num_labels=len(self._label_list))

        self._device = device
        self._model.to(self._device)

        # Created lazily via get_optimizer(); training before that fails.
        self._optimizer = None

        # Keyed by set_type ('train' / 'dev' / 'test').
        self._dataset = {}
        self._data_loader = {}

    def get_optimizer(self, learning_rate):
        """Build the Adam optimizer over the model's parameters."""
        self._optimizer = _get_optimizer(
            self._model, learning_rate=learning_rate)

    def load_data(self, set_type, examples, batch_size, shuffle):
        """Tokenize `examples` and register a DataLoader under `set_type`."""
        self._dataset[set_type] = BERTDataset(
            examples=examples,
            label_list=self._label_list,
            tokenizer=self._tokenizer,
            max_seq_length=MAX_SEQ_LENGTH)

        self._data_loader[set_type] = DataLoader(
            self._dataset[set_type], batch_size=batch_size, shuffle=shuffle)

    def train_batch(self, train_examples, is_augment):
        """Run one optimizer step on a batch.

        When `is_augment` is True each element of `train_examples` is a pair
        (original example, tensor of augmented token probability
        distributions); the originals are converted to one-hot rows so both
        real and augmented inputs go through the soft-embedding path.
        """
        features = []
        for example in train_examples:
            if is_augment:
                # Pair layout: (InputExample, aug_probs tensor).
                example = example[0]

            features.append(_convert_example_to_features(
                example=example,
                label_list=self._label_list,
                max_seq_length=MAX_SEQ_LENGTH,
                tokenizer=self._tokenizer))

        # Stack the 4 per-example feature tensors into batch tensors.
        input_ids_or_probs, input_masks, segment_ids, label_ids = [torch.cat(
            [t[i].unsqueeze(0) for t in features], dim=0).to(
            self._device) for i in range(4)]
        if is_augment:
            num_aug = len(train_examples[0][1])

            # Collect augmented probability rows grouped by augmentation index.
            input_ids_or_probs_aug = []
            for i in range(num_aug):
                for example in train_examples:
                    input_ids_or_probs_aug.append(example[1][i:i+1])
            input_ids_or_probs_aug = \
                torch.cat(input_ids_or_probs_aug, dim=0).to(self._device)

            # One-hot encode the real token ids so they share the augmented
            # inputs' (batch, seq, vocab) probability layout.
            inputs_onehot = torch.zeros_like(
                input_ids_or_probs_aug[:len(input_ids_or_probs)]).scatter_(
                2,
input_ids_or_probs.unsqueeze(2), 1.) 78 | input_ids_or_probs = torch.cat( 79 | [inputs_onehot, input_ids_or_probs_aug], dim=0).to(self._device) 80 | 81 | segment_ids = \ 82 | torch.cat([segment_ids] * (num_aug+1), dim=0).to(self._device) 83 | input_masks = \ 84 | torch.cat([input_masks] * (num_aug+1), dim=0).to(self._device) 85 | label_ids = \ 86 | torch.cat([label_ids] * (num_aug+1), dim=0).to(self._device) 87 | 88 | self._model.train() 89 | self._optimizer.zero_grad() 90 | loss = self._model( 91 | input_ids_or_probs, segment_ids, input_masks, label_ids, 92 | use_input_probs=is_augment) 93 | loss.backward() 94 | self._optimizer.step() 95 | 96 | def evaluate(self, set_type): 97 | self._model.eval() 98 | 99 | preds_all, labels_all = [], [] 100 | data_loader = self._data_loader[set_type] 101 | 102 | for batch in tqdm(data_loader, 103 | desc="Evaluating {} set".format(set_type)): 104 | batch = tuple(t.to(self._device) for t in batch) 105 | input_ids, input_mask, segment_ids, label_ids = batch[:4] 106 | 107 | with torch.no_grad(): 108 | logits = self._model(input_ids, segment_ids, input_mask) 109 | preds = torch.argmax(logits, dim=1) 110 | 111 | preds_all.append(preds) 112 | labels_all.append(label_ids) 113 | 114 | preds_all = torch.cat(preds_all, dim=0) 115 | labels_all = torch.cat(labels_all, dim=0) 116 | 117 | return torch.sum(preds_all == labels_all).item() / labels_all.shape[0] 118 | 119 | def finetune_generator(self, example, aug_probs, finetune_batch_size): 120 | magic_model = MagicModule(self._model) 121 | 122 | features = _convert_example_to_features( 123 | example=example, 124 | label_list=self._label_list, 125 | max_seq_length=MAX_SEQ_LENGTH, 126 | tokenizer=self._tokenizer) 127 | 128 | _, input_mask, segment_ids, label_ids = \ 129 | (t.to(self._device).unsqueeze(0) for t in features) 130 | 131 | num_aug = len(aug_probs) 132 | input_mask_aug = torch.cat([input_mask] * num_aug, dim=0) 133 | segment_ids_aug = torch.cat([segment_ids] * num_aug, dim=0) 134 | 
label_ids_aug = torch.cat([label_ids] * num_aug, dim=0) 135 | 136 | self._model.zero_grad() 137 | loss = self._model( 138 | aug_probs, segment_ids_aug, input_mask_aug, label_ids_aug, 139 | use_input_probs=True) 140 | grads = torch.autograd.grad( 141 | loss, [param for name, param in self._model.named_parameters()], 142 | create_graph=True) 143 | 144 | grads = {param: grads[i] for i, (name, param) in enumerate( 145 | self._model.named_parameters())} 146 | 147 | deltas = _adam_delta(self._optimizer, self._model, grads) 148 | magic_model.update_params(deltas) 149 | 150 | for step, batch in enumerate(self._data_loader['dev']): 151 | batch = tuple(t.to(self._device) for t in batch) 152 | input_ids, input_mask, segment_ids, label_ids = batch 153 | 154 | dev_loss = magic_model( 155 | input_ids, segment_ids, input_mask, label_ids) 156 | dev_loss = dev_loss / len(self._data_loader['dev']) / \ 157 | finetune_batch_size / num_aug 158 | dev_loss.backward() 159 | 160 | @property 161 | def model(self): 162 | return self._model 163 | 164 | @property 165 | def optimizer(self): 166 | return self._optimizer 167 | 168 | 169 | class BERTDataset(Dataset): 170 | def __init__(self, examples, label_list, tokenizer, max_seq_length): 171 | self._examples = examples 172 | self._label_list = label_list 173 | self._tokenizer = tokenizer 174 | self._max_seq_length = max_seq_length 175 | 176 | def __len__(self): 177 | return len(self._examples) 178 | 179 | def __getitem__(self, index): 180 | return _convert_example_to_features( 181 | example=self._examples[index], 182 | label_list=self._label_list, 183 | max_seq_length=self._max_seq_length, 184 | tokenizer=self._tokenizer) 185 | 186 | 187 | def _get_optimizer(model, learning_rate): 188 | param_optimizer = list(model.named_parameters()) 189 | no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] 190 | optimizer_grouped_parameters = [ 191 | {'params': [p for n, p in param_optimizer if 192 | not any(nd in n for nd in no_decay)], 'weight_decay': 
0.01}, 193 | {'params': [p for n, p in param_optimizer if 194 | any(nd in n for nd in no_decay)], 'weight_decay': 0.0}] 195 | 196 | return optim.Adam(optimizer_grouped_parameters, lr=learning_rate) 197 | 198 | 199 | def _convert_example_to_features( 200 | example, label_list, max_seq_length, tokenizer): 201 | label_map = {label: i for i, label in enumerate(label_list)} 202 | 203 | tokens_a = tokenizer.tokenize(example.text_a) 204 | 205 | if len(tokens_a) > max_seq_length - 2: 206 | tokens_a = tokens_a[:max_seq_length - 2] 207 | 208 | tokens = ["[CLS]"] + tokens_a + ["[SEP]"] 209 | segment_ids = [0] * len(tokens) 210 | 211 | input_ids = tokenizer.convert_tokens_to_ids(tokens) 212 | 213 | input_mask = [1] * len(input_ids) 214 | 215 | padding = [0] * (max_seq_length - len(input_ids)) 216 | input_ids += padding 217 | input_mask += padding 218 | segment_ids += padding 219 | 220 | assert len(input_ids) == max_seq_length 221 | assert len(input_mask) == max_seq_length 222 | assert len(segment_ids) == max_seq_length 223 | 224 | label_id = label_map[example.label] 225 | 226 | return (torch.tensor(input_ids), 227 | torch.tensor(input_mask), 228 | torch.tensor(segment_ids), 229 | torch.tensor(label_id)) 230 | 231 | 232 | def _adam_delta(optimizer, model, grads): 233 | deltas = {} 234 | for group in optimizer.param_groups: 235 | for param in group['params']: 236 | grad = grads[param] 237 | state = optimizer.state[param] 238 | 239 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 240 | beta1, beta2 = group['betas'] 241 | 242 | step = state['step'] + 1 243 | 244 | if group['weight_decay'] != 0: 245 | grad = grad + group['weight_decay'] * param.data 246 | 247 | exp_avg = exp_avg * beta1 + (1. - beta1) * grad 248 | exp_avg_sq = exp_avg_sq * beta2 + (1. - beta2) * grad * grad 249 | denom = exp_avg_sq.sqrt() + group['eps'] 250 | 251 | bias_correction1 = 1. - beta1 ** step 252 | bias_correction2 = 1. 
- beta2 ** step 253 | step_size = group['lr'] * math.sqrt( 254 | bias_correction2) / bias_correction1 255 | 256 | deltas[param] = -step_size * exp_avg / denom 257 | 258 | param_to_name = {param: name for name, param in model.named_parameters()} 259 | 260 | return {param_to_name[param]: delta for param, delta in deltas.items()} -------------------------------------------------------------------------------- /augmentation/generator.py: -------------------------------------------------------------------------------- 1 | from pytorch_pretrained_bert.tokenization import BertTokenizer 2 | from pytorch_pretrained_bert.modeling import BertForMaskedLM 3 | from pytorch_pretrained_bert.optimization import BertAdam 4 | 5 | import torch 6 | from torch import nn 7 | from torch import optim 8 | from torch.nn import functional as F 9 | from torch.utils.data import Dataset, DataLoader 10 | 11 | import random 12 | 13 | 14 | BERT_MODEL = 'bert-base-uncased' 15 | MAX_SEQ_LENGTH = 64 16 | 17 | 18 | class Generator: 19 | def __init__(self, label_list, device): 20 | self._label_list = label_list 21 | 22 | self._tokenizer = BertTokenizer.from_pretrained( 23 | BERT_MODEL, do_lower_case=True) 24 | 25 | self._model = BertForMaskedLM.from_pretrained(BERT_MODEL) 26 | if len(self._label_list) != 2: 27 | self._model.bert.embeddings.token_type_embeddings = \ 28 | nn.Embedding(len(label_list), 768) 29 | self._model.bert.embeddings.token_type_embeddings.weight.data.\ 30 | normal_(mean=0.0, std=0.02) 31 | 32 | self._device = device 33 | self._model.to(self._device) 34 | 35 | self._optimizer = None 36 | 37 | self._dataset = {} 38 | self._data_loader = {} 39 | 40 | def get_optimizer(self, learning_rate): 41 | self._optimizer = _get_optimizer( 42 | self._model, learning_rate=learning_rate) 43 | 44 | def load_data(self, set_type, examples, batch_size, shuffle): 45 | self._dataset[set_type] = RandomMaskedBERTDataset( 46 | examples=examples, 47 | label_list=self._label_list, 48 | 
tokenizer=self._tokenizer, 49 | max_seq_length=MAX_SEQ_LENGTH) 50 | 51 | self._data_loader[set_type] = DataLoader( 52 | self._dataset[set_type], batch_size=batch_size, shuffle=shuffle) 53 | 54 | def dev_loss(self): 55 | self._model.eval() 56 | sum_loss = 0. 57 | for step, batch in enumerate(self._data_loader['dev']): 58 | batch = tuple(t.to(self._device) for t in batch) 59 | _, input_ids, input_mask, segment_ids, masked_ids = batch 60 | 61 | loss = self._model(input_ids, segment_ids, input_mask, masked_ids) 62 | sum_loss += loss.item() 63 | 64 | return sum_loss 65 | 66 | def train_epoch(self): 67 | self._model.train() 68 | for step, batch in enumerate(self._data_loader['train']): 69 | batch = tuple(t.to(self._device) for t in batch) 70 | _, input_ids, input_mask, segment_ids, masked_ids = batch 71 | 72 | self._model.zero_grad() 73 | loss = self._model(input_ids, segment_ids, input_mask, masked_ids) 74 | loss.backward() 75 | self._optimizer.step() 76 | 77 | return self.dev_loss() 78 | 79 | def _augment_example(self, example, num_aug): 80 | features = _convert_example_to_features( 81 | example=example, 82 | label_list=self._label_list, 83 | max_seq_length=MAX_SEQ_LENGTH, 84 | tokenizer=self._tokenizer) 85 | 86 | init_ids, _, input_mask, segment_ids, _ = \ 87 | (t.view(1, -1).to(self._device) for t in features) 88 | 89 | len = int(torch.sum(input_mask).item()) 90 | if len >= 4: 91 | mask_idx = sorted( 92 | random.sample(list(range(1, len - 1)), max(len // 7, 2))) 93 | else: 94 | mask_idx = [1] 95 | 96 | masked_ids = init_ids[0][mask_idx] 97 | init_ids[0][mask_idx] = \ 98 | self._tokenizer.convert_tokens_to_ids(['[MASK]'])[0] 99 | logits = self._model(init_ids, segment_ids, input_mask)[0] 100 | 101 | # Get 2 samples 102 | aug_probs_all = [] 103 | for _ in range(num_aug): 104 | probs = F.gumbel_softmax(logits, hard=False) # TODO 105 | aug_probs = torch.zeros_like(probs).scatter_( 106 | 1, init_ids[0].unsqueeze(1), 1.) 
107 | for t in mask_idx: 108 | aug_probs = torch.cat( 109 | [aug_probs[:t], probs[t:t + 1], aug_probs[t + 1:]], dim=0) 110 | 111 | aug_probs_all.append(aug_probs) 112 | 113 | aug_probs = torch.cat([ap.unsqueeze(0) for ap in aug_probs_all], dim=0) 114 | 115 | return aug_probs 116 | 117 | def _finetune_example(self, classifier, example, 118 | finetune_batch_size, num_aug): 119 | aug_probs = self._augment_example(example, num_aug=num_aug) 120 | classifier.finetune_generator( 121 | example, aug_probs, finetune_batch_size) 122 | 123 | def finetune_batch(self, classifier, examples, num_aug=1): 124 | self._model.train() 125 | self._model.zero_grad() 126 | 127 | for example in examples: 128 | aug_probs = self._augment_example(example, num_aug=num_aug) 129 | classifier.finetune_generator( 130 | example, aug_probs, finetune_batch_size=len(examples)) 131 | self._optimizer.step() 132 | 133 | def augment_batch(self, examples, num_aug=1): 134 | self._model.eval() 135 | 136 | aug_examples = [] 137 | for example in examples: 138 | with torch.no_grad(): 139 | aug_probs = self._augment_example(example, num_aug=num_aug) 140 | aug_examples.append((example, aug_probs)) 141 | 142 | return aug_examples 143 | 144 | @property 145 | def model(self): 146 | return self._model 147 | 148 | @property 149 | def optimizer(self): 150 | return self._optimizer 151 | 152 | 153 | class RandomMaskedBERTDataset(Dataset): 154 | def __init__(self, examples, label_list, tokenizer, max_seq_length): 155 | self._examples = examples 156 | self._label_list = label_list 157 | self._tokenizer = tokenizer 158 | self._max_seq_length = max_seq_length 159 | 160 | def __len__(self): 161 | return len(self._examples) 162 | 163 | def __getitem__(self, index): 164 | # generate different random masks every time. 
# --- continuation of RandomMaskedBERTDataset.__getitem__ ---
        return _convert_example_to_features(
            example=self._examples[index],
            label_list=self._label_list,
            max_seq_length=self._max_seq_length,
            tokenizer=self._tokenizer)


def _get_optimizer(model, learning_rate):
    """Adam over the model parameters, with weight decay disabled on
    biases and LayerNorm weights (standard BERT fine-tuning grouping)."""
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if
                    not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if
                    any(nd in n for nd in no_decay)], 'weight_decay': 0.0}]

    return optim.Adam(optimizer_grouped_parameters, lr=learning_rate)


def _convert_example_to_features(
        example, label_list, max_seq_length, tokenizer):
    """
    this function is copied from
    https://github.com/IIEKES/cbert_aug/blob/master/aug_dataset_wo_ft.py#L119
    """

    label_map = {}
    for (i, label) in enumerate(label_list):
        label_map[label] = i

    # BERT-style masking hyper-parameters.
    masked_lm_prob = 0.15
    max_predictions_per_seq = 20

    tokens_a = tokenizer.tokenize(example.text_a)
    segment_id = label_map[example.label]
    # Account for [CLS] and [SEP] with "- 2"
    if len(tokens_a) > max_seq_length - 2:
        tokens_a = tokens_a[0:(max_seq_length - 2)]

    # Since this is a conditional MLM (CMLM), the class label is needed:
    # it is injected as the segment (token-type) id of every position.
    tokens = []
    segment_ids = []
    # are [CLS] and [SEP] needed here?
    tokens.append("[CLS]")
    segment_ids.append(segment_id)
    for token in tokens_a:
        tokens.append(token)
        segment_ids.append(segment_id)
    tokens.append("[SEP]")
    segment_ids.append(segment_id)
    # -1 marks positions that do not contribute to the masked-LM loss.
    masked_lm_labels = [-1] * max_seq_length

    # Candidate positions for masking: everything except [CLS]/[SEP].
    cand_indexes = []
    for (i, token) in enumerate(tokens):
        if token == "[CLS]" or token == "[SEP]":
            continue
        cand_indexes.append(i)

    random.shuffle(cand_indexes)
    len_cand = len(cand_indexes)

    output_tokens = list(tokens)

    num_to_predict = min(max_predictions_per_seq,
                         max(1, int(round(len(tokens) * masked_lm_prob))))

    masked_lms_pos = []
    covered_indexes = set()
    for index in cand_indexes:
        if len(masked_lms_pos) >= num_to_predict:
            break
        if index in covered_indexes:
            continue
        covered_indexes.add(index)

        # 80% of the time, replace with [MASK]
        if random.random() < 0.8:
            masked_token = "[MASK]"
        else:
            # 10% of the time, keep original
            if random.random() < 0.5:
                masked_token = tokens[index]
            # 10% of the time, replace with random word
            else:
                masked_token = tokens[cand_indexes[
                    random.randint(0, len_cand - 1)]]

        # The loss target at a masked position is the original token id.
        masked_lm_labels[index] = \
            tokenizer.convert_tokens_to_ids([tokens[index]])[0]
        output_tokens[index] = masked_token
        masked_lms_pos.append(index)

    # init_ids: original sequence; input_ids: sequence with masks applied.
    init_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_ids = tokenizer.convert_tokens_to_ids(output_tokens)

    # The mask has 1 for real tokens and 0 for padding tokens. Only real
    # tokens are attended to.
    input_mask = [1] * len(input_ids)

    # Zero-pad up to the sequence length.
265 | while len(input_ids) < max_seq_length: 266 | init_ids.append(0) 267 | input_ids.append(0) 268 | input_mask.append(0) 269 | segment_ids.append(0) # ?segment_id 270 | 271 | assert len(init_ids) == max_seq_length 272 | assert len(input_ids) == max_seq_length 273 | assert len(input_mask) == max_seq_length 274 | assert len(segment_ids) == max_seq_length 275 | 276 | return (torch.tensor(init_ids), 277 | torch.tensor(input_ids), 278 | torch.tensor(input_mask), 279 | torch.tensor(segment_ids), 280 | torch.tensor(masked_lm_labels)) 281 | 282 | 283 | def _rev_wordpiece(str): 284 | if len(str) > 1: 285 | for i in range(len(str)-1, 0, -1): 286 | if str[i] == '[PAD]': 287 | str.remove(str[i]) 288 | elif len(str[i]) > 1 and str[i][0] == '#' and str[i][1] == '#': 289 | str[i-1] += str[i][2:] 290 | str.remove(str[i]) 291 | return " ".join(str[1:-1]) -------------------------------------------------------------------------------- /weighting/classifier.py: -------------------------------------------------------------------------------- 1 | from pytorch_pretrained_bert.tokenization import BertTokenizer 2 | from pytorch_pretrained_bert.modeling import BertForSequenceClassification 3 | 4 | from tqdm import tqdm 5 | import math 6 | 7 | import torch 8 | from torch import nn 9 | from torch import optim 10 | 11 | from torch.utils.data import DataLoader, TensorDataset 12 | from data_utils.text_data_processors import InputFeatures 13 | 14 | from magic_module import MagicModule 15 | 16 | 17 | BERT_MODEL = 'bert-base-uncased' 18 | MAX_SEQ_LENGTH = 64 19 | EPSILON = 1e-5 20 | 21 | 22 | class Classifier: 23 | def __init__(self, label_list, ren, norm_fn, device): 24 | self._label_list = label_list 25 | self._ren = ren 26 | self._device = device 27 | 28 | self._tokenizer = BertTokenizer.from_pretrained( 29 | BERT_MODEL, do_lower_case=True) 30 | 31 | self._model = BertForSequenceClassification.from_pretrained( 32 | BERT_MODEL, num_labels=len(label_list)).to(device) 33 | 34 | self._optimizer = 
None 35 | 36 | self._dataset = {} 37 | self._data_loader = {} 38 | 39 | self._weights = None 40 | self._w_decay = None 41 | 42 | if norm_fn == 'linear': 43 | self._norm_fn = _linear_normalize 44 | elif norm_fn == 'softmax': 45 | self._norm_fn = _softmax_normalize 46 | 47 | if ren: 48 | assert norm_fn == 'linear' 49 | 50 | def init_weights(self, n_examples, w_init, w_decay): 51 | if self._ren: 52 | raise ValueError( 53 | 'no global weighting initialization when \'ren\'=True') 54 | 55 | self._weights = torch.tensor( 56 | [w_init] * n_examples, requires_grad=True).to(device=self._device) 57 | self._w_decay = w_decay 58 | 59 | def load_data(self, set_type, examples, batch_size, shuffle): 60 | self._dataset[set_type] = examples 61 | self._data_loader[set_type] = _make_data_loader( 62 | examples=examples, 63 | label_list=self._label_list, 64 | tokenizer=self._tokenizer, 65 | batch_size=batch_size, 66 | shuffle=shuffle) 67 | 68 | def get_optimizer(self, learning_rate): 69 | self._optimizer = _get_optimizer( 70 | self._model, learning_rate=learning_rate) 71 | 72 | def pretrain_epoch(self): 73 | self._model.train() 74 | 75 | for step, batch in enumerate(tqdm(self._data_loader['train'], 76 | desc='Pre-training')): 77 | batch = tuple(t.to(self._device) for t in batch) 78 | input_ids, input_mask, segment_ids, label_ids, _ = batch 79 | 80 | self._optimizer.zero_grad() 81 | loss = self._model(input_ids, segment_ids, input_mask, label_ids) 82 | loss.backward() 83 | self._optimizer.step() 84 | 85 | def train_epoch(self): 86 | self._model.train() 87 | 88 | for step, batch in enumerate(tqdm(self._data_loader['train'], 89 | desc='Training')): 90 | batch = tuple(t.to(self._device) for t in batch) 91 | input_ids, input_mask, segment_ids, label_ids, _ = batch 92 | 93 | batch_size = batch[-1].shape[0] 94 | weights = [] 95 | for i in range(0, batch_size, 8): 96 | lil_batch = tuple(t[i:i+8] for t in batch) 97 | weights.append(self._get_weights(lil_batch)) 98 | weights = 
# --- continuation of Classifier.train_epoch: normalize the weights ---
            self._norm_fn(torch.cat(weights, dim=0))

            self._optimizer.zero_grad()
            criterion = nn.CrossEntropyLoss(reduction='none')
            logits = self._model(input_ids, segment_ids, input_mask)
            loss = criterion(logits, label_ids)
            # .data detaches the weights: no gradient flows into them here.
            loss = torch.sum(loss * weights.data)
            loss.backward()
            self._optimizer.step()

    def _get_weights(self, batch):
        """Estimate per-example weights for `batch` by meta-gradients.

        For each example, simulate a weighted Adam update on a MagicModule
        copy of the model, measure the resulting dev loss, and differentiate
        it w.r.t. the weights.  ren=True returns the negative gradient
        directly; otherwise the persistent weights are decayed, updated, and
        clamped to EPSILON.
        """
        input_ids, input_mask, segment_ids, label_ids, ids = batch
        batch_size = label_ids.shape[0]

        # Adam state ('exp_avg') only exists after at least one real step;
        # before that, fall back to uniform weights.
        optimizer_initialized = ('exp_avg' in self._optimizer.state[
            next(self._model.parameters())])
        if not optimizer_initialized:
            return torch.ones(batch_size).to(self._device)

        if self._ren:
            # Fresh zero weights every batch (Ren et al. style).
            weights = torch.zeros(
                batch_size, requires_grad=True).to(self._device)
        else:
            weights = self._weights[ids]

        magic_model = MagicModule(self._model)
        criterion = nn.CrossEntropyLoss()

        for i in range(batch_size):
            self._model.zero_grad()
            logits = self._model(
                input_ids[i:i + 1], segment_ids[i:i + 1], input_mask[i:i + 1])
            loss = criterion(logits, label_ids[i:i + 1])

            grads = torch.autograd.grad(
                loss, [param for name, param in self._model.named_parameters()])
            grads = {param: grads[j] for j, (name, param) in enumerate(
                self._model.named_parameters())}

            # Simulated Adam step, scaled by this example's (symbolic) weight
            # so the dev loss stays differentiable w.r.t. the weights.
            deltas = _adam_delta(self._optimizer, self._model, grads)
            deltas = {name: weights[i] * delta.data for name, delta in
                      deltas.items()}
            magic_model.update_params(deltas)

        weights_grad_list = []
        for step, val_batch in enumerate(self._data_loader['dev']):
            val_batch = (t.to(self._device) for t in val_batch)
            val_input_ids, val_input_mask, val_segment_ids, val_label_ids, _ = \
                val_batch
            val_batch_size = val_label_ids.shape[0]

            val_loss = magic_model(
                val_input_ids, val_segment_ids, val_input_mask, val_label_ids)
            val_loss = val_loss * \
float(val_batch_size) / float(len(self._dataset['dev'])) 153 | 154 | weights_grad = torch.autograd.grad( 155 | val_loss, weights, retain_graph=True)[0] 156 | weights_grad_list.append(weights_grad) 157 | 158 | weights_grad = sum(weights_grad_list) 159 | 160 | if self._ren: 161 | return -weights_grad 162 | else: 163 | self._weights[ids] = weights.data / self._w_decay - weights_grad 164 | self._weights[ids] = torch.max(self._weights[ids], torch.ones_like( 165 | self._weights[ids]).fill_(EPSILON)) 166 | 167 | return self._weights[ids].data 168 | 169 | def evaluate(self, set_type): 170 | self._model.eval() 171 | 172 | preds_all, labels_all = [], [] 173 | data_loader = self._data_loader[set_type] 174 | 175 | for batch in tqdm(data_loader, 176 | desc="Evaluating {} set".format(set_type)): 177 | batch = tuple(t.to(self._device) for t in batch) 178 | input_ids, input_mask, segment_ids, label_ids = batch[:4] 179 | 180 | with torch.no_grad(): 181 | logits = self._model(input_ids, segment_ids, input_mask) 182 | preds = torch.argmax(logits, dim=1) 183 | 184 | preds_all.append(preds) 185 | labels_all.append(label_ids) 186 | 187 | preds_all = torch.cat(preds_all, dim=0) 188 | labels_all = torch.cat(labels_all, dim=0) 189 | 190 | return torch.sum(preds_all == labels_all).item() / labels_all.shape[0] 191 | 192 | 193 | def _get_optimizer(model, learning_rate): 194 | param_optimizer = list(model.named_parameters()) 195 | no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] 196 | optimizer_grouped_parameters = [ 197 | {'params': [p for n, p in param_optimizer if 198 | not any(nd in n for nd in no_decay)], 'weight_decay': 0.01}, 199 | {'params': [p for n, p in param_optimizer if 200 | any(nd in n for nd in no_decay)], 'weight_decay': 0.0}] 201 | 202 | return optim.Adam(optimizer_grouped_parameters, lr=learning_rate) 203 | 204 | 205 | def _make_data_loader(examples, label_list, tokenizer, batch_size, shuffle): 206 | all_features = _convert_examples_to_features( 207 | 
examples=examples, 208 | label_list=label_list, 209 | max_seq_length=MAX_SEQ_LENGTH, 210 | tokenizer=tokenizer, 211 | output_mode='classification') 212 | 213 | all_input_ids = torch.tensor( 214 | [f.input_ids for f in all_features], dtype=torch.long) 215 | all_input_mask = torch.tensor( 216 | [f.input_mask for f in all_features], dtype=torch.long) 217 | all_segment_ids = torch.tensor( 218 | [f.segment_ids for f in all_features], dtype=torch.long) 219 | all_label_ids = torch.tensor( 220 | [f.label_id for f in all_features], dtype=torch.long) 221 | all_ids = torch.arange(len(examples)) 222 | 223 | dataset = TensorDataset( 224 | all_input_ids, all_input_mask, all_segment_ids, all_label_ids, all_ids) 225 | return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle) 226 | 227 | 228 | def _linear_normalize(weights): 229 | weights = torch.max(weights, torch.zeros_like(weights)) 230 | if torch.sum(weights) > 1e-8: 231 | return weights / torch.sum(weights) 232 | return torch.zeros_like(weights) 233 | 234 | 235 | def _softmax_normalize(weights): 236 | return nn.functional.softmax(weights, dim=0) 237 | 238 | 239 | def _convert_examples_to_features(examples, label_list, max_seq_length, 240 | tokenizer, output_mode): 241 | """Loads a data file into a list of `InputBatch`s.""" 242 | 243 | label_map = {label: i for i, label in enumerate(label_list)} 244 | 245 | features = [] 246 | for (ex_index, example) in enumerate(examples): 247 | tokens_a = tokenizer.tokenize(example.text_a) 248 | 249 | tokens_b = None 250 | if example.text_b: 251 | tokens_b = tokenizer.tokenize(example.text_b) 252 | # Modifies `tokens_a` and `tokens_b` in place so that the total 253 | # length is less than the specified length. 
254 | # Account for [CLS], [SEP], [SEP] with "- 3" 255 | _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3) 256 | else: 257 | # Account for [CLS] and [SEP] with "- 2" 258 | if len(tokens_a) > max_seq_length - 2: 259 | tokens_a = tokens_a[:(max_seq_length - 2)] 260 | 261 | tokens = ["[CLS]"] + tokens_a + ["[SEP]"] 262 | segment_ids = [0] * len(tokens) 263 | 264 | if tokens_b: 265 | tokens += tokens_b + ["[SEP]"] 266 | segment_ids += [1] * (len(tokens_b) + 1) 267 | 268 | input_ids = tokenizer.convert_tokens_to_ids(tokens) 269 | 270 | # The mask has 1 for real tokens and 0 for padding tokens. Only real 271 | # tokens are attended to. 272 | input_mask = [1] * len(input_ids) 273 | 274 | # Zero-pad up to the sequence length. 275 | padding = [0] * (max_seq_length - len(input_ids)) 276 | input_ids += padding 277 | input_mask += padding 278 | segment_ids += padding 279 | 280 | assert len(input_ids) == max_seq_length 281 | assert len(input_mask) == max_seq_length 282 | assert len(segment_ids) == max_seq_length 283 | 284 | if output_mode == "classification": 285 | label_id = label_map[example.label] 286 | elif output_mode == "regression": 287 | label_id = float(example.label) 288 | else: 289 | raise KeyError(output_mode) 290 | 291 | features.append( 292 | InputFeatures(input_ids=input_ids, 293 | input_mask=input_mask, 294 | segment_ids=segment_ids, 295 | label_id=label_id)) 296 | 297 | return features 298 | 299 | 300 | def _truncate_seq_pair(tokens_a, tokens_b, max_length): 301 | """Truncates a sequence pair in place to the maximum length.""" 302 | 303 | # This is a simple heuristic which will always truncate the longer sequence 304 | # one token at a time. This makes more sense than truncating an equal 305 | # percent of tokens from each, since if one sequence is very short then each 306 | # token that's truncated likely contains more information than a longer 307 | # sequence. 
308 | while True: 309 | total_length = len(tokens_a) + len(tokens_b) 310 | if total_length <= max_length: 311 | break 312 | if len(tokens_a) > len(tokens_b): 313 | tokens_a.pop() 314 | else: 315 | tokens_b.pop() 316 | 317 | 318 | def _adam_delta(optimizer, model, grads): 319 | deltas = {} 320 | for group in optimizer.param_groups: 321 | for param in group['params']: 322 | grad = grads[param] 323 | state = optimizer.state[param] 324 | 325 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 326 | beta1, beta2 = group['betas'] 327 | 328 | step = state['step'] + 1 329 | 330 | if group['weight_decay'] != 0: 331 | grad = grad + group['weight_decay'] * param.data 332 | 333 | exp_avg = exp_avg * beta1 + (1. - beta1) * grad 334 | exp_avg_sq = exp_avg_sq * beta2 + (1. - beta2) * grad * grad 335 | denom = exp_avg_sq.sqrt() + group['eps'] 336 | 337 | bias_correction1 = 1. - beta1 ** step 338 | bias_correction2 = 1. - beta2 ** step 339 | step_size = group['lr'] * math.sqrt( 340 | bias_correction2) / bias_correction1 341 | 342 | deltas[param] = -step_size * exp_avg / denom 343 | 344 | param_to_name = {param: name for name, param in model.named_parameters()} 345 | 346 | return {param_to_name[param]: delta for param, delta in deltas.items()} 347 | -------------------------------------------------------------------------------- /results/text/results_baseline_sst2_imb002.txt: -------------------------------------------------------------------------------- 1 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 2 | device: cuda 3 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 4 | getting train examples... 5 | number of examples with label '0': 20 6 | number of examples with label '1': 1000 7 | getting dev examples... 
8 | number of examples with label '0': 10 9 | number of examples with label '1': 10 10 | getting test examples... 11 | #train: 1020 12 | #dev: 20 13 | #test: 1821 14 | ============================================================ 15 | Training 16 | ============================================================ 17 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 18 | Test Acc: 49.9176 19 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 20 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 21 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 22 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 23 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 24 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 25 | Epoch 7, Dev Acc: 50.0000, Best Ever: 50.0000 26 | Epoch 8, Dev Acc: 70.0000, Best Ever: 70.0000 27 | Test Acc: 69.7419 28 | Epoch 9, Dev Acc: 50.0000, Best Ever: 70.0000 29 | Final Dev Acc: 70.0000, Final Test Acc: 69.7419 30 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 31 | device: cuda 32 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 33 | getting train examples... 34 | number of examples with label '0': 20 35 | number of examples with label '1': 1000 36 | getting dev examples... 37 | number of examples with label '0': 10 38 | number of examples with label '1': 10 39 | getting test examples... 
40 | #train: 1020 41 | #dev: 20 42 | #test: 1821 43 | ============================================================ 44 | Training 45 | ============================================================ 46 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 47 | Test Acc: 49.9176 48 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 49 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 50 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 51 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 52 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 53 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 54 | Epoch 7, Dev Acc: 50.0000, Best Ever: 50.0000 55 | Epoch 8, Dev Acc: 50.0000, Best Ever: 50.0000 56 | Epoch 9, Dev Acc: 50.0000, Best Ever: 50.0000 57 | Final Dev Acc: 50.0000, Final Test Acc: 49.9176 58 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 59 | device: cuda 60 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 61 | getting train examples... 62 | number of examples with label '0': 20 63 | number of examples with label '1': 1000 64 | getting dev examples... 65 | number of examples with label '0': 10 66 | number of examples with label '1': 10 67 | getting test examples... 
68 | #train: 1020 69 | #dev: 20 70 | #test: 1821 71 | ============================================================ 72 | Training 73 | ============================================================ 74 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 75 | Test Acc: 49.9176 76 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 77 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 78 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 79 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 80 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 81 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 82 | Epoch 7, Dev Acc: 50.0000, Best Ever: 50.0000 83 | Epoch 8, Dev Acc: 45.0000, Best Ever: 50.0000 84 | Epoch 9, Dev Acc: 50.0000, Best Ever: 50.0000 85 | Final Dev Acc: 50.0000, Final Test Acc: 49.9176 86 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 87 | device: cuda 88 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 89 | getting train examples... 90 | number of examples with label '0': 20 91 | number of examples with label '1': 1000 92 | getting dev examples... 93 | number of examples with label '0': 10 94 | number of examples with label '1': 10 95 | getting test examples... 
96 | #train: 1020 97 | #dev: 20 98 | #test: 1821 99 | ============================================================ 100 | Training 101 | ============================================================ 102 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 103 | Test Acc: 49.9176 104 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 105 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 106 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 107 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 108 | Epoch 5, Dev Acc: 60.0000, Best Ever: 60.0000 109 | Test Acc: 55.1345 110 | Epoch 6, Dev Acc: 50.0000, Best Ever: 60.0000 111 | Epoch 7, Dev Acc: 50.0000, Best Ever: 60.0000 112 | Epoch 8, Dev Acc: 50.0000, Best Ever: 60.0000 113 | Epoch 9, Dev Acc: 50.0000, Best Ever: 60.0000 114 | Final Dev Acc: 60.0000, Final Test Acc: 55.1345 115 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 116 | device: cuda 117 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 118 | getting train examples... 119 | number of examples with label '0': 20 120 | number of examples with label '1': 1000 121 | getting dev examples... 122 | number of examples with label '0': 10 123 | number of examples with label '1': 10 124 | getting test examples... 
125 | #train: 1020 126 | #dev: 20 127 | #test: 1821 128 | ============================================================ 129 | Training 130 | ============================================================ 131 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 132 | Test Acc: 49.9176 133 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 134 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 135 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 136 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 137 | Epoch 5, Dev Acc: 55.0000, Best Ever: 55.0000 138 | Test Acc: 61.1752 139 | Epoch 6, Dev Acc: 50.0000, Best Ever: 55.0000 140 | Epoch 7, Dev Acc: 50.0000, Best Ever: 55.0000 141 | Epoch 8, Dev Acc: 50.0000, Best Ever: 55.0000 142 | Epoch 9, Dev Acc: 50.0000, Best Ever: 55.0000 143 | Final Dev Acc: 55.0000, Final Test Acc: 61.1752 144 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 145 | device: cuda 146 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 147 | getting train examples... 148 | number of examples with label '0': 20 149 | number of examples with label '1': 1000 150 | getting dev examples... 151 | number of examples with label '0': 10 152 | number of examples with label '1': 10 153 | getting test examples... 
154 | #train: 1020 155 | #dev: 20 156 | #test: 1821 157 | ============================================================ 158 | Training 159 | ============================================================ 160 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 161 | Test Acc: 49.9176 162 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 163 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 164 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 165 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 166 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 167 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 168 | Epoch 7, Dev Acc: 50.0000, Best Ever: 50.0000 169 | Epoch 8, Dev Acc: 50.0000, Best Ever: 50.0000 170 | Epoch 9, Dev Acc: 50.0000, Best Ever: 50.0000 171 | Final Dev Acc: 50.0000, Final Test Acc: 49.9176 172 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 173 | device: cuda 174 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 175 | getting train examples... 176 | number of examples with label '0': 20 177 | number of examples with label '1': 1000 178 | getting dev examples... 179 | number of examples with label '0': 10 180 | number of examples with label '1': 10 181 | getting test examples... 
182 | #train: 1020 183 | #dev: 20 184 | #test: 1821 185 | ============================================================ 186 | Training 187 | ============================================================ 188 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 189 | Test Acc: 49.9176 190 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 191 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 192 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 193 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 194 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 195 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 196 | Epoch 7, Dev Acc: 50.0000, Best Ever: 50.0000 197 | Epoch 8, Dev Acc: 60.0000, Best Ever: 60.0000 198 | Test Acc: 61.3948 199 | Epoch 9, Dev Acc: 50.0000, Best Ever: 60.0000 200 | Final Dev Acc: 60.0000, Final Test Acc: 61.3948 201 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 202 | device: cuda 203 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 204 | getting train examples... 205 | number of examples with label '0': 20 206 | number of examples with label '1': 1000 207 | getting dev examples... 208 | number of examples with label '0': 10 209 | number of examples with label '1': 10 210 | getting test examples... 
211 | #train: 1020 212 | #dev: 20 213 | #test: 1821 214 | ============================================================ 215 | Training 216 | ============================================================ 217 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 218 | Test Acc: 49.9176 219 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 220 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 221 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 222 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 223 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 224 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 225 | Epoch 7, Dev Acc: 55.0000, Best Ever: 55.0000 226 | Test Acc: 51.3454 227 | Epoch 8, Dev Acc: 50.0000, Best Ever: 55.0000 228 | Epoch 9, Dev Acc: 50.0000, Best Ever: 55.0000 229 | Final Dev Acc: 55.0000, Final Test Acc: 51.3454 230 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 231 | device: cuda 232 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 233 | getting train examples... 234 | number of examples with label '0': 20 235 | number of examples with label '1': 1000 236 | getting dev examples... 237 | number of examples with label '0': 10 238 | number of examples with label '1': 10 239 | getting test examples... 
240 | #train: 1020 241 | #dev: 20 242 | #test: 1821 243 | ============================================================ 244 | Training 245 | ============================================================ 246 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 247 | Test Acc: 49.9176 248 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 249 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 250 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 251 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 252 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 253 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 254 | Epoch 7, Dev Acc: 55.0000, Best Ever: 55.0000 255 | Test Acc: 60.3515 256 | Epoch 8, Dev Acc: 50.0000, Best Ever: 55.0000 257 | Epoch 9, Dev Acc: 50.0000, Best Ever: 55.0000 258 | Final Dev Acc: 55.0000, Final Test Acc: 60.3515 259 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 260 | device: cuda 261 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 262 | getting train examples... 263 | number of examples with label '0': 20 264 | number of examples with label '1': 1000 265 | getting dev examples... 266 | number of examples with label '0': 10 267 | number of examples with label '1': 10 268 | getting test examples... 
269 | #train: 1020 270 | #dev: 20 271 | #test: 1821 272 | ============================================================ 273 | Training 274 | ============================================================ 275 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 276 | Test Acc: 49.9176 277 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 278 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 279 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 280 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 281 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 282 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 283 | Epoch 7, Dev Acc: 50.0000, Best Ever: 50.0000 284 | Epoch 8, Dev Acc: 50.0000, Best Ever: 50.0000 285 | Epoch 9, Dev Acc: 50.0000, Best Ever: 50.0000 286 | Final Dev Acc: 50.0000, Final Test Acc: 49.9176 287 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 288 | device: cuda 289 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 290 | getting train examples... 291 | number of examples with label '0': 20 292 | number of examples with label '1': 1000 293 | getting dev examples... 294 | number of examples with label '0': 10 295 | number of examples with label '1': 10 296 | getting test examples... 
297 | #train: 1020 298 | #dev: 20 299 | #test: 1821 300 | ============================================================ 301 | Training 302 | ============================================================ 303 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 304 | Test Acc: 49.9176 305 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 306 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 307 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 308 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 309 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 310 | Epoch 6, Dev Acc: 55.0000, Best Ever: 55.0000 311 | Test Acc: 56.1230 312 | Epoch 7, Dev Acc: 50.0000, Best Ever: 55.0000 313 | Epoch 8, Dev Acc: 50.0000, Best Ever: 55.0000 314 | Epoch 9, Dev Acc: 50.0000, Best Ever: 55.0000 315 | Final Dev Acc: 55.0000, Final Test Acc: 56.1230 316 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 317 | device: cuda 318 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 319 | getting train examples... 320 | number of examples with label '0': 20 321 | number of examples with label '1': 1000 322 | getting dev examples... 323 | number of examples with label '0': 10 324 | number of examples with label '1': 10 325 | getting test examples... 
326 | #train: 1020 327 | #dev: 20 328 | #test: 1821 329 | ============================================================ 330 | Training 331 | ============================================================ 332 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 333 | Test Acc: 49.9176 334 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 335 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 336 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 337 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 338 | Epoch 5, Dev Acc: 45.0000, Best Ever: 50.0000 339 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 340 | Epoch 7, Dev Acc: 50.0000, Best Ever: 50.0000 341 | Epoch 8, Dev Acc: 50.0000, Best Ever: 50.0000 342 | Epoch 9, Dev Acc: 50.0000, Best Ever: 50.0000 343 | Final Dev Acc: 50.0000, Final Test Acc: 49.9176 344 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 345 | device: cuda 346 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 347 | getting train examples... 348 | number of examples with label '0': 20 349 | number of examples with label '1': 1000 350 | getting dev examples... 351 | number of examples with label '0': 10 352 | number of examples with label '1': 10 353 | getting test examples... 
354 | #train: 1020 355 | #dev: 20 356 | #test: 1821 357 | ============================================================ 358 | Training 359 | ============================================================ 360 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 361 | Test Acc: 49.9176 362 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 363 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 364 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 365 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 366 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 367 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 368 | Epoch 7, Dev Acc: 50.0000, Best Ever: 50.0000 369 | Epoch 8, Dev Acc: 50.0000, Best Ever: 50.0000 370 | Epoch 9, Dev Acc: 50.0000, Best Ever: 50.0000 371 | Final Dev Acc: 50.0000, Final Test Acc: 49.9176 372 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 373 | device: cuda 374 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 375 | getting train examples... 376 | number of examples with label '0': 20 377 | number of examples with label '1': 1000 378 | getting dev examples... 379 | number of examples with label '0': 10 380 | number of examples with label '1': 10 381 | getting test examples... 
382 | #train: 1020 383 | #dev: 20 384 | #test: 1821 385 | ============================================================ 386 | Training 387 | ============================================================ 388 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 389 | Test Acc: 49.9176 390 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 391 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 392 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 393 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 394 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 395 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 396 | Epoch 7, Dev Acc: 50.0000, Best Ever: 50.0000 397 | Epoch 8, Dev Acc: 50.0000, Best Ever: 50.0000 398 | Epoch 9, Dev Acc: 50.0000, Best Ever: 50.0000 399 | Final Dev Acc: 50.0000, Final Test Acc: 49.9176 400 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 401 | device: cuda 402 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.02, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 403 | getting train examples... 404 | number of examples with label '0': 20 405 | number of examples with label '1': 1000 406 | getting dev examples... 407 | number of examples with label '0': 10 408 | number of examples with label '1': 10 409 | getting test examples... 
410 | #train: 1020 411 | #dev: 20 412 | #test: 1821 413 | ============================================================ 414 | Training 415 | ============================================================ 416 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 417 | Test Acc: 49.9176 418 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 419 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 420 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 421 | Epoch 4, Dev Acc: 55.0000, Best Ever: 55.0000 422 | Test Acc: 59.0884 423 | Epoch 5, Dev Acc: 55.0000, Best Ever: 55.0000 424 | Epoch 6, Dev Acc: 50.0000, Best Ever: 55.0000 425 | Epoch 7, Dev Acc: 50.0000, Best Ever: 55.0000 426 | Epoch 8, Dev Acc: 50.0000, Best Ever: 55.0000 427 | Epoch 9, Dev Acc: 55.0000, Best Ever: 55.0000 428 | Final Dev Acc: 55.0000, Final Test Acc: 59.0884 429 | -------------------------------------------------------------------------------- /results/text/results_baseline_sst2_imb005.txt: -------------------------------------------------------------------------------- 1 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 2 | device: cuda 3 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 4 | getting train examples... 5 | number of examples with label '0': 50 6 | number of examples with label '1': 1000 7 | getting dev examples... 8 | number of examples with label '0': 10 9 | number of examples with label '1': 10 10 | getting test examples... 
11 | #train: 1050 12 | #dev: 20 13 | #test: 1821 14 | ============================================================ 15 | Training 16 | ============================================================ 17 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 18 | Test Acc: 49.9176 19 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 20 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 21 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 22 | Epoch 4, Dev Acc: 65.0000, Best Ever: 65.0000 23 | Test Acc: 72.9270 24 | Epoch 5, Dev Acc: 50.0000, Best Ever: 65.0000 25 | Epoch 6, Dev Acc: 50.0000, Best Ever: 65.0000 26 | Epoch 7, Dev Acc: 60.0000, Best Ever: 65.0000 27 | Epoch 8, Dev Acc: 60.0000, Best Ever: 65.0000 28 | Epoch 9, Dev Acc: 55.0000, Best Ever: 65.0000 29 | Final Dev Acc: 65.0000, Final Test Acc: 72.9270 30 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 31 | device: cuda 32 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 33 | getting train examples... 34 | number of examples with label '0': 50 35 | number of examples with label '1': 1000 36 | getting dev examples... 37 | number of examples with label '0': 10 38 | number of examples with label '1': 10 39 | getting test examples... 
40 | #train: 1050 41 | #dev: 20 42 | #test: 1821 43 | ============================================================ 44 | Training 45 | ============================================================ 46 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 47 | Test Acc: 49.9176 48 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 49 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 50 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 51 | Epoch 4, Dev Acc: 55.0000, Best Ever: 55.0000 52 | Test Acc: 60.2965 53 | Epoch 5, Dev Acc: 50.0000, Best Ever: 55.0000 54 | Epoch 6, Dev Acc: 80.0000, Best Ever: 80.0000 55 | Test Acc: 81.3839 56 | Epoch 7, Dev Acc: 50.0000, Best Ever: 80.0000 57 | Epoch 8, Dev Acc: 50.0000, Best Ever: 80.0000 58 | Epoch 9, Dev Acc: 50.0000, Best Ever: 80.0000 59 | Final Dev Acc: 80.0000, Final Test Acc: 81.3839 60 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 61 | device: cuda 62 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 63 | getting train examples... 64 | number of examples with label '0': 50 65 | number of examples with label '1': 1000 66 | getting dev examples... 67 | number of examples with label '0': 10 68 | number of examples with label '1': 10 69 | getting test examples... 
70 | #train: 1050 71 | #dev: 20 72 | #test: 1821 73 | ============================================================ 74 | Training 75 | ============================================================ 76 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 77 | Test Acc: 49.9176 78 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 79 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 80 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 81 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 82 | Epoch 5, Dev Acc: 65.0000, Best Ever: 65.0000 83 | Test Acc: 69.1378 84 | Epoch 6, Dev Acc: 50.0000, Best Ever: 65.0000 85 | Epoch 7, Dev Acc: 50.0000, Best Ever: 65.0000 86 | Epoch 8, Dev Acc: 50.0000, Best Ever: 65.0000 87 | Epoch 9, Dev Acc: 50.0000, Best Ever: 65.0000 88 | Final Dev Acc: 65.0000, Final Test Acc: 69.1378 89 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 90 | device: cuda 91 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 92 | getting train examples... 93 | number of examples with label '0': 50 94 | number of examples with label '1': 1000 95 | getting dev examples... 96 | number of examples with label '0': 10 97 | number of examples with label '1': 10 98 | getting test examples... 
99 | #train: 1050 100 | #dev: 20 101 | #test: 1821 102 | ============================================================ 103 | Training 104 | ============================================================ 105 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 106 | Test Acc: 49.9176 107 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 108 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 109 | Epoch 3, Dev Acc: 55.0000, Best Ever: 55.0000 110 | Test Acc: 65.8429 111 | Epoch 4, Dev Acc: 50.0000, Best Ever: 55.0000 112 | Epoch 5, Dev Acc: 50.0000, Best Ever: 55.0000 113 | Epoch 6, Dev Acc: 50.0000, Best Ever: 55.0000 114 | Epoch 7, Dev Acc: 50.0000, Best Ever: 55.0000 115 | Epoch 8, Dev Acc: 55.0000, Best Ever: 55.0000 116 | Epoch 9, Dev Acc: 50.0000, Best Ever: 55.0000 117 | Final Dev Acc: 55.0000, Final Test Acc: 65.8429 118 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 119 | device: cuda 120 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 121 | getting train examples... 122 | number of examples with label '0': 50 123 | number of examples with label '1': 1000 124 | getting dev examples... 125 | number of examples with label '0': 10 126 | number of examples with label '1': 10 127 | getting test examples... 
128 | #train: 1050 129 | #dev: 20 130 | #test: 1821 131 | ============================================================ 132 | Training 133 | ============================================================ 134 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 135 | Test Acc: 49.9176 136 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 137 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 138 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 139 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 140 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 141 | Epoch 6, Dev Acc: 80.0000, Best Ever: 80.0000 142 | Test Acc: 77.3202 143 | Epoch 7, Dev Acc: 50.0000, Best Ever: 80.0000 144 | Epoch 8, Dev Acc: 50.0000, Best Ever: 80.0000 145 | Epoch 9, Dev Acc: 50.0000, Best Ever: 80.0000 146 | Final Dev Acc: 80.0000, Final Test Acc: 77.3202 147 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 148 | device: cuda 149 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 150 | getting train examples... 151 | number of examples with label '0': 50 152 | number of examples with label '1': 1000 153 | getting dev examples... 154 | number of examples with label '0': 10 155 | number of examples with label '1': 10 156 | getting test examples... 
157 | #train: 1050 158 | #dev: 20 159 | #test: 1821 160 | ============================================================ 161 | Training 162 | ============================================================ 163 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 164 | Test Acc: 49.9176 165 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 166 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 167 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 168 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 169 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 170 | Epoch 6, Dev Acc: 65.0000, Best Ever: 65.0000 171 | Test Acc: 60.0220 172 | Epoch 7, Dev Acc: 50.0000, Best Ever: 65.0000 173 | Epoch 8, Dev Acc: 75.0000, Best Ever: 75.0000 174 | Test Acc: 65.4036 175 | Epoch 9, Dev Acc: 50.0000, Best Ever: 75.0000 176 | Final Dev Acc: 75.0000, Final Test Acc: 65.4036 177 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 178 | device: cuda 179 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 180 | getting train examples... 181 | number of examples with label '0': 50 182 | number of examples with label '1': 1000 183 | getting dev examples... 184 | number of examples with label '0': 10 185 | number of examples with label '1': 10 186 | getting test examples... 
187 | #train: 1050 188 | #dev: 20 189 | #test: 1821 190 | ============================================================ 191 | Training 192 | ============================================================ 193 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 194 | Test Acc: 49.9176 195 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 196 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 197 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 198 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 199 | Epoch 5, Dev Acc: 55.0000, Best Ever: 55.0000 200 | Test Acc: 54.1461 201 | Epoch 6, Dev Acc: 60.0000, Best Ever: 60.0000 202 | Test Acc: 62.3833 203 | Epoch 7, Dev Acc: 60.0000, Best Ever: 60.0000 204 | Epoch 8, Dev Acc: 60.0000, Best Ever: 60.0000 205 | Epoch 9, Dev Acc: 50.0000, Best Ever: 60.0000 206 | Final Dev Acc: 60.0000, Final Test Acc: 62.3833 207 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 208 | device: cuda 209 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 210 | getting train examples... 211 | number of examples with label '0': 50 212 | number of examples with label '1': 1000 213 | getting dev examples... 214 | number of examples with label '0': 10 215 | number of examples with label '1': 10 216 | getting test examples... 
217 | #train: 1050 218 | #dev: 20 219 | #test: 1821 220 | ============================================================ 221 | Training 222 | ============================================================ 223 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 224 | Test Acc: 49.9176 225 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 226 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 227 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 228 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 229 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 230 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 231 | Epoch 7, Dev Acc: 50.0000, Best Ever: 50.0000 232 | Epoch 8, Dev Acc: 50.0000, Best Ever: 50.0000 233 | Epoch 9, Dev Acc: 50.0000, Best Ever: 50.0000 234 | Final Dev Acc: 50.0000, Final Test Acc: 49.9176 235 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 236 | device: cuda 237 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 238 | getting train examples... 239 | number of examples with label '0': 50 240 | number of examples with label '1': 1000 241 | getting dev examples... 242 | number of examples with label '0': 10 243 | number of examples with label '1': 10 244 | getting test examples... 
245 | #train: 1050 246 | #dev: 20 247 | #test: 1821 248 | ============================================================ 249 | Training 250 | ============================================================ 251 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 252 | Test Acc: 49.9176 253 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 254 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 255 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 256 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 257 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 258 | Epoch 6, Dev Acc: 70.0000, Best Ever: 70.0000 259 | Test Acc: 72.7073 260 | Epoch 7, Dev Acc: 50.0000, Best Ever: 70.0000 261 | Epoch 8, Dev Acc: 55.0000, Best Ever: 70.0000 262 | Epoch 9, Dev Acc: 50.0000, Best Ever: 70.0000 263 | Final Dev Acc: 70.0000, Final Test Acc: 72.7073 264 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 265 | device: cuda 266 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 267 | getting train examples... 268 | number of examples with label '0': 50 269 | number of examples with label '1': 1000 270 | getting dev examples... 271 | number of examples with label '0': 10 272 | number of examples with label '1': 10 273 | getting test examples... 
274 | #train: 1050 275 | #dev: 20 276 | #test: 1821 277 | ============================================================ 278 | Training 279 | ============================================================ 280 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 281 | Test Acc: 49.9176 282 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 283 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 284 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 285 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 286 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 287 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 288 | Epoch 7, Dev Acc: 50.0000, Best Ever: 50.0000 289 | Epoch 8, Dev Acc: 50.0000, Best Ever: 50.0000 290 | Epoch 9, Dev Acc: 50.0000, Best Ever: 50.0000 291 | Final Dev Acc: 50.0000, Final Test Acc: 49.9176 292 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 293 | device: cuda 294 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 295 | getting train examples... 296 | number of examples with label '0': 50 297 | number of examples with label '1': 1000 298 | getting dev examples... 299 | number of examples with label '0': 10 300 | number of examples with label '1': 10 301 | getting test examples... 
302 | #train: 1050 303 | #dev: 20 304 | #test: 1821 305 | ============================================================ 306 | Training 307 | ============================================================ 308 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 309 | Test Acc: 49.9176 310 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 311 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 312 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 313 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 314 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 315 | Epoch 6, Dev Acc: 50.0000, Best Ever: 50.0000 316 | Epoch 7, Dev Acc: 60.0000, Best Ever: 60.0000 317 | Test Acc: 63.9758 318 | Epoch 8, Dev Acc: 50.0000, Best Ever: 60.0000 319 | Epoch 9, Dev Acc: 50.0000, Best Ever: 60.0000 320 | Final Dev Acc: 60.0000, Final Test Acc: 63.9758 321 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 322 | device: cuda 323 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 324 | getting train examples... 325 | number of examples with label '0': 50 326 | number of examples with label '1': 1000 327 | getting dev examples... 328 | number of examples with label '0': 10 329 | number of examples with label '1': 10 330 | getting test examples... 
331 | #train: 1050 332 | #dev: 20 333 | #test: 1821 334 | ============================================================ 335 | Training 336 | ============================================================ 337 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 338 | Test Acc: 49.9176 339 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 340 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 341 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 342 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 343 | Epoch 5, Dev Acc: 60.0000, Best Ever: 60.0000 344 | Test Acc: 62.3833 345 | Epoch 6, Dev Acc: 50.0000, Best Ever: 60.0000 346 | Epoch 7, Dev Acc: 50.0000, Best Ever: 60.0000 347 | Epoch 8, Dev Acc: 55.0000, Best Ever: 60.0000 348 | Epoch 9, Dev Acc: 50.0000, Best Ever: 60.0000 349 | Final Dev Acc: 60.0000, Final Test Acc: 62.3833 350 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 351 | device: cuda 352 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 353 | getting train examples... 354 | number of examples with label '0': 50 355 | number of examples with label '1': 1000 356 | getting dev examples... 357 | number of examples with label '0': 10 358 | number of examples with label '1': 10 359 | getting test examples... 
360 | #train: 1050 361 | #dev: 20 362 | #test: 1821 363 | ============================================================ 364 | Training 365 | ============================================================ 366 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 367 | Test Acc: 49.9176 368 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 369 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 370 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 371 | Epoch 4, Dev Acc: 70.0000, Best Ever: 70.0000 372 | Test Acc: 71.6639 373 | Epoch 5, Dev Acc: 50.0000, Best Ever: 70.0000 374 | Epoch 6, Dev Acc: 50.0000, Best Ever: 70.0000 375 | Epoch 7, Dev Acc: 65.0000, Best Ever: 70.0000 376 | Epoch 8, Dev Acc: 50.0000, Best Ever: 70.0000 377 | Epoch 9, Dev Acc: 55.0000, Best Ever: 70.0000 378 | Final Dev Acc: 70.0000, Final Test Acc: 71.6639 379 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 380 | device: cuda 381 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 382 | getting train examples... 383 | number of examples with label '0': 50 384 | number of examples with label '1': 1000 385 | getting dev examples... 386 | number of examples with label '0': 10 387 | number of examples with label '1': 10 388 | getting test examples... 
389 | #train: 1050 390 | #dev: 20 391 | #test: 1821 392 | ============================================================ 393 | Training 394 | ============================================================ 395 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 396 | Test Acc: 49.9176 397 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 398 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 399 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 400 | Epoch 4, Dev Acc: 65.0000, Best Ever: 65.0000 401 | Test Acc: 68.8633 402 | Epoch 5, Dev Acc: 50.0000, Best Ever: 65.0000 403 | Epoch 6, Dev Acc: 65.0000, Best Ever: 65.0000 404 | Epoch 7, Dev Acc: 50.0000, Best Ever: 65.0000 405 | Epoch 8, Dev Acc: 50.0000, Best Ever: 65.0000 406 | Epoch 9, Dev Acc: 55.0000, Best Ever: 65.0000 407 | Final Dev Acc: 65.0000, Final Test Acc: 68.8633 408 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 409 | device: cuda 410 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 411 | getting train examples... 412 | number of examples with label '0': 50 413 | number of examples with label '1': 1000 414 | getting dev examples... 415 | number of examples with label '0': 10 416 | number of examples with label '1': 10 417 | getting test examples... 
418 | #train: 1050 419 | #dev: 20 420 | #test: 1821 421 | ============================================================ 422 | Training 423 | ============================================================ 424 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 425 | Test Acc: 49.9176 426 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 427 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 428 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 429 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 430 | Epoch 5, Dev Acc: 50.0000, Best Ever: 50.0000 431 | Epoch 6, Dev Acc: 60.0000, Best Ever: 60.0000 432 | Test Acc: 73.4761 433 | Epoch 7, Dev Acc: 50.0000, Best Ever: 60.0000 434 | Epoch 8, Dev Acc: 50.0000, Best Ever: 60.0000 435 | Epoch 9, Dev Acc: 90.0000, Best Ever: 90.0000 436 | Test Acc: 82.1527 437 | Final Dev Acc: 90.0000, Final Test Acc: 82.1527 438 | -------------------------------------------------------------------------------- /results/text/results_baseline_sst2_imb01.txt: -------------------------------------------------------------------------------- 1 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 2 | device: cuda 3 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 4 | getting train examples... 5 | number of examples with label '0': 100 6 | number of examples with label '1': 1000 7 | getting dev examples... 8 | number of examples with label '0': 10 9 | number of examples with label '1': 10 10 | getting test examples... 
11 | #train: 1100 12 | #dev: 20 13 | #test: 1821 14 | ============================================================ 15 | Training 16 | ============================================================ 17 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 18 | Test Acc: 49.9176 19 | Epoch 1, Dev Acc: 45.0000, Best Ever: 50.0000 20 | Epoch 2, Dev Acc: 55.0000, Best Ever: 55.0000 21 | Test Acc: 66.2823 22 | Epoch 3, Dev Acc: 70.0000, Best Ever: 70.0000 23 | Test Acc: 73.0917 24 | Epoch 4, Dev Acc: 75.0000, Best Ever: 75.0000 25 | Test Acc: 71.3344 26 | Epoch 5, Dev Acc: 65.0000, Best Ever: 75.0000 27 | Epoch 6, Dev Acc: 60.0000, Best Ever: 75.0000 28 | Epoch 7, Dev Acc: 80.0000, Best Ever: 80.0000 29 | Test Acc: 77.3202 30 | Epoch 8, Dev Acc: 55.0000, Best Ever: 80.0000 31 | Epoch 9, Dev Acc: 50.0000, Best Ever: 80.0000 32 | Final Dev Acc: 80.0000, Final Test Acc: 77.3202 33 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 34 | device: cuda 35 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 36 | getting train examples... 37 | number of examples with label '0': 100 38 | number of examples with label '1': 1000 39 | getting dev examples... 40 | number of examples with label '0': 10 41 | number of examples with label '1': 10 42 | getting test examples... 
43 | #train: 1100 44 | #dev: 20 45 | #test: 1821 46 | ============================================================ 47 | Training 48 | ============================================================ 49 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 50 | Test Acc: 49.9176 51 | Epoch 1, Dev Acc: 55.0000, Best Ever: 55.0000 52 | Test Acc: 70.7304 53 | Epoch 2, Dev Acc: 50.0000, Best Ever: 55.0000 54 | Epoch 3, Dev Acc: 55.0000, Best Ever: 55.0000 55 | Epoch 4, Dev Acc: 65.0000, Best Ever: 65.0000 56 | Test Acc: 72.4876 57 | Epoch 5, Dev Acc: 55.0000, Best Ever: 65.0000 58 | Epoch 6, Dev Acc: 55.0000, Best Ever: 65.0000 59 | Epoch 7, Dev Acc: 55.0000, Best Ever: 65.0000 60 | Epoch 8, Dev Acc: 60.0000, Best Ever: 65.0000 61 | Epoch 9, Dev Acc: 55.0000, Best Ever: 65.0000 62 | Final Dev Acc: 65.0000, Final Test Acc: 72.4876 63 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 64 | device: cuda 65 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 66 | getting train examples... 67 | number of examples with label '0': 100 68 | number of examples with label '1': 1000 69 | getting dev examples... 70 | number of examples with label '0': 10 71 | number of examples with label '1': 10 72 | getting test examples... 
73 | #train: 1100 74 | #dev: 20 75 | #test: 1821 76 | ============================================================ 77 | Training 78 | ============================================================ 79 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 80 | Test Acc: 49.9176 81 | Epoch 1, Dev Acc: 60.0000, Best Ever: 60.0000 82 | Test Acc: 70.0714 83 | Epoch 2, Dev Acc: 70.0000, Best Ever: 70.0000 84 | Test Acc: 79.5167 85 | Epoch 3, Dev Acc: 80.0000, Best Ever: 80.0000 86 | Test Acc: 79.4069 87 | Epoch 4, Dev Acc: 65.0000, Best Ever: 80.0000 88 | Epoch 5, Dev Acc: 75.0000, Best Ever: 80.0000 89 | Epoch 6, Dev Acc: 70.0000, Best Ever: 80.0000 90 | Epoch 7, Dev Acc: 75.0000, Best Ever: 80.0000 91 | Epoch 8, Dev Acc: 75.0000, Best Ever: 80.0000 92 | Epoch 9, Dev Acc: 85.0000, Best Ever: 85.0000 93 | Test Acc: 78.3635 94 | Final Dev Acc: 85.0000, Final Test Acc: 78.3635 95 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 96 | device: cuda 97 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 98 | getting train examples... 99 | number of examples with label '0': 100 100 | number of examples with label '1': 1000 101 | getting dev examples... 102 | number of examples with label '0': 10 103 | number of examples with label '1': 10 104 | getting test examples... 
105 | #train: 1100 106 | #dev: 20 107 | #test: 1821 108 | ============================================================ 109 | Training 110 | ============================================================ 111 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 112 | Test Acc: 49.9176 113 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 114 | Epoch 2, Dev Acc: 80.0000, Best Ever: 80.0000 115 | Test Acc: 81.9330 116 | Epoch 3, Dev Acc: 50.0000, Best Ever: 80.0000 117 | Epoch 4, Dev Acc: 65.0000, Best Ever: 80.0000 118 | Epoch 5, Dev Acc: 65.0000, Best Ever: 80.0000 119 | Epoch 6, Dev Acc: 55.0000, Best Ever: 80.0000 120 | Epoch 7, Dev Acc: 75.0000, Best Ever: 80.0000 121 | Epoch 8, Dev Acc: 55.0000, Best Ever: 80.0000 122 | Epoch 9, Dev Acc: 50.0000, Best Ever: 80.0000 123 | Final Dev Acc: 80.0000, Final Test Acc: 81.9330 124 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 125 | device: cuda 126 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 127 | getting train examples... 128 | number of examples with label '0': 100 129 | number of examples with label '1': 1000 130 | getting dev examples... 131 | number of examples with label '0': 10 132 | number of examples with label '1': 10 133 | getting test examples... 
134 | #train: 1100 135 | #dev: 20 136 | #test: 1821 137 | ============================================================ 138 | Training 139 | ============================================================ 140 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 141 | Test Acc: 49.9176 142 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 143 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 144 | Epoch 3, Dev Acc: 55.0000, Best Ever: 55.0000 145 | Test Acc: 60.8457 146 | Epoch 4, Dev Acc: 50.0000, Best Ever: 55.0000 147 | Epoch 5, Dev Acc: 60.0000, Best Ever: 60.0000 148 | Test Acc: 64.9643 149 | Epoch 6, Dev Acc: 55.0000, Best Ever: 60.0000 150 | Epoch 7, Dev Acc: 70.0000, Best Ever: 70.0000 151 | Test Acc: 71.2246 152 | Epoch 8, Dev Acc: 65.0000, Best Ever: 70.0000 153 | Epoch 9, Dev Acc: 50.0000, Best Ever: 70.0000 154 | Final Dev Acc: 70.0000, Final Test Acc: 71.2246 155 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 156 | device: cuda 157 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 158 | getting train examples... 159 | number of examples with label '0': 100 160 | number of examples with label '1': 1000 161 | getting dev examples... 162 | number of examples with label '0': 10 163 | number of examples with label '1': 10 164 | getting test examples... 
165 | #train: 1100 166 | #dev: 20 167 | #test: 1821 168 | ============================================================ 169 | Training 170 | ============================================================ 171 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 172 | Test Acc: 49.9176 173 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 174 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 175 | Epoch 3, Dev Acc: 55.0000, Best Ever: 55.0000 176 | Test Acc: 61.0104 177 | Epoch 4, Dev Acc: 75.0000, Best Ever: 75.0000 178 | Test Acc: 81.7683 179 | Epoch 5, Dev Acc: 70.0000, Best Ever: 75.0000 180 | Epoch 6, Dev Acc: 50.0000, Best Ever: 75.0000 181 | Epoch 7, Dev Acc: 50.0000, Best Ever: 75.0000 182 | Epoch 8, Dev Acc: 60.0000, Best Ever: 75.0000 183 | Epoch 9, Dev Acc: 70.0000, Best Ever: 75.0000 184 | Final Dev Acc: 75.0000, Final Test Acc: 81.7683 185 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 186 | device: cuda 187 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 188 | getting train examples... 189 | number of examples with label '0': 100 190 | number of examples with label '1': 1000 191 | getting dev examples... 192 | number of examples with label '0': 10 193 | number of examples with label '1': 10 194 | getting test examples... 
195 | #train: 1100 196 | #dev: 20 197 | #test: 1821 198 | ============================================================ 199 | Training 200 | ============================================================ 201 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 202 | Test Acc: 49.9176 203 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 204 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 205 | Epoch 3, Dev Acc: 80.0000, Best Ever: 80.0000 206 | Test Acc: 77.9791 207 | Epoch 4, Dev Acc: 55.0000, Best Ever: 80.0000 208 | Epoch 5, Dev Acc: 60.0000, Best Ever: 80.0000 209 | Epoch 6, Dev Acc: 80.0000, Best Ever: 80.0000 210 | Epoch 7, Dev Acc: 55.0000, Best Ever: 80.0000 211 | Epoch 8, Dev Acc: 80.0000, Best Ever: 80.0000 212 | Epoch 9, Dev Acc: 55.0000, Best Ever: 80.0000 213 | Final Dev Acc: 80.0000, Final Test Acc: 77.9791 214 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 215 | device: cuda 216 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 217 | getting train examples... 218 | number of examples with label '0': 100 219 | number of examples with label '1': 1000 220 | getting dev examples... 221 | number of examples with label '0': 10 222 | number of examples with label '1': 10 223 | getting test examples... 
224 | #train: 1100 225 | #dev: 20 226 | #test: 1821 227 | ============================================================ 228 | Training 229 | ============================================================ 230 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 231 | Test Acc: 49.9176 232 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 233 | Epoch 2, Dev Acc: 65.0000, Best Ever: 65.0000 234 | Test Acc: 73.3663 235 | Epoch 3, Dev Acc: 50.0000, Best Ever: 65.0000 236 | Epoch 4, Dev Acc: 55.0000, Best Ever: 65.0000 237 | Epoch 5, Dev Acc: 70.0000, Best Ever: 70.0000 238 | Test Acc: 68.2043 239 | Epoch 6, Dev Acc: 55.0000, Best Ever: 70.0000 240 | Epoch 7, Dev Acc: 50.0000, Best Ever: 70.0000 241 | Epoch 8, Dev Acc: 75.0000, Best Ever: 75.0000 242 | Test Acc: 75.7276 243 | Epoch 9, Dev Acc: 65.0000, Best Ever: 75.0000 244 | Final Dev Acc: 75.0000, Final Test Acc: 75.7276 245 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 246 | device: cuda 247 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 248 | getting train examples... 249 | number of examples with label '0': 100 250 | number of examples with label '1': 1000 251 | getting dev examples... 252 | number of examples with label '0': 10 253 | number of examples with label '1': 10 254 | getting test examples... 
255 | #train: 1100 256 | #dev: 20 257 | #test: 1821 258 | ============================================================ 259 | Training 260 | ============================================================ 261 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 262 | Test Acc: 49.9176 263 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 264 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 265 | Epoch 3, Dev Acc: 50.0000, Best Ever: 50.0000 266 | Epoch 4, Dev Acc: 50.0000, Best Ever: 50.0000 267 | Epoch 5, Dev Acc: 65.0000, Best Ever: 65.0000 268 | Test Acc: 68.3690 269 | Epoch 6, Dev Acc: 55.0000, Best Ever: 65.0000 270 | Epoch 7, Dev Acc: 50.0000, Best Ever: 65.0000 271 | Epoch 8, Dev Acc: 55.0000, Best Ever: 65.0000 272 | Epoch 9, Dev Acc: 50.0000, Best Ever: 65.0000 273 | Final Dev Acc: 65.0000, Final Test Acc: 68.3690 274 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 275 | device: cuda 276 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 277 | getting train examples... 278 | number of examples with label '0': 100 279 | number of examples with label '1': 1000 280 | getting dev examples... 281 | number of examples with label '0': 10 282 | number of examples with label '1': 10 283 | getting test examples... 
284 | #train: 1100 285 | #dev: 20 286 | #test: 1821 287 | ============================================================ 288 | Training 289 | ============================================================ 290 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 291 | Test Acc: 49.9176 292 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 293 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 294 | Epoch 3, Dev Acc: 55.0000, Best Ever: 55.0000 295 | Test Acc: 69.0280 296 | Epoch 4, Dev Acc: 70.0000, Best Ever: 70.0000 297 | Test Acc: 70.8402 298 | Epoch 5, Dev Acc: 50.0000, Best Ever: 70.0000 299 | Epoch 6, Dev Acc: 55.0000, Best Ever: 70.0000 300 | Epoch 7, Dev Acc: 60.0000, Best Ever: 70.0000 301 | Epoch 8, Dev Acc: 55.0000, Best Ever: 70.0000 302 | Epoch 9, Dev Acc: 55.0000, Best Ever: 70.0000 303 | Final Dev Acc: 70.0000, Final Test Acc: 70.8402 304 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 305 | device: cuda 306 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 307 | getting train examples... 308 | number of examples with label '0': 100 309 | number of examples with label '1': 1000 310 | getting dev examples... 311 | number of examples with label '0': 10 312 | number of examples with label '1': 10 313 | getting test examples... 
314 | #train: 1100 315 | #dev: 20 316 | #test: 1821 317 | ============================================================ 318 | Training 319 | ============================================================ 320 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 321 | Test Acc: 49.9176 322 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 323 | Epoch 2, Dev Acc: 80.0000, Best Ever: 80.0000 324 | Test Acc: 80.8347 325 | Epoch 3, Dev Acc: 65.0000, Best Ever: 80.0000 326 | Epoch 4, Dev Acc: 80.0000, Best Ever: 80.0000 327 | Epoch 5, Dev Acc: 55.0000, Best Ever: 80.0000 328 | Epoch 6, Dev Acc: 55.0000, Best Ever: 80.0000 329 | Epoch 7, Dev Acc: 50.0000, Best Ever: 80.0000 330 | Epoch 8, Dev Acc: 65.0000, Best Ever: 80.0000 331 | Epoch 9, Dev Acc: 55.0000, Best Ever: 80.0000 332 | Final Dev Acc: 80.0000, Final Test Acc: 80.8347 333 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 334 | device: cuda 335 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 336 | getting train examples... 337 | number of examples with label '0': 100 338 | number of examples with label '1': 1000 339 | getting dev examples... 340 | number of examples with label '0': 10 341 | number of examples with label '1': 10 342 | getting test examples... 
343 | #train: 1100 344 | #dev: 20 345 | #test: 1821 346 | ============================================================ 347 | Training 348 | ============================================================ 349 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 350 | Test Acc: 49.9176 351 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 352 | Epoch 2, Dev Acc: 60.0000, Best Ever: 60.0000 353 | Test Acc: 71.3344 354 | Epoch 3, Dev Acc: 50.0000, Best Ever: 60.0000 355 | Epoch 4, Dev Acc: 70.0000, Best Ever: 70.0000 356 | Test Acc: 73.8056 357 | Epoch 5, Dev Acc: 50.0000, Best Ever: 70.0000 358 | Epoch 6, Dev Acc: 60.0000, Best Ever: 70.0000 359 | Epoch 7, Dev Acc: 60.0000, Best Ever: 70.0000 360 | Epoch 8, Dev Acc: 55.0000, Best Ever: 70.0000 361 | Epoch 9, Dev Acc: 50.0000, Best Ever: 70.0000 362 | Final Dev Acc: 70.0000, Final Test Acc: 73.8056 363 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 364 | device: cuda 365 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 366 | getting train examples... 367 | number of examples with label '0': 100 368 | number of examples with label '1': 1000 369 | getting dev examples... 370 | number of examples with label '0': 10 371 | number of examples with label '1': 10 372 | getting test examples... 
373 | #train: 1100 374 | #dev: 20 375 | #test: 1821 376 | ============================================================ 377 | Training 378 | ============================================================ 379 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 380 | Test Acc: 49.9176 381 | Epoch 1, Dev Acc: 55.0000, Best Ever: 55.0000 382 | Test Acc: 64.8545 383 | Epoch 2, Dev Acc: 50.0000, Best Ever: 55.0000 384 | Epoch 3, Dev Acc: 55.0000, Best Ever: 55.0000 385 | Epoch 4, Dev Acc: 60.0000, Best Ever: 60.0000 386 | Test Acc: 64.4701 387 | Epoch 5, Dev Acc: 50.0000, Best Ever: 60.0000 388 | Epoch 6, Dev Acc: 70.0000, Best Ever: 70.0000 389 | Test Acc: 69.6321 390 | Epoch 7, Dev Acc: 65.0000, Best Ever: 70.0000 391 | Epoch 8, Dev Acc: 55.0000, Best Ever: 70.0000 392 | Epoch 9, Dev Acc: 65.0000, Best Ever: 70.0000 393 | Final Dev Acc: 70.0000, Final Test Acc: 69.6321 394 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 395 | device: cuda 396 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 397 | getting train examples... 398 | number of examples with label '0': 100 399 | number of examples with label '1': 1000 400 | getting dev examples... 401 | number of examples with label '0': 10 402 | number of examples with label '1': 10 403 | getting test examples... 
404 | #train: 1100 405 | #dev: 20 406 | #test: 1821 407 | ============================================================ 408 | Training 409 | ============================================================ 410 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 411 | Test Acc: 49.9176 412 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 413 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 414 | Epoch 3, Dev Acc: 65.0000, Best Ever: 65.0000 415 | Test Acc: 71.7188 416 | Epoch 4, Dev Acc: 60.0000, Best Ever: 65.0000 417 | Epoch 5, Dev Acc: 70.0000, Best Ever: 70.0000 418 | Test Acc: 68.9182 419 | Epoch 6, Dev Acc: 65.0000, Best Ever: 70.0000 420 | Epoch 7, Dev Acc: 60.0000, Best Ever: 70.0000 421 | Epoch 8, Dev Acc: 70.0000, Best Ever: 70.0000 422 | Epoch 9, Dev Acc: 50.0000, Best Ever: 70.0000 423 | Final Dev Acc: 70.0000, Final Test Acc: 68.9182 424 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 425 | device: cuda 426 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, task='sst-2', train_num_per_class=1000) 427 | getting train examples... 428 | number of examples with label '0': 100 429 | number of examples with label '1': 1000 430 | getting dev examples... 431 | number of examples with label '0': 10 432 | number of examples with label '1': 10 433 | getting test examples... 
434 | #train: 1100 435 | #dev: 20 436 | #test: 1821 437 | ============================================================ 438 | Training 439 | ============================================================ 440 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 441 | Test Acc: 49.9176 442 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 443 | Epoch 2, Dev Acc: 60.0000, Best Ever: 60.0000 444 | Test Acc: 64.0857 445 | Epoch 3, Dev Acc: 65.0000, Best Ever: 65.0000 446 | Test Acc: 78.4734 447 | Epoch 4, Dev Acc: 65.0000, Best Ever: 65.0000 448 | Epoch 5, Dev Acc: 55.0000, Best Ever: 65.0000 449 | Epoch 6, Dev Acc: 75.0000, Best Ever: 75.0000 450 | Test Acc: 76.4415 451 | Epoch 7, Dev Acc: 65.0000, Best Ever: 75.0000 452 | Epoch 8, Dev Acc: 60.0000, Best Ever: 75.0000 453 | Epoch 9, Dev Acc: 65.0000, Best Ever: 75.0000 454 | Final Dev Acc: 75.0000, Final Test Acc: 76.4415 455 | -------------------------------------------------------------------------------- /results/text/results_ren_sst2_imb01.txt: -------------------------------------------------------------------------------- 1 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 2 | device: cuda 3 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 4 | getting train examples... 5 | number of examples with label '0': 100 6 | number of examples with label '1': 1000 7 | getting dev examples... 8 | number of examples with label '0': 10 9 | number of examples with label '1': 10 10 | getting test examples... 
11 | #train: 1100 12 | #dev: 20 13 | #test: 1821 14 | ============================================================ 15 | Pre-training 16 | ============================================================ 17 | ============================================================ 18 | Training 19 | ============================================================ 20 | Epoch 0, Dev Acc: 75.0000, Best Ever: 75.0000 21 | Test Acc: 71.2246 22 | Epoch 1, Dev Acc: 50.0000, Best Ever: 75.0000 23 | Epoch 2, Dev Acc: 85.0000, Best Ever: 85.0000 24 | Test Acc: 83.6903 25 | Epoch 3, Dev Acc: 75.0000, Best Ever: 85.0000 26 | Epoch 4, Dev Acc: 75.0000, Best Ever: 85.0000 27 | Epoch 5, Dev Acc: 80.0000, Best Ever: 85.0000 28 | Epoch 6, Dev Acc: 80.0000, Best Ever: 85.0000 29 | Epoch 7, Dev Acc: 65.0000, Best Ever: 85.0000 30 | Epoch 8, Dev Acc: 80.0000, Best Ever: 85.0000 31 | Epoch 9, Dev Acc: 80.0000, Best Ever: 85.0000 32 | Final Dev Acc: 85.0000, Final Test Acc: 83.6903 33 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 34 | device: cuda 35 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 36 | getting train examples... 37 | number of examples with label '0': 100 38 | number of examples with label '1': 1000 39 | getting dev examples... 40 | number of examples with label '0': 10 41 | number of examples with label '1': 10 42 | getting test examples... 
43 | #train: 1100 44 | #dev: 20 45 | #test: 1821 46 | ============================================================ 47 | Pre-training 48 | ============================================================ 49 | ============================================================ 50 | Training 51 | ============================================================ 52 | Epoch 0, Dev Acc: 75.0000, Best Ever: 75.0000 53 | Test Acc: 80.4503 54 | Epoch 1, Dev Acc: 75.0000, Best Ever: 75.0000 55 | Epoch 2, Dev Acc: 80.0000, Best Ever: 80.0000 56 | Test Acc: 81.1093 57 | Epoch 3, Dev Acc: 90.0000, Best Ever: 90.0000 58 | Test Acc: 81.8232 59 | Epoch 4, Dev Acc: 80.0000, Best Ever: 90.0000 60 | Epoch 5, Dev Acc: 80.0000, Best Ever: 90.0000 61 | Epoch 6, Dev Acc: 85.0000, Best Ever: 90.0000 62 | Epoch 7, Dev Acc: 75.0000, Best Ever: 90.0000 63 | Epoch 8, Dev Acc: 70.0000, Best Ever: 90.0000 64 | Epoch 9, Dev Acc: 60.0000, Best Ever: 90.0000 65 | Final Dev Acc: 90.0000, Final Test Acc: 81.8232 66 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 67 | device: cuda 68 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 69 | getting train examples... 70 | number of examples with label '0': 100 71 | number of examples with label '1': 1000 72 | getting dev examples... 73 | number of examples with label '0': 10 74 | number of examples with label '1': 10 75 | getting test examples... 
76 | #train: 1100 77 | #dev: 20 78 | #test: 1821 79 | ============================================================ 80 | Pre-training 81 | ============================================================ 82 | ============================================================ 83 | Training 84 | ============================================================ 85 | Epoch 0, Dev Acc: 75.0000, Best Ever: 75.0000 86 | Test Acc: 77.3202 87 | Epoch 1, Dev Acc: 85.0000, Best Ever: 85.0000 88 | Test Acc: 80.4503 89 | Epoch 2, Dev Acc: 70.0000, Best Ever: 85.0000 90 | Epoch 3, Dev Acc: 70.0000, Best Ever: 85.0000 91 | Epoch 4, Dev Acc: 80.0000, Best Ever: 85.0000 92 | Epoch 5, Dev Acc: 65.0000, Best Ever: 85.0000 93 | Epoch 6, Dev Acc: 90.0000, Best Ever: 90.0000 94 | Test Acc: 81.4937 95 | Epoch 7, Dev Acc: 90.0000, Best Ever: 90.0000 96 | Epoch 8, Dev Acc: 85.0000, Best Ever: 90.0000 97 | Epoch 9, Dev Acc: 95.0000, Best Ever: 95.0000 98 | Test Acc: 82.8116 99 | Final Dev Acc: 95.0000, Final Test Acc: 82.8116 100 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 101 | device: cuda 102 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 103 | getting train examples... 104 | number of examples with label '0': 100 105 | number of examples with label '1': 1000 106 | getting dev examples... 107 | number of examples with label '0': 10 108 | number of examples with label '1': 10 109 | getting test examples... 
110 | #train: 1100 111 | #dev: 20 112 | #test: 1821 113 | ============================================================ 114 | Pre-training 115 | ============================================================ 116 | ============================================================ 117 | Training 118 | ============================================================ 119 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 120 | Test Acc: 50.1373 121 | Epoch 1, Dev Acc: 55.0000, Best Ever: 55.0000 122 | Test Acc: 54.4206 123 | Epoch 2, Dev Acc: 90.0000, Best Ever: 90.0000 124 | Test Acc: 80.7249 125 | Epoch 3, Dev Acc: 50.0000, Best Ever: 90.0000 126 | Epoch 4, Dev Acc: 50.0000, Best Ever: 90.0000 127 | Epoch 5, Dev Acc: 65.0000, Best Ever: 90.0000 128 | Epoch 6, Dev Acc: 80.0000, Best Ever: 90.0000 129 | Epoch 7, Dev Acc: 50.0000, Best Ever: 90.0000 130 | Epoch 8, Dev Acc: 75.0000, Best Ever: 90.0000 131 | Epoch 9, Dev Acc: 85.0000, Best Ever: 90.0000 132 | Final Dev Acc: 90.0000, Final Test Acc: 80.7249 133 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 134 | device: cuda 135 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 136 | getting train examples... 137 | number of examples with label '0': 100 138 | number of examples with label '1': 1000 139 | getting dev examples... 140 | number of examples with label '0': 10 141 | number of examples with label '1': 10 142 | getting test examples... 
143 | #train: 1100 144 | #dev: 20 145 | #test: 1821 146 | ============================================================ 147 | Pre-training 148 | ============================================================ 149 | ============================================================ 150 | Training 151 | ============================================================ 152 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 153 | Test Acc: 51.1807 154 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 155 | Epoch 2, Dev Acc: 75.0000, Best Ever: 75.0000 156 | Test Acc: 81.1093 157 | Epoch 3, Dev Acc: 90.0000, Best Ever: 90.0000 158 | Test Acc: 76.4964 159 | Epoch 4, Dev Acc: 50.0000, Best Ever: 90.0000 160 | Epoch 5, Dev Acc: 60.0000, Best Ever: 90.0000 161 | Epoch 6, Dev Acc: 75.0000, Best Ever: 90.0000 162 | Epoch 7, Dev Acc: 70.0000, Best Ever: 90.0000 163 | Epoch 8, Dev Acc: 70.0000, Best Ever: 90.0000 164 | Epoch 9, Dev Acc: 80.0000, Best Ever: 90.0000 165 | Final Dev Acc: 90.0000, Final Test Acc: 76.4964 166 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 167 | device: cuda 168 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 169 | getting train examples... 170 | number of examples with label '0': 100 171 | number of examples with label '1': 1000 172 | getting dev examples... 173 | number of examples with label '0': 10 174 | number of examples with label '1': 10 175 | getting test examples... 
176 | #train: 1100 177 | #dev: 20 178 | #test: 1821 179 | ============================================================ 180 | Pre-training 181 | ============================================================ 182 | ============================================================ 183 | Training 184 | ============================================================ 185 | Epoch 0, Dev Acc: 45.0000, Best Ever: 45.0000 186 | Test Acc: 51.5651 187 | Epoch 1, Dev Acc: 80.0000, Best Ever: 80.0000 188 | Test Acc: 78.7479 189 | Epoch 2, Dev Acc: 60.0000, Best Ever: 80.0000 190 | Epoch 3, Dev Acc: 60.0000, Best Ever: 80.0000 191 | Epoch 4, Dev Acc: 75.0000, Best Ever: 80.0000 192 | Epoch 5, Dev Acc: 50.0000, Best Ever: 80.0000 193 | Epoch 6, Dev Acc: 80.0000, Best Ever: 80.0000 194 | Epoch 7, Dev Acc: 50.0000, Best Ever: 80.0000 195 | Epoch 8, Dev Acc: 50.0000, Best Ever: 80.0000 196 | Epoch 9, Dev Acc: 80.0000, Best Ever: 80.0000 197 | Final Dev Acc: 80.0000, Final Test Acc: 78.7479 198 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 199 | device: cuda 200 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 201 | getting train examples... 202 | number of examples with label '0': 100 203 | number of examples with label '1': 1000 204 | getting dev examples... 205 | number of examples with label '0': 10 206 | number of examples with label '1': 10 207 | getting test examples... 
208 | #train: 1100 209 | #dev: 20 210 | #test: 1821 211 | ============================================================ 212 | Pre-training 213 | ============================================================ 214 | ============================================================ 215 | Training 216 | ============================================================ 217 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 218 | Test Acc: 49.9176 219 | Epoch 1, Dev Acc: 70.0000, Best Ever: 70.0000 220 | Test Acc: 75.1785 221 | Epoch 2, Dev Acc: 90.0000, Best Ever: 90.0000 222 | Test Acc: 80.6700 223 | Epoch 3, Dev Acc: 80.0000, Best Ever: 90.0000 224 | Epoch 4, Dev Acc: 55.0000, Best Ever: 90.0000 225 | Epoch 5, Dev Acc: 70.0000, Best Ever: 90.0000 226 | Epoch 6, Dev Acc: 85.0000, Best Ever: 90.0000 227 | Epoch 7, Dev Acc: 70.0000, Best Ever: 90.0000 228 | Epoch 8, Dev Acc: 80.0000, Best Ever: 90.0000 229 | Epoch 9, Dev Acc: 85.0000, Best Ever: 90.0000 230 | Final Dev Acc: 90.0000, Final Test Acc: 80.6700 231 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 232 | device: cuda 233 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 234 | getting train examples... 235 | number of examples with label '0': 100 236 | number of examples with label '1': 1000 237 | getting dev examples... 238 | number of examples with label '0': 10 239 | number of examples with label '1': 10 240 | getting test examples... 
241 | #train: 1100 242 | #dev: 20 243 | #test: 1821 244 | ============================================================ 245 | Pre-training 246 | ============================================================ 247 | ============================================================ 248 | Training 249 | ============================================================ 250 | Epoch 0, Dev Acc: 75.0000, Best Ever: 75.0000 251 | Test Acc: 72.3778 252 | Epoch 1, Dev Acc: 95.0000, Best Ever: 95.0000 253 | Test Acc: 80.0659 254 | Epoch 2, Dev Acc: 80.0000, Best Ever: 95.0000 255 | Epoch 3, Dev Acc: 90.0000, Best Ever: 95.0000 256 | Epoch 4, Dev Acc: 95.0000, Best Ever: 95.0000 257 | Epoch 5, Dev Acc: 80.0000, Best Ever: 95.0000 258 | Epoch 6, Dev Acc: 50.0000, Best Ever: 95.0000 259 | Epoch 7, Dev Acc: 70.0000, Best Ever: 95.0000 260 | Epoch 8, Dev Acc: 80.0000, Best Ever: 95.0000 261 | Epoch 9, Dev Acc: 90.0000, Best Ever: 95.0000 262 | Final Dev Acc: 95.0000, Final Test Acc: 80.0659 263 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 264 | device: cuda 265 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 266 | getting train examples... 267 | number of examples with label '0': 100 268 | number of examples with label '1': 1000 269 | getting dev examples... 270 | number of examples with label '0': 10 271 | number of examples with label '1': 10 272 | getting test examples... 
273 | #train: 1100 274 | #dev: 20 275 | #test: 1821 276 | ============================================================ 277 | Pre-training 278 | ============================================================ 279 | ============================================================ 280 | Training 281 | ============================================================ 282 | Epoch 0, Dev Acc: 60.0000, Best Ever: 60.0000 283 | Test Acc: 50.7963 284 | Epoch 1, Dev Acc: 85.0000, Best Ever: 85.0000 285 | Test Acc: 82.1527 286 | Epoch 2, Dev Acc: 75.0000, Best Ever: 85.0000 287 | Epoch 3, Dev Acc: 50.0000, Best Ever: 85.0000 288 | Epoch 4, Dev Acc: 65.0000, Best Ever: 85.0000 289 | Epoch 5, Dev Acc: 65.0000, Best Ever: 85.0000 290 | Epoch 6, Dev Acc: 85.0000, Best Ever: 85.0000 291 | Epoch 7, Dev Acc: 55.0000, Best Ever: 85.0000 292 | Epoch 8, Dev Acc: 60.0000, Best Ever: 85.0000 293 | Epoch 9, Dev Acc: 80.0000, Best Ever: 85.0000 294 | Final Dev Acc: 85.0000, Final Test Acc: 82.1527 295 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 296 | device: cuda 297 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 298 | getting train examples... 299 | number of examples with label '0': 100 300 | number of examples with label '1': 1000 301 | getting dev examples... 302 | number of examples with label '0': 10 303 | number of examples with label '1': 10 304 | getting test examples... 
305 | #train: 1100 306 | #dev: 20 307 | #test: 1821 308 | ============================================================ 309 | Pre-training 310 | ============================================================ 311 | ============================================================ 312 | Training 313 | ============================================================ 314 | Epoch 0, Dev Acc: 80.0000, Best Ever: 80.0000 315 | Test Acc: 66.4470 316 | Epoch 1, Dev Acc: 85.0000, Best Ever: 85.0000 317 | Test Acc: 83.1411 318 | Epoch 2, Dev Acc: 75.0000, Best Ever: 85.0000 319 | Epoch 3, Dev Acc: 65.0000, Best Ever: 85.0000 320 | Epoch 4, Dev Acc: 85.0000, Best Ever: 85.0000 321 | Epoch 5, Dev Acc: 85.0000, Best Ever: 85.0000 322 | Epoch 6, Dev Acc: 90.0000, Best Ever: 90.0000 323 | Test Acc: 75.7825 324 | Epoch 7, Dev Acc: 90.0000, Best Ever: 90.0000 325 | Epoch 8, Dev Acc: 60.0000, Best Ever: 90.0000 326 | Epoch 9, Dev Acc: 85.0000, Best Ever: 90.0000 327 | Final Dev Acc: 90.0000, Final Test Acc: 75.7825 328 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 329 | device: cuda 330 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 331 | getting train examples... 332 | number of examples with label '0': 100 333 | number of examples with label '1': 1000 334 | getting dev examples... 335 | number of examples with label '0': 10 336 | number of examples with label '1': 10 337 | getting test examples... 
338 | #train: 1100 339 | #dev: 20 340 | #test: 1821 341 | ============================================================ 342 | Pre-training 343 | ============================================================ 344 | ============================================================ 345 | Training 346 | ============================================================ 347 | Epoch 0, Dev Acc: 80.0000, Best Ever: 80.0000 348 | Test Acc: 67.8199 349 | Epoch 1, Dev Acc: 75.0000, Best Ever: 80.0000 350 | Epoch 2, Dev Acc: 70.0000, Best Ever: 80.0000 351 | Epoch 3, Dev Acc: 85.0000, Best Ever: 85.0000 352 | Test Acc: 84.5689 353 | Epoch 4, Dev Acc: 90.0000, Best Ever: 90.0000 354 | Test Acc: 83.4157 355 | Epoch 5, Dev Acc: 85.0000, Best Ever: 90.0000 356 | Epoch 6, Dev Acc: 85.0000, Best Ever: 90.0000 357 | Epoch 7, Dev Acc: 80.0000, Best Ever: 90.0000 358 | Epoch 8, Dev Acc: 85.0000, Best Ever: 90.0000 359 | Epoch 9, Dev Acc: 70.0000, Best Ever: 90.0000 360 | Final Dev Acc: 90.0000, Final Test Acc: 83.4157 361 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 362 | device: cuda 363 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 364 | getting train examples... 365 | number of examples with label '0': 100 366 | number of examples with label '1': 1000 367 | getting dev examples... 368 | number of examples with label '0': 10 369 | number of examples with label '1': 10 370 | getting test examples... 
371 | #train: 1100 372 | #dev: 20 373 | #test: 1821 374 | ============================================================ 375 | Pre-training 376 | ============================================================ 377 | ============================================================ 378 | Training 379 | ============================================================ 380 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 381 | Test Acc: 56.2878 382 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 383 | Epoch 2, Dev Acc: 50.0000, Best Ever: 50.0000 384 | Epoch 3, Dev Acc: 90.0000, Best Ever: 90.0000 385 | Test Acc: 81.7133 386 | Epoch 4, Dev Acc: 90.0000, Best Ever: 90.0000 387 | Epoch 5, Dev Acc: 80.0000, Best Ever: 90.0000 388 | Epoch 6, Dev Acc: 60.0000, Best Ever: 90.0000 389 | Epoch 7, Dev Acc: 85.0000, Best Ever: 90.0000 390 | Epoch 8, Dev Acc: 80.0000, Best Ever: 90.0000 391 | Epoch 9, Dev Acc: 80.0000, Best Ever: 90.0000 392 | Final Dev Acc: 90.0000, Final Test Acc: 81.7133 393 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 394 | device: cuda 395 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 396 | getting train examples... 397 | number of examples with label '0': 100 398 | number of examples with label '1': 1000 399 | getting dev examples... 400 | number of examples with label '0': 10 401 | number of examples with label '1': 10 402 | getting test examples... 
403 | #train: 1100 404 | #dev: 20 405 | #test: 1821 406 | ============================================================ 407 | Pre-training 408 | ============================================================ 409 | ============================================================ 410 | Training 411 | ============================================================ 412 | Epoch 0, Dev Acc: 55.0000, Best Ever: 55.0000 413 | Test Acc: 59.4728 414 | Epoch 1, Dev Acc: 50.0000, Best Ever: 55.0000 415 | Epoch 2, Dev Acc: 75.0000, Best Ever: 75.0000 416 | Test Acc: 80.0110 417 | Epoch 3, Dev Acc: 40.0000, Best Ever: 75.0000 418 | Epoch 4, Dev Acc: 70.0000, Best Ever: 75.0000 419 | Epoch 5, Dev Acc: 90.0000, Best Ever: 90.0000 420 | Test Acc: 80.7798 421 | Epoch 6, Dev Acc: 75.0000, Best Ever: 90.0000 422 | Epoch 7, Dev Acc: 85.0000, Best Ever: 90.0000 423 | Epoch 8, Dev Acc: 70.0000, Best Ever: 90.0000 424 | Epoch 9, Dev Acc: 85.0000, Best Ever: 90.0000 425 | Final Dev Acc: 90.0000, Final Test Acc: 80.7798 426 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 427 | device: cuda 428 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 429 | getting train examples... 430 | number of examples with label '0': 100 431 | number of examples with label '1': 1000 432 | getting dev examples... 433 | number of examples with label '0': 10 434 | number of examples with label '1': 10 435 | getting test examples... 
436 | #train: 1100 437 | #dev: 20 438 | #test: 1821 439 | ============================================================ 440 | Pre-training 441 | ============================================================ 442 | ============================================================ 443 | Training 444 | ============================================================ 445 | Epoch 0, Dev Acc: 55.0000, Best Ever: 55.0000 446 | Test Acc: 59.3630 447 | Epoch 1, Dev Acc: 90.0000, Best Ever: 90.0000 448 | Test Acc: 81.7133 449 | Epoch 2, Dev Acc: 75.0000, Best Ever: 90.0000 450 | Epoch 3, Dev Acc: 90.0000, Best Ever: 90.0000 451 | Epoch 4, Dev Acc: 50.0000, Best Ever: 90.0000 452 | Epoch 5, Dev Acc: 75.0000, Best Ever: 90.0000 453 | Epoch 6, Dev Acc: 75.0000, Best Ever: 90.0000 454 | Epoch 7, Dev Acc: 65.0000, Best Ever: 90.0000 455 | Epoch 8, Dev Acc: 45.0000, Best Ever: 90.0000 456 | Epoch 9, Dev Acc: 85.0000, Best Ever: 90.0000 457 | Final Dev Acc: 90.0000, Final Test Acc: 81.7133 458 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 459 | device: cuda 460 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.1, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 461 | getting train examples... 462 | number of examples with label '0': 100 463 | number of examples with label '1': 1000 464 | getting dev examples... 465 | number of examples with label '0': 10 466 | number of examples with label '1': 10 467 | getting test examples... 
468 | #train: 1100 469 | #dev: 20 470 | #test: 1821 471 | ============================================================ 472 | Pre-training 473 | ============================================================ 474 | ============================================================ 475 | Training 476 | ============================================================ 477 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 478 | Test Acc: 50.3569 479 | Epoch 1, Dev Acc: 90.0000, Best Ever: 90.0000 480 | Test Acc: 80.5052 481 | Epoch 2, Dev Acc: 90.0000, Best Ever: 90.0000 482 | Epoch 3, Dev Acc: 60.0000, Best Ever: 90.0000 483 | Epoch 4, Dev Acc: 80.0000, Best Ever: 90.0000 484 | Epoch 5, Dev Acc: 75.0000, Best Ever: 90.0000 485 | Epoch 6, Dev Acc: 80.0000, Best Ever: 90.0000 486 | Epoch 7, Dev Acc: 65.0000, Best Ever: 90.0000 487 | Epoch 8, Dev Acc: 75.0000, Best Ever: 90.0000 488 | Epoch 9, Dev Acc: 75.0000, Best Ever: 90.0000 489 | Final Dev Acc: 90.0000, Final Test Acc: 80.5052 490 | -------------------------------------------------------------------------------- /results/text/results_ren_sst2_imb005.txt: -------------------------------------------------------------------------------- 1 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 2 | device: cuda 3 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 4 | getting train examples... 5 | number of examples with label '0': 50 6 | number of examples with label '1': 1000 7 | getting dev examples... 8 | number of examples with label '0': 10 9 | number of examples with label '1': 10 10 | getting test examples... 
11 | #train: 1050 12 | #dev: 20 13 | #test: 1821 14 | ============================================================ 15 | Pre-training 16 | ============================================================ 17 | ============================================================ 18 | Training 19 | ============================================================ 20 | Epoch 0, Dev Acc: 65.0000, Best Ever: 65.0000 21 | Test Acc: 55.1345 22 | Epoch 1, Dev Acc: 75.0000, Best Ever: 75.0000 23 | Test Acc: 69.6321 24 | Epoch 2, Dev Acc: 75.0000, Best Ever: 75.0000 25 | Epoch 3, Dev Acc: 75.0000, Best Ever: 75.0000 26 | Epoch 4, Dev Acc: 50.0000, Best Ever: 75.0000 27 | Epoch 5, Dev Acc: 50.0000, Best Ever: 75.0000 28 | Epoch 6, Dev Acc: 50.0000, Best Ever: 75.0000 29 | Epoch 7, Dev Acc: 50.0000, Best Ever: 75.0000 30 | Epoch 8, Dev Acc: 50.0000, Best Ever: 75.0000 31 | Epoch 9, Dev Acc: 75.0000, Best Ever: 75.0000 32 | Final Dev Acc: 75.0000, Final Test Acc: 69.6321 33 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 34 | device: cuda 35 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 36 | getting train examples... 37 | number of examples with label '0': 50 38 | number of examples with label '1': 1000 39 | getting dev examples... 40 | number of examples with label '0': 10 41 | number of examples with label '1': 10 42 | getting test examples... 
43 | #train: 1050 44 | #dev: 20 45 | #test: 1821 46 | ============================================================ 47 | Pre-training 48 | ============================================================ 49 | ============================================================ 50 | Training 51 | ============================================================ 52 | Epoch 0, Dev Acc: 65.0000, Best Ever: 65.0000 53 | Test Acc: 61.5047 54 | Epoch 1, Dev Acc: 75.0000, Best Ever: 75.0000 55 | Test Acc: 80.5601 56 | Epoch 2, Dev Acc: 80.0000, Best Ever: 80.0000 57 | Test Acc: 73.4761 58 | Epoch 3, Dev Acc: 75.0000, Best Ever: 80.0000 59 | Epoch 4, Dev Acc: 85.0000, Best Ever: 85.0000 60 | Test Acc: 82.0428 61 | Epoch 5, Dev Acc: 55.0000, Best Ever: 85.0000 62 | Epoch 6, Dev Acc: 85.0000, Best Ever: 85.0000 63 | Epoch 7, Dev Acc: 85.0000, Best Ever: 85.0000 64 | Epoch 8, Dev Acc: 85.0000, Best Ever: 85.0000 65 | Epoch 9, Dev Acc: 70.0000, Best Ever: 85.0000 66 | Final Dev Acc: 85.0000, Final Test Acc: 82.0428 67 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 68 | device: cuda 69 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 70 | getting train examples... 71 | number of examples with label '0': 50 72 | number of examples with label '1': 1000 73 | getting dev examples... 74 | number of examples with label '0': 10 75 | number of examples with label '1': 10 76 | getting test examples... 
77 | #train: 1050 78 | #dev: 20 79 | #test: 1821 80 | ============================================================ 81 | Pre-training 82 | ============================================================ 83 | ============================================================ 84 | Training 85 | ============================================================ 86 | Epoch 0, Dev Acc: 55.0000, Best Ever: 55.0000 87 | Test Acc: 54.0362 88 | Epoch 1, Dev Acc: 70.0000, Best Ever: 70.0000 89 | Test Acc: 68.0395 90 | Epoch 2, Dev Acc: 75.0000, Best Ever: 75.0000 91 | Test Acc: 75.3432 92 | Epoch 3, Dev Acc: 80.0000, Best Ever: 80.0000 93 | Test Acc: 78.6930 94 | Epoch 4, Dev Acc: 70.0000, Best Ever: 80.0000 95 | Epoch 5, Dev Acc: 50.0000, Best Ever: 80.0000 96 | Epoch 6, Dev Acc: 90.0000, Best Ever: 90.0000 97 | Test Acc: 78.5832 98 | Epoch 7, Dev Acc: 75.0000, Best Ever: 90.0000 99 | Epoch 8, Dev Acc: 70.0000, Best Ever: 90.0000 100 | Epoch 9, Dev Acc: 70.0000, Best Ever: 90.0000 101 | Final Dev Acc: 90.0000, Final Test Acc: 78.5832 102 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 103 | device: cuda 104 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 105 | getting train examples... 106 | number of examples with label '0': 50 107 | number of examples with label '1': 1000 108 | getting dev examples... 109 | number of examples with label '0': 10 110 | number of examples with label '1': 10 111 | getting test examples... 
112 | #train: 1050 113 | #dev: 20 114 | #test: 1821 115 | ============================================================ 116 | Pre-training 117 | ============================================================ 118 | ============================================================ 119 | Training 120 | ============================================================ 121 | Epoch 0, Dev Acc: 70.0000, Best Ever: 70.0000 122 | Test Acc: 64.2504 123 | Epoch 1, Dev Acc: 70.0000, Best Ever: 70.0000 124 | Epoch 2, Dev Acc: 85.0000, Best Ever: 85.0000 125 | Test Acc: 79.2971 126 | Epoch 3, Dev Acc: 65.0000, Best Ever: 85.0000 127 | Epoch 4, Dev Acc: 75.0000, Best Ever: 85.0000 128 | Epoch 5, Dev Acc: 65.0000, Best Ever: 85.0000 129 | Epoch 6, Dev Acc: 70.0000, Best Ever: 85.0000 130 | Epoch 7, Dev Acc: 65.0000, Best Ever: 85.0000 131 | Epoch 8, Dev Acc: 75.0000, Best Ever: 85.0000 132 | Epoch 9, Dev Acc: 70.0000, Best Ever: 85.0000 133 | Final Dev Acc: 85.0000, Final Test Acc: 79.2971 134 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 135 | device: cuda 136 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 137 | getting train examples... 138 | number of examples with label '0': 50 139 | number of examples with label '1': 1000 140 | getting dev examples... 141 | number of examples with label '0': 10 142 | number of examples with label '1': 10 143 | getting test examples... 
144 | #train: 1050 145 | #dev: 20 146 | #test: 1821 147 | ============================================================ 148 | Pre-training 149 | ============================================================ 150 | ============================================================ 151 | Training 152 | ============================================================ 153 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 154 | Test Acc: 49.9176 155 | Epoch 1, Dev Acc: 60.0000, Best Ever: 60.0000 156 | Test Acc: 52.6085 157 | Epoch 2, Dev Acc: 50.0000, Best Ever: 60.0000 158 | Epoch 3, Dev Acc: 50.0000, Best Ever: 60.0000 159 | Epoch 4, Dev Acc: 65.0000, Best Ever: 65.0000 160 | Test Acc: 62.2186 161 | Epoch 5, Dev Acc: 50.0000, Best Ever: 65.0000 162 | Epoch 6, Dev Acc: 60.0000, Best Ever: 65.0000 163 | Epoch 7, Dev Acc: 60.0000, Best Ever: 65.0000 164 | Epoch 8, Dev Acc: 60.0000, Best Ever: 65.0000 165 | Epoch 9, Dev Acc: 75.0000, Best Ever: 75.0000 166 | Test Acc: 69.3026 167 | Final Dev Acc: 75.0000, Final Test Acc: 69.3026 168 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 169 | device: cuda 170 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 171 | getting train examples... 172 | number of examples with label '0': 50 173 | number of examples with label '1': 1000 174 | getting dev examples... 175 | number of examples with label '0': 10 176 | number of examples with label '1': 10 177 | getting test examples... 
178 | #train: 1050 179 | #dev: 20 180 | #test: 1821 181 | ============================================================ 182 | Pre-training 183 | ============================================================ 184 | ============================================================ 185 | Training 186 | ============================================================ 187 | Epoch 0, Dev Acc: 60.0000, Best Ever: 60.0000 188 | Test Acc: 60.6260 189 | Epoch 1, Dev Acc: 70.0000, Best Ever: 70.0000 190 | Test Acc: 79.1323 191 | Epoch 2, Dev Acc: 70.0000, Best Ever: 70.0000 192 | Epoch 3, Dev Acc: 80.0000, Best Ever: 80.0000 193 | Test Acc: 81.6584 194 | Epoch 4, Dev Acc: 85.0000, Best Ever: 85.0000 195 | Test Acc: 74.2998 196 | Epoch 5, Dev Acc: 75.0000, Best Ever: 85.0000 197 | Epoch 6, Dev Acc: 80.0000, Best Ever: 85.0000 198 | Epoch 7, Dev Acc: 80.0000, Best Ever: 85.0000 199 | Epoch 8, Dev Acc: 85.0000, Best Ever: 85.0000 200 | Epoch 9, Dev Acc: 85.0000, Best Ever: 85.0000 201 | Final Dev Acc: 85.0000, Final Test Acc: 74.2998 202 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 203 | device: cuda 204 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 205 | getting train examples... 206 | number of examples with label '0': 50 207 | number of examples with label '1': 1000 208 | getting dev examples... 209 | number of examples with label '0': 10 210 | number of examples with label '1': 10 211 | getting test examples... 
212 | #train: 1050 213 | #dev: 20 214 | #test: 1821 215 | ============================================================ 216 | Pre-training 217 | ============================================================ 218 | ============================================================ 219 | Training 220 | ============================================================ 221 | Epoch 0, Dev Acc: 70.0000, Best Ever: 70.0000 222 | Test Acc: 74.9039 223 | Epoch 1, Dev Acc: 65.0000, Best Ever: 70.0000 224 | Epoch 2, Dev Acc: 65.0000, Best Ever: 70.0000 225 | Epoch 3, Dev Acc: 50.0000, Best Ever: 70.0000 226 | Epoch 4, Dev Acc: 55.0000, Best Ever: 70.0000 227 | Epoch 5, Dev Acc: 65.0000, Best Ever: 70.0000 228 | Epoch 6, Dev Acc: 65.0000, Best Ever: 70.0000 229 | Epoch 7, Dev Acc: 50.0000, Best Ever: 70.0000 230 | Epoch 8, Dev Acc: 50.0000, Best Ever: 70.0000 231 | Epoch 9, Dev Acc: 70.0000, Best Ever: 70.0000 232 | Final Dev Acc: 70.0000, Final Test Acc: 74.9039 233 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 234 | device: cuda 235 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 236 | getting train examples... 237 | number of examples with label '0': 50 238 | number of examples with label '1': 1000 239 | getting dev examples... 240 | number of examples with label '0': 10 241 | number of examples with label '1': 10 242 | getting test examples... 
243 | #train: 1050 244 | #dev: 20 245 | #test: 1821 246 | ============================================================ 247 | Pre-training 248 | ============================================================ 249 | ============================================================ 250 | Training 251 | ============================================================ 252 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 253 | Test Acc: 49.9176 254 | Epoch 1, Dev Acc: 80.0000, Best Ever: 80.0000 255 | Test Acc: 76.0022 256 | Epoch 2, Dev Acc: 70.0000, Best Ever: 80.0000 257 | Epoch 3, Dev Acc: 65.0000, Best Ever: 80.0000 258 | Epoch 4, Dev Acc: 80.0000, Best Ever: 80.0000 259 | Epoch 5, Dev Acc: 75.0000, Best Ever: 80.0000 260 | Epoch 6, Dev Acc: 75.0000, Best Ever: 80.0000 261 | Epoch 7, Dev Acc: 80.0000, Best Ever: 80.0000 262 | Epoch 8, Dev Acc: 75.0000, Best Ever: 80.0000 263 | Epoch 9, Dev Acc: 80.0000, Best Ever: 80.0000 264 | Final Dev Acc: 80.0000, Final Test Acc: 76.0022 265 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 266 | device: cuda 267 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 268 | getting train examples... 269 | number of examples with label '0': 50 270 | number of examples with label '1': 1000 271 | getting dev examples... 272 | number of examples with label '0': 10 273 | number of examples with label '1': 10 274 | getting test examples... 
275 | #train: 1050 276 | #dev: 20 277 | #test: 1821 278 | ============================================================ 279 | Pre-training 280 | ============================================================ 281 | ============================================================ 282 | Training 283 | ============================================================ 284 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 285 | Test Acc: 49.9176 286 | Epoch 1, Dev Acc: 50.0000, Best Ever: 50.0000 287 | Epoch 2, Dev Acc: 85.0000, Best Ever: 85.0000 288 | Test Acc: 78.5283 289 | Epoch 3, Dev Acc: 65.0000, Best Ever: 85.0000 290 | Epoch 4, Dev Acc: 60.0000, Best Ever: 85.0000 291 | Epoch 5, Dev Acc: 80.0000, Best Ever: 85.0000 292 | Epoch 6, Dev Acc: 75.0000, Best Ever: 85.0000 293 | Epoch 7, Dev Acc: 75.0000, Best Ever: 85.0000 294 | Epoch 8, Dev Acc: 80.0000, Best Ever: 85.0000 295 | Epoch 9, Dev Acc: 65.0000, Best Ever: 85.0000 296 | Final Dev Acc: 85.0000, Final Test Acc: 78.5283 297 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 298 | device: cuda 299 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 300 | getting train examples... 301 | number of examples with label '0': 50 302 | number of examples with label '1': 1000 303 | getting dev examples... 304 | number of examples with label '0': 10 305 | number of examples with label '1': 10 306 | getting test examples... 
307 | #train: 1050 308 | #dev: 20 309 | #test: 1821 310 | ============================================================ 311 | Pre-training 312 | ============================================================ 313 | ============================================================ 314 | Training 315 | ============================================================ 316 | Epoch 0, Dev Acc: 70.0000, Best Ever: 70.0000 317 | Test Acc: 73.2015 318 | Epoch 1, Dev Acc: 70.0000, Best Ever: 70.0000 319 | Epoch 2, Dev Acc: 50.0000, Best Ever: 70.0000 320 | Epoch 3, Dev Acc: 70.0000, Best Ever: 70.0000 321 | Epoch 4, Dev Acc: 50.0000, Best Ever: 70.0000 322 | Epoch 5, Dev Acc: 60.0000, Best Ever: 70.0000 323 | Epoch 6, Dev Acc: 75.0000, Best Ever: 75.0000 324 | Test Acc: 80.9995 325 | Epoch 7, Dev Acc: 75.0000, Best Ever: 75.0000 326 | Epoch 8, Dev Acc: 50.0000, Best Ever: 75.0000 327 | Epoch 9, Dev Acc: 60.0000, Best Ever: 75.0000 328 | Final Dev Acc: 75.0000, Final Test Acc: 80.9995 329 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 330 | device: cuda 331 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 332 | getting train examples... 333 | number of examples with label '0': 50 334 | number of examples with label '1': 1000 335 | getting dev examples... 336 | number of examples with label '0': 10 337 | number of examples with label '1': 10 338 | getting test examples... 
339 | #train: 1050 340 | #dev: 20 341 | #test: 1821 342 | ============================================================ 343 | Pre-training 344 | ============================================================ 345 | ============================================================ 346 | Training 347 | ============================================================ 348 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 349 | Test Acc: 49.9176 350 | Epoch 1, Dev Acc: 80.0000, Best Ever: 80.0000 351 | Test Acc: 86.1065 352 | Epoch 2, Dev Acc: 65.0000, Best Ever: 80.0000 353 | Epoch 3, Dev Acc: 70.0000, Best Ever: 80.0000 354 | Epoch 4, Dev Acc: 70.0000, Best Ever: 80.0000 355 | Epoch 5, Dev Acc: 75.0000, Best Ever: 80.0000 356 | Epoch 6, Dev Acc: 75.0000, Best Ever: 80.0000 357 | Epoch 7, Dev Acc: 50.0000, Best Ever: 80.0000 358 | Epoch 8, Dev Acc: 65.0000, Best Ever: 80.0000 359 | Epoch 9, Dev Acc: 70.0000, Best Ever: 80.0000 360 | Final Dev Acc: 80.0000, Final Test Acc: 86.1065 361 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 362 | device: cuda 363 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 364 | getting train examples... 365 | number of examples with label '0': 50 366 | number of examples with label '1': 1000 367 | getting dev examples... 368 | number of examples with label '0': 10 369 | number of examples with label '1': 10 370 | getting test examples... 
371 | #train: 1050 372 | #dev: 20 373 | #test: 1821 374 | ============================================================ 375 | Pre-training 376 | ============================================================ 377 | ============================================================ 378 | Training 379 | ============================================================ 380 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 381 | Test Acc: 50.4119 382 | Epoch 1, Dev Acc: 60.0000, Best Ever: 60.0000 383 | Test Acc: 62.2735 384 | Epoch 2, Dev Acc: 70.0000, Best Ever: 70.0000 385 | Test Acc: 69.0280 386 | Epoch 3, Dev Acc: 60.0000, Best Ever: 70.0000 387 | Epoch 4, Dev Acc: 75.0000, Best Ever: 75.0000 388 | Test Acc: 72.2131 389 | Epoch 5, Dev Acc: 80.0000, Best Ever: 80.0000 390 | Test Acc: 78.9127 391 | Epoch 6, Dev Acc: 55.0000, Best Ever: 80.0000 392 | Epoch 7, Dev Acc: 75.0000, Best Ever: 80.0000 393 | Epoch 8, Dev Acc: 50.0000, Best Ever: 80.0000 394 | Epoch 9, Dev Acc: 50.0000, Best Ever: 80.0000 395 | Final Dev Acc: 80.0000, Final Test Acc: 78.9127 396 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 397 | device: cuda 398 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 399 | getting train examples... 400 | number of examples with label '0': 50 401 | number of examples with label '1': 1000 402 | getting dev examples... 403 | number of examples with label '0': 10 404 | number of examples with label '1': 10 405 | getting test examples... 
406 | #train: 1050 407 | #dev: 20 408 | #test: 1821 409 | ============================================================ 410 | Pre-training 411 | ============================================================ 412 | ============================================================ 413 | Training 414 | ============================================================ 415 | Epoch 0, Dev Acc: 50.0000, Best Ever: 50.0000 416 | Test Acc: 49.9176 417 | Epoch 1, Dev Acc: 65.0000, Best Ever: 65.0000 418 | Test Acc: 64.4152 419 | Epoch 2, Dev Acc: 50.0000, Best Ever: 65.0000 420 | Epoch 3, Dev Acc: 50.0000, Best Ever: 65.0000 421 | Epoch 4, Dev Acc: 65.0000, Best Ever: 65.0000 422 | Epoch 5, Dev Acc: 70.0000, Best Ever: 70.0000 423 | Test Acc: 69.2477 424 | Epoch 6, Dev Acc: 70.0000, Best Ever: 70.0000 425 | Epoch 7, Dev Acc: 65.0000, Best Ever: 70.0000 426 | Epoch 8, Dev Acc: 50.0000, Best Ever: 70.0000 427 | Epoch 9, Dev Acc: 50.0000, Best Ever: 70.0000 428 | Final Dev Acc: 70.0000, Final Test Acc: 69.2477 429 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 430 | device: cuda 431 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 432 | getting train examples... 433 | number of examples with label '0': 50 434 | number of examples with label '1': 1000 435 | getting dev examples... 436 | number of examples with label '0': 10 437 | number of examples with label '1': 10 438 | getting test examples... 
439 | #train: 1050 440 | #dev: 20 441 | #test: 1821 442 | ============================================================ 443 | Pre-training 444 | ============================================================ 445 | ============================================================ 446 | Training 447 | ============================================================ 448 | Epoch 0, Dev Acc: 60.0000, Best Ever: 60.0000 449 | Test Acc: 51.0708 450 | Epoch 1, Dev Acc: 50.0000, Best Ever: 60.0000 451 | Epoch 2, Dev Acc: 70.0000, Best Ever: 70.0000 452 | Test Acc: 74.0802 453 | Epoch 3, Dev Acc: 90.0000, Best Ever: 90.0000 454 | Test Acc: 83.3059 455 | Epoch 4, Dev Acc: 75.0000, Best Ever: 90.0000 456 | Epoch 5, Dev Acc: 60.0000, Best Ever: 90.0000 457 | Epoch 6, Dev Acc: 70.0000, Best Ever: 90.0000 458 | Epoch 7, Dev Acc: 75.0000, Best Ever: 90.0000 459 | Epoch 8, Dev Acc: 80.0000, Best Ever: 90.0000 460 | Epoch 9, Dev Acc: 90.0000, Best Ever: 90.0000 461 | Final Dev Acc: 90.0000, Final Test Acc: 83.3059 462 | Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex. 463 | device: cuda 464 | Namespace(batch_size=25, data_seed=159, dev_num_per_class=10, epochs=10, imbalance_rate=0.05, learning_rate=4e-05, min_epochs=0, pretrain_epochs=0, task='sst-2', train_num_per_class=1000) 465 | getting train examples... 466 | number of examples with label '0': 50 467 | number of examples with label '1': 1000 468 | getting dev examples... 469 | number of examples with label '0': 10 470 | number of examples with label '1': 10 471 | getting test examples... 
472 | #train: 1050 473 | #dev: 20 474 | #test: 1821 475 | ============================================================ 476 | Pre-training 477 | ============================================================ 478 | ============================================================ 479 | Training 480 | ============================================================ 481 | Epoch 0, Dev Acc: 65.0000, Best Ever: 65.0000 482 | Test Acc: 64.7996 483 | Epoch 1, Dev Acc: 75.0000, Best Ever: 75.0000 484 | Test Acc: 72.2680 485 | Epoch 2, Dev Acc: 65.0000, Best Ever: 75.0000 486 | Epoch 3, Dev Acc: 50.0000, Best Ever: 75.0000 487 | Epoch 4, Dev Acc: 50.0000, Best Ever: 75.0000 488 | Epoch 5, Dev Acc: 70.0000, Best Ever: 75.0000 489 | Epoch 6, Dev Acc: 50.0000, Best Ever: 75.0000 490 | Epoch 7, Dev Acc: 75.0000, Best Ever: 75.0000 491 | Epoch 8, Dev Acc: 75.0000, Best Ever: 75.0000 492 | Epoch 9, Dev Acc: 75.0000, Best Ever: 75.0000 493 | Final Dev Acc: 75.0000, Final Test Acc: 72.2680 494 | --------------------------------------------------------------------------------