├── bert ├── requirements.txt ├── eval.py ├── predict.py ├── makefile ├── learner.py ├── task-finetune.py ├── optimizer.py ├── ner_data.py ├── run_ner.py ├── bert_train_data.py ├── lm_finetune.py └── eng-tune-ex-i.ipynb ├── .gitignore └── README.md /bert/requirements.txt: -------------------------------------------------------------------------------- 1 | fastai 2 | pytorch_pretrained_bert 3 | fire 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /ulmfit/data/sst/models/fit_head.pth 2 | *logs*/ 3 | *.ipynb 4 | *.tar.gz 5 | *__pycache__ 6 | *data 7 | *.job 8 | *.csv 9 | /bert/installs 10 | /ulmfit/fastai-scripts/README.md 11 | /ulmfit/fastai-scripts/create_toks.py 12 | -------------------------------------------------------------------------------- /bert/eval.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import fire 4 | from ner_data import label2idx 5 | 6 | 7 | def eval(filename, eps=1e-9): 8 | y_true = np.array([label2idx[line.split()[0]] 9 | for line in open(filename, 'r').read().splitlines() 10 | if len(line.split()) > 2]) 11 | y_pred = np.array([label2idx[line.split()[1]] 12 | for line in open(filename, 'r').read().splitlines() 13 | if len(line.split()) > 2]) 14 | 15 | all_pos = len(y_pred[y_pred>1]) 16 | actual_pos = len(y_true[y_true>1]) 17 | correct_pos =(np.logical_and(y_true==y_pred, y_true>1)).sum().item() 18 | print(f'{all_pos} - {actual_pos} -> {correct_pos}') 19 | prec = correct_pos / (all_pos + eps) 20 | rec = correct_pos / (actual_pos + eps) 21 | f1 = (2*prec*rec)/(prec+rec+eps) 22 | print('f1 ',f1) 23 | 24 | if __name__=="__main__": 25 | fire.Fire(eval) 26 | -------------------------------------------------------------------------------- /bert/predict.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import fire 4 | import torch 5 | from fastai.basic_train import load_learner 6 | from ner_data import VOCAB, idx2label 7 | from pytorch_pretrained_bert import BertForTokenClassification 8 | from pytorch_pretrained_bert.tokenization import BertTokenizer 9 | 10 | 11 | def to_feature(sent, bert_model): 12 | tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=False) 13 | words = ['[CLS]']+sent+['[SEP]'] 14 | 15 | x, mask = [], [] 16 | for w in words: 17 | w = w.strip() 18 | tokens = tokenizer.tokenize(w) if w not in ("[CLS]", "[SEP]") else [w] 19 | xx = tokenizer.convert_tokens_to_ids(tokens) 20 | m = [1] + [0]*(len(tokens)-1) 21 | x.extend(xx) 22 | mask.extend(m) 23 | t = torch.LongTensor 24 | print('input: ',tokenizer.convert_ids_to_tokens(x)) 25 | return t([x]), t(mask) 26 | 27 | def predict(name, lang='eng', path='learn', model_dir='models'): 28 | path, model_dir = Path(path), Path(model_dir) 29 | print('Loading model...') 30 | device = 'cpu' 31 | state = torch.load(path/model_dir/f'{name}.pth', map_location=device) 32 | bert_model = 'bert-base-cased' if lang=='eng' else 'bert-base-multilingual-cased' 33 | print(f'Lang: {lang}\nModel: {bert_model}\nRun: {name}') 34 | model = BertForTokenClassification.from_pretrained(bert_model, num_labels=len(VOCAB), cache_dir='bertm') 35 | model.load_state_dict(state['model'], strict=True) 36 | print('Done') 37 | 38 | try: 39 | while True: 40 | # get sentence 41 | sent = input('Enter sentence: ') 42 | words = sent.split() 43 | x, mask = to_feature(words, 
bert_model) 44 | with torch.no_grad(): 45 | # predict named entities 46 | out = model(x) 47 | pred = out.argmax(-1).view(-1) 48 | print(pred) 49 | active_pred = pred[mask==1] 50 | print('Named Entities') 51 | active_pred = active_pred.tolist() 52 | for w,l in zip(words,active_pred[1:-1]): 53 | print(f'{w} {idx2label[l]}') 54 | 55 | except Exception as e: 56 | print('See ya') 57 | 58 | if __name__ == '__main__': 59 | fire.Fire(predict) 60 | -------------------------------------------------------------------------------- /bert/makefile: -------------------------------------------------------------------------------- 1 | GERM_URL=https://raw.githubusercontent.com/Nidhi-K/Natural-Language-Processing-Projects/master/Sequential%20CRF%20for%20NER/data/ 2 | GERMI_URL=https://raw.githubusercontent.com/MaviccPRP/ger_ner_evals/master/corpora/conll2003/ 3 | ENG_URL=https://raw.githubusercontent.com/kyzhouhzau/BERT-NER/master/data/ 4 | ENGI_URL=https://raw.githubusercontent.com/smb564/NLP/master/src/ 5 | 6 | 7 | D_TRAIN=$(GERM_URL)deu.train 8 | D_VALID=$(GERM_URL)deu.testa 9 | D_TEST=$(GERM_URL)deu.testb 10 | TRAIN=$(ENG_URL)train.txt 11 | VALID=$(ENG_URL)dev.txt 12 | TEST=$(ENG_URL)test.txt 13 | 14 | EXEC=python run_ner.py 15 | 16 | help: 17 | $(EXEC) --help 18 | 19 | basic-bert: 20 | $(EXEC) --batch-size=$(BS) --epochs=4 --lr=5e-5 $(W) 21 | $(EXEC) --batch-size=$(BS) --epochs=4 --lr=3e-5 $(W) 22 | $(EXEC) --batch-size=$(BS) --epochs=4 --lr=2e-5 $(W) 23 | 24 | icetown: 25 | $(EXEC) --do-train --do-eval --batch-size=32 --epochs=4 --lr=$(LR) --freez --discr --one-cycle --lang=$(L) 26 | 27 | test1: 28 | $(EXEC) --ds-size=1 $(W) 29 | 30 | apex-test: 31 | $(EXEC) --ds-size=1 --fp16 $(W) 32 | 33 | run: 34 | $(EXEC) --batch-size=32 --epochs=4 --lr=5e-5 $(W) 35 | 36 | run-c: 37 | $(EXEC) --batch-size=8 --epochs=3 --lr=5e-5 $(W) 38 | 39 | run-b: 40 | $(EXEC) --batch-size=16 --epochs=3 --lr=5e-5 $(W) 41 | 42 | apex-run: 43 | $(EXEC) --batch_size=16 --epochs=1 --fp16 $(W) 44 | 45 | 2bert: 46 | python bert_train_data.py --train_corpus $(DIR)docs.txt --bert_model $(M) --output_dir $(DIR) --epochs_to_generate $(E) --max_seq_len 256 47 | 48 | pretrain_lm: 49 | python $(FILE) --pregenerated_data $(DIR) --bert_model $(M) --output_dir pretrain --epochs $(E) --train_batch_size 16 50 | 51 | datasets: 52 | make dataset-eng 53 | make dataset-deu 54 | 55 | dataset-eng: 56 | mkdir -p data/conll-2003/eng 57 | wget --progress=bar $(TRAIN) && mv train.txt data/conll-2003/eng 58 | wget --progress=bar $(VALID) && mv dev.txt data/conll-2003/eng 59 | wget --progress=bar $(TEST) && mv test.txt data/conll-2003/eng 60 | 61 | dataset-engI: 62 | mkdir -p data/conll-2003-I/eng 63 | wget --progress=bar $(ENGI_URL)conll2003.eng.train && mv conll2003.eng.train data/conll-2003-I/eng/train.txt 64 | wget --progress=bar $(ENGI_URL)conll2003.eng.testa && mv conll2003.eng.testa data/conll-2003-I/eng/dev.txt 65 | wget --progress=bar $(ENGI_URL)conll2003.eng.testb && mv conll2003.eng.testb data/conll-2003-I/eng/test.txt 66 | 67 | dataset-deu: 68 | mkdir -p data/conll-2003/deu 69 | wget --progress=bar $(D_TRAIN) && mv deu.train train.txt && mv train.txt data/conll-2003/deu 70 | wget --progress=bar $(D_VALID) && mv deu.testa dev.txt && mv dev.txt data/conll-2003/deu 71 | wget --progress=bar $(D_TEST) && mv deu.testb test.txt && mv test.txt data/conll-2003/deu 72 | 73 | dataset-deuI: 74 | mkdir -p data/conll-2003-I/deu 75 | wget --progress=bar $(GERMI_URL)deuutf.train && mv deuutf.train data/conll-2003-I/deu/train.txt 76 | wget --progress=bar 
$(GERMI_URL)deuutf.testa && mv deuutf.testa data/conll-2003-I/deu/dev.txt 77 | wget --progress=bar $(GERMI_URL)deu.testb && mv deu.testb data/conll-2003-I/deu/test.txt 78 | 79 | 80 | model: 81 | wget https://s3.amazonaws.com/models.huggingface.co/bert/$(BERT).tar.gz 82 | 83 | mv-logs: 84 | mkdir -p $(DIR) 85 | mv logs/* $(DIR)/ 86 | 87 | clean-logs: 88 | rm logs/* 89 | 90 | clean-models: 91 | rm *.tar.gz 92 | 93 | rm-dataset: 94 | rm -rf data 95 | 96 | fastai: 97 | pip install git+https://github.com/fastai/fastai.git 98 | -------------------------------------------------------------------------------- /bert/learner.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | 5 | import torch 6 | from fastai.basic_train import Learner, LearnerCallback 7 | from fastai.callback import Callback 8 | from fastai.core import is_listy 9 | from fastai.metrics import fbeta 10 | from fastai.torch_core import add_metrics, num_distrib, to_device 11 | from ner_data import VOCAB, idx2label 12 | from pytorch_pretrained_bert.modeling import BertModel, BertPreTrainedModel 13 | from pytorch_pretrained_bert.optimization import warmup_linear 14 | 15 | EPOCH =0 16 | WEIGHTS = torch.tensor([0.2, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]) 17 | 18 | def ner_loss_func(out, *ys, zero=False): 19 | ''' 20 | Loss function - to use with fastai learner 21 | It calculates the loss for token classification using softmax cross entropy 22 | If out is already the loss, we simply return the loss 23 | ''' 24 | if torch.cuda.is_available(): 25 | ys = to_device(ys, torch.cuda.current_device()) 26 | 27 | # If out is already the loss 28 | if out.size()<=torch.Size([2]): 29 | loss = out.mean() # return mean in case dataparallel is used 30 | else: 31 | loss_fct = torch.nn.CrossEntropyLoss(reduction='none') 32 | if zero: loss_fct = torch.nn.CrossEntropyLoss(ignore_index=0 , reduction='none') 33 | 34 | one, labels, attention_mask = ys 35 | # Only keep active parts of the loss 36 | if attention_mask is not None: 37 | active_loss = attention_mask.view(-1) == 1 38 | active_logits = out.view(-1, len(VOCAB))[active_loss] 39 | active_labels = labels.view(-1)[active_loss] 40 | loss = loss_fct(active_logits, active_labels) 41 | logging.info(active_labels) 42 | logging.info(loss) 43 | logging.info(loss.sum(-1)) 44 | loss = loss.mean(-1) 45 | logging.info(loss) 46 | else: # if no attention mask specified calculate loss on all tokens 47 | loss = loss_fct(out.view(-1, len(VOCAB)), labels.view(-1)) 48 | return loss 49 | 50 | 51 | def conll_f1(pred, *true, eps:float = 1e-9): 52 | ''' NOTE: calulcates F1 per batch 53 | - use Conll_F1 callback class to calculate overall F1 score 54 | ''' 55 | if torch.cuda.is_available(): 56 | true = to_device(true, torch.cuda.current_device()) 57 | pred = pred.argmax(-1) 58 | _, label_ids, label_mask = true 59 | mask = label_mask.view(-1)==1 60 | y_pred = pred.view(-1)[mask] 61 | y_true = label_ids.view(-1)[mask] 62 | 63 | all_pos = len(y_pred[y_pred>1]) 64 | actual_pos = len(y_true[y_true>1]) 65 | correct_pos =(np.logical_and(y_true==y_pred, y_true>1)).sum().item() 66 | logging.info(f'{all_pos} - {actual_pos} -> {correct_pos}') 67 | prec = correct_pos / (all_pos + eps) 68 | rec = correct_pos / (actual_pos + eps) 69 | f1 = (2*prec*rec)/(prec+rec+eps) 70 | logging.info(f'f1: {f1} prec: {prec}, rec: {rec}') 71 | 72 | return torch.Tensor([f1]) 73 | 74 | class Conll_F1(Callback): 75 | 76 | def __init__(self): 77 | super().__init__() 78 | 
self.__name__='Total F1' 79 | self.name = 'Total F1' 80 | 81 | def on_epoch_begin(self, **kwargs): 82 | self.correct, self.predict, self.true, self.predict2 = 0,0,0,0 83 | 84 | def on_batch_end(self, last_output, last_target, **kwargs): 85 | pred = last_output.argmax(-1) 86 | true = last_target 87 | if torch.cuda.is_available(): 88 | true = to_device(true, torch.cuda.current_device()) 89 | _, label_ids, label_mask = true 90 | y_pred = pred.view(-1) 91 | y_true = label_ids.view(-1) 92 | self.predict2 += len(y_pred[y_pred>1]) 93 | preds = y_pred[y_true!=0] # mask off padding 94 | logging.info(y_true) 95 | logging.info(y_pred) 96 | logging.info(preds) 97 | self.predict += len(preds[preds>1]) 98 | self.true += len(y_true[y_true>1]) 99 | self.correct +=(np.logical_and(y_true==y_pred, y_true>1)).sum().item() 100 | 101 | def on_epoch_end(self, last_metrics, **kwargs): 102 | eps = 1e-9 103 | prec = self.correct / (self.predict + eps) 104 | rec = self.correct / (self.true + eps) 105 | logging.info(f"====epoch {kwargs['epoch']}====") 106 | logging.info(f'num pred2: {self.predict2}') 107 | logging.info(f'num pred: {self.predict}') 108 | logging.info(f'num corr: {self.correct}') 109 | logging.info(f'num true: {self.true}') 110 | logging.info(f'prec: {prec}') 111 | logging.info(f'rec: {rec}') 112 | f1 =(2*prec*rec)/(prec+rec+eps) 113 | logging.info(f'f1: {f1}') 114 | return add_metrics(last_metrics,f1) 115 | 116 | 117 | 118 | class FP16_Callback(LearnerCallback): 119 | 120 | def __init__(self, 121 | learn: Learner, 122 | train_opt_steps: int, 123 | gradient_accumulation_steps: int = 1, 124 | warmup_proportion: float = 0.1, 125 | fp16: bool = True, 126 | global_step: int = 0): 127 | super().__init__(learn) 128 | self.train_opt_steps = train_opt_steps 129 | self.gradient_accumulation_steps = gradient_accumulation_steps 130 | self.warmup_proportion = warmup_proportion 131 | self.fp16 = fp16 132 | self.global_step = global_step 133 | 134 | def on_batch_begin(self, last_input, last_target, train, **kwargs): 135 | # if not train: 136 | return {'last_input': last_input[:2], 'last_target': last_target} 137 | 138 | def on_backward_begin(self, last_loss, **kwargs): 139 | ''' 140 | returns loss, skip_backward 141 | ''' 142 | loss = last_loss 143 | if self.gradient_accumulation_steps > 1: 144 | loss /= self.gradient_accumulation_steps 145 | 146 | if self.fp16: 147 | self.learn.opt.backward(loss) # assumes an fp16-aware optimizer wrapper, e.g. apex's FP16_Optimizer 148 | # modify learning rate with special BERT warm up 149 | 150 | lr_this_step = self.learn.opt.get_lr() * warmup_linear( 151 | self.global_step/self.train_opt_steps, self.warmup_proportion) 152 | for param_group in self.learn.opt.param_groups: 153 | param_group['lr'] = lr_this_step 154 | self.global_step += 1 155 | return {'last_loss': loss, 'skip_bwd': self.fp16} 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GermLM 2 | Exploring Multilingual Language Models and their effectiveness for Named Entity Recognition (NER) in German and English. 3 | 4 | ## Requirements 5 | * Python 3.x 6 | 7 | Requirements can be installed via `pip` using the `requirements.txt` (see the example below). 8 | We use 9 | * pytorch 10 | * [pytorch_pretrained_bert](https://github.com/huggingface/pytorch-pretrained-BERT/) 11 | * [fastai] 12 | 13 | It is recommended to run the experiments on at least one GPU. Our experiments were conducted using two. 14 | The prediction is run on CPU only. 
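To install the requirements, assuming you are working from the repository root:
```shell
pip install -r bert/requirements.txt
```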
15 | 16 | ## NER experiments 17 | We use Google's [BERT] models (English BERT base and multilingual BERT base, both cased) and evaluate them on the [CoNLL 2003] NER dataset. 18 | 19 | Create the appropriate datasets using the makefile. 20 | 21 | Run `run_ner.py`. Usage (listing the most important options): 22 | * `lang`: select the language to train. Supported languages are `eng`, `deu`, and `engm`, which uses the English data with the multilingual model 23 | * `batch_size` 24 | * `lr`: learning rate 25 | * `epochs`: number of epochs to train 26 | * `dataset`: path to the dataset. Note: `lang` will be appended to this path to access the language-specific dataset. 27 | * `loss`: set to `zero` to mask off all padding during the loss calculation 28 | * `ds_size`: limit the size of the dataset loaded (for testing) 29 | * `bertAdam`: if this flag is set, the BertAdam optimiser is used 30 | * `save`: saves the final model; it can then be loaded with `predict.py` for NER. 31 | 32 | #### (Example) Replicating the English BERT NER experiment 33 | Create the dataset: 34 | ```shell 35 | make dataset-engI 36 | ``` 37 | Train the NER model: 38 | ``` 39 | python run_ner.py --do-train --do-eval --lr=3e-5 --batch-size=16 --epochs=4 --bertAdam --dataset=data/conll-2003-I/ 40 | ``` 41 | 42 | ### [DEMO] Use your trained model for NER 43 | If you run `run_ner.py` with the `save` flag, the saved model can be loaded in `predict.py`, which will recognise the named entities in the sentences you provide. Note that you only need to provide the file name; the learner will automatically look for it in its directory and append the correct extension. 44 | 45 | ``` 46 | python predict.py eng_3_model 47 | ``` 48 | 49 | Example output: 50 | ``` 51 | Loading model... 52 | Lang: eng 53 | Model: bert-base-cased 54 | Run: eng_3_model 55 | Done 56 | Enter sentence: Antonia goes to Trinity College Dublin, in Ireland. 57 | input: ['[CLS]', 'Anton', '##ia', 'goes', 'to', 'Trinity', 'College', 'Dublin', ',', 'in', 'Ireland', '.', '[SEP]'] 58 | tensor([0, 4, 0, 1, 1, 5, 5, 5, 1, 1, 2, 1, 0]) 59 | Named Entities 60 | Antonia I-PER 61 | goes O 62 | to O 63 | Trinity I-ORG 64 | College I-ORG 65 | Dublin, I-ORG 66 | in O 67 | Ireland. I-LOC 68 | Enter sentence: ... 69 | ``` 70 | 71 | ## Fine-tuning experiments 72 | We apply the LM fine-tuning methods from [ULMFiT] to the BERT model in order to boost performance. In our experiments this did not improve the results. 73 | 74 | ### LM pre-training 75 | 76 | Use `conll_to_docs` from `ner_data.py` to convert the training set to a document of sentences (a short example call is shown after the file overview below). 77 | 78 | Use the output file you specified as input to the data generation: 79 | ``` 80 | make 2bert DIR='data/conll-2003/eng/' M='bert-base-cased' E=20 81 | ``` 82 | 83 | Then fine-tune the language model on the task data: 84 | ``` 85 | make pretrain_lm FILE='lm_finetune.py' DIR='data/conll-2003/deu/' M='bert-base-multilingual-cased' E=20 86 | ``` 87 | 88 | ### Task fine-tuning 89 | 90 | Learning rates were selected using the Jupyter notebooks. 91 | 92 | Run `task-finetune.py` to fine-tune using the tuning methods from [ULMFiT]. Add `tuned_learner` to load the fine-tuned LM: 93 | ``` 94 | python task-finetune.py --batch-size=16 --epochs=4 --lr=5e-5 --do-train --do-eval --dataset=data/conll-2003-I/ --lang=deu --tuned-learner='pretrain/pytorch_fastai_model_i_bert-base-multilingual-cased_10.bin' 95 | ``` 96 | 97 | ## Results 98 | #### English 99 | model | dataset | dev f1 | test f1 100 | -- |--|--|-- 101 | BERT Large | - | 96.6 | 92.8 102 | BERT Base | - | 96.4 | 92.4 103 | English BERT (ours) | IOB1 | 96.4 | **92.6** 104 | " | BIO | 95.6 | 92.2 105 | Multilingual BERT (ours) | IOB1 | 96.4 | 91.9 106 | " | BIO | 96.5 | 92.1 107 | #### German 108 | model | dataset | dev f1 | test f1 109 | -- |--|--|-- 110 | Ahmed & Mehler | IOB1 | - | 83.64 111 | Riedl & Pado | - | - | 84.73 112 | Multilingual BERT (ours) | IOB1 | **88.44** | **85.81** 113 | " | BIO | 87.49 | 84.98 114 | 115 | Fine-tuning showed no improvement; the results stayed about the same. 116 | 117 | ## File overview 118 | 119 | * `bert_train_data.py`: generates data for LM fine-tuning (see `make 2bert` for example usage) 120 | * `{deu|eng}-tune-ex-i.ipynb`: used to select discriminative learning rates for fine-tuning 121 | * `learner.py`: provides helper functions for the fastai learner, e.g. the loss function and the metric callback 122 | * `lm_finetune.py`: fine-tunes the LM on pregenerated BERT data 123 | * `makefile`: make targets for dataset generation etc. 124 | * `ner_data.py`: contains the data preprocessing 125 | * `optimizer.py`: adaptation of the BertAdam optimiser to work with fastai 126 | * `plots.ipynb`: generates plots for discriminative learning rate selection 127 | * `predict.py`: use a pretrained model for NER 128 | * `requirements.txt`: requirements of the project 129 | * `run_ner.py`: run the NER experiment; trains a BERT model on the CoNLL-2003 data 130 | * `task-finetune.py`: fine-tune with the [ULMFiT] fine-tuning methods (the current discriminative lrs are hard-coded). 
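For the LM pre-training step above, `conll_to_docs` from `ner_data.py` can be called directly to produce the document file consumed by `make 2bert`. A minimal sketch (the file paths are illustrative):
```python
from ner_data import conll_to_docs

# convert the CoNLL-2003 training split into a plain-text document file:
# one sentence per line, with a blank line at each -DOCSTART- boundary
conll_to_docs('data/conll-2003/eng/train.txt', 'data/conll-2003/eng/docs.txt')
```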
131 | 132 | [BERT]:https://arxiv.org/pdf/1810.04805.pdf 133 | [ULMFiT]: https://arxiv.org/pdf/1801.06146.pdf 134 | [ELMo]: https://arxiv.org/abs/1802.05365 135 | [OpenAi]: https://s3-us-west-2.amazonaws.com/openai-assets/research-covers/language-unsupervised/language_understanding_paper.pdf 136 | [AWD-LSTM]: TODO 137 | [Wikitext-103]: https://www.salesforce.com/products/einstein/ai-research/the-wikitext-dependency-language-modeling-dataset/ 138 | [Bookcorpus]: http://yknzhu.wixsite.com/mbweb 139 | 140 | 141 | [CoNLL 2003]:https://www.clips.uantwerpen.be/conll2003/ner/ 142 | [Peters.]:https://www.aclweb.org/anthology/P/P17/P17-1161.pdf 143 | [SNLI 2015]:https://nlp.stanford.edu/projects/snli/ 144 | [ROCStories]:http://cs.rochester.edu/nlp/rocstories/ 145 | 146 | [SB-10K]:http://www.spinningbytes.com/resources/ 147 | [GermEval2014]:https://sites.google.com/site/germeval2014ner/data 148 | [CoNLL2011]:http://conll.cemantix.org/2011/data.html 149 | 150 | [Twitter Corpus+Benchmark]:http://www.aclweb.org/anthology/W17-1106 151 | [NER Shootout]:http://aclweb.org/anthology/P18-2020.pdf 152 | [fastai]:https://github.com/fastai/fastai 153 | [imdb_scripts]:https://github.com/fastai/fastai/tree/master/courses/dl2/imdb_scripts 154 | -------------------------------------------------------------------------------- /bert/task-finetune.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import logging 3 | import random 4 | from functools import partial 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | 9 | import fire 10 | import torch 11 | from fastai.basic_data import DataBunch, DatasetType 12 | from fastai.basic_train import Learner 13 | from fastai.callback import OptimWrapper 14 | from fastai.metrics import fbeta 15 | from fastai.torch_core import flatten_model, to_device 16 | from fastai.train import to_fp16 17 | from learner import Conll_F1, ner_loss_func 18 | from ner_data import VOCAB, NerDataset, idx2label, pad 19 | from optimizer import BertAdam, initBertAdam 20 | from pytorch_pretrained_bert import BertForTokenClassification 21 | from torch.utils.data import DataLoader 22 | 23 | NER = 'conll-2003' 24 | 25 | def init_logger(log_dir, name): 26 | logging.basicConfig(filename=log_dir / (name+'.log'), 27 | filemode='w', 28 | format='%(asctime)s, %(message)s', 29 | datefmt='%H:%M%S', 30 | level=logging.INFO 31 | ) 32 | 33 | def apply_freez(learn, layers, lay): 34 | if lay==0: learn.freeze() 35 | if lay==layers: learn.unfreeze() 36 | else: learn.freeze_to(lay) 37 | print('Freezing layers ', lay, ' off ', layers) 38 | 39 | def bert_layer_list(model): 40 | ms = torch.nn.ModuleList() 41 | 42 | flm = flatten_model(model) 43 | # embedding = [0:5] layer 44 | ms.append(torch.nn.ModuleList(flm[0:5])) 45 | # encoder (12 layers) = [5:16] [16:27] ... 
[126:136] 46 | bert_layergroup_size = 11#33 47 | for i in range(5, 137, bert_layergroup_size): 48 | ms.append(torch.nn.ModuleList(flm[i: i+bert_layergroup_size])) 49 | # pooling layer = [137:139] 50 | ms.append(torch.nn.ModuleList(flm[-4:-2])) 51 | # head = [-2:] 52 | ms.append(torch.nn.ModuleList(flm[-2:])) 53 | return ms 54 | 55 | def run_ner(lang:str='eng', 56 | log_dir:str='logs', 57 | task:str=NER, 58 | batch_size:int=1, 59 | epochs:int=1, 60 | dataset:str='data/conll-2003/', 61 | loss:str='cross', 62 | max_seq_len:int=128, 63 | do_lower_case:bool=False, 64 | warmup_proportion:float=0.1, 65 | rand_seed:int=None, 66 | ds_size:int=None, 67 | data_bunch_path:str='data/conll-2003/db', 68 | tuned_learner:str=None, 69 | do_train:str=False, 70 | do_eval:str=False, 71 | save:bool=False, 72 | nameX:str='ner', 73 | mask:tuple=('s','s'), 74 | ): 75 | name = "_".join(map(str,[nameX,task, lang, mask[0],mask[1], loss, batch_size, max_seq_len,do_train, do_eval])) 76 | log_dir = Path(log_dir) 77 | log_dir.mkdir(parents=True, exist_ok=True) 78 | init_logger(log_dir, name) 79 | 80 | if rand_seed: 81 | random.seed(rand_seed) 82 | np.random.seed(rand_seed) 83 | torch.manual_seed(rand_seed) 84 | if torch.cuda.is_available(): 85 | torch.cuda.manual_seed_all(rand_seed) 86 | 87 | trainset = dataset + lang + '/train.txt' 88 | devset = dataset +lang + '/dev.txt' 89 | testset = dataset + lang + '/test.txt' 90 | 91 | bert_model = 'bert-base-cased' if lang=='eng' else 'bert-base-multilingual-cased' 92 | print(f'Lang: {lang}\nModel: {bert_model}\nRun: {name}') 93 | model = BertForTokenClassification.from_pretrained(bert_model, num_labels=len(VOCAB), cache_dir='bertm') 94 | if tuned_learner: 95 | print('Loading pretrained learner: ', tuned_learner) 96 | model.bert.load_state_dict(torch.load(tuned_learner)) 97 | 98 | model = torch.nn.DataParallel(model) 99 | model_lr_group = bert_layer_list(model) 100 | layers = len(model_lr_group) 101 | kwargs = {'max_seq_len':max_seq_len, 'ds_size':ds_size, 'mask':mask} 102 | 103 | train_dl = DataLoader( 104 | dataset=NerDataset(trainset,bert_model,train=True, **kwargs), 105 | batch_size=batch_size, 106 | shuffle=True, 107 | collate_fn=partial(pad, train=True) 108 | ) 109 | 110 | dev_dl = DataLoader( 111 | dataset=NerDataset(devset, bert_model, **kwargs), 112 | batch_size=batch_size, 113 | shuffle=False, 114 | collate_fn=pad 115 | ) 116 | 117 | test_dl = DataLoader( 118 | dataset=NerDataset(testset, bert_model, **kwargs), 119 | batch_size=batch_size, 120 | shuffle=False, 121 | collate_fn=pad 122 | ) 123 | 124 | data = DataBunch( 125 | train_dl= train_dl, 126 | valid_dl= dev_dl, 127 | test_dl = test_dl, 128 | collate_fn=pad, 129 | path = Path(data_bunch_path) 130 | ) 131 | 132 | train_opt_steps = int(len(train_dl.dataset) / batch_size) * epochs 133 | optim = BertAdam(model.parameters(), 134 | lr=0.01, 135 | warmup=warmup_proportion, 136 | t_total=train_opt_steps) 137 | 138 | loss_fun = ner_loss_func if loss=='cross' else partial(ner_loss_func, zero=True) 139 | metrics = [Conll_F1()] 140 | 141 | learn = Learner(data, model, BertAdam, 142 | loss_func=loss_fun, 143 | metrics=metrics, 144 | true_wd=False, 145 | layer_groups=model_lr_group, 146 | path='learn'+nameX, 147 | ) 148 | 149 | learn.opt = OptimWrapper(optim) 150 | 151 | lrm = 1.6 152 | 153 | # select set of starting lrs 154 | lrs_eng = [0.01, 5e-4, 3e-4, 3e-4, 1e-5] 155 | lrs_deu = [0.01, 5e-4, 5e-4, 3e-4, 2e-5] 156 | 157 | startlr = lrs_eng if lang=='eng' else lrs_deu 158 | results = [['epoch', 'lr', 'f1', 'val_loss', 
'train_loss', 'train_losses']] 159 | if do_train: 160 | learn.freeze() 161 | learn.fit_one_cycle(1, startlr[0], moms=(0.8, 0.7)) 162 | learn.freeze_to(-3) 163 | lrs = learn.lr_range(slice(startlr[1]/(1.6**15), startlr[1])) 164 | learn.fit_one_cycle(1, lrs, moms=(0.8, 0.7)) 165 | learn.freeze_to(-6) 166 | lrs = learn.lr_range(slice(startlr[2]/(1.6**15), startlr[2])) 167 | learn.fit_one_cycle(1, lrs, moms=(0.8, 0.7)) 168 | learn.freeze_to(-12) 169 | lrs = learn.lr_range(slice(startlr[3]/(1.6**15), startlr[3])) 170 | learn.fit_one_cycle(1, lrs, moms=(0.8, 0.7)) 171 | learn.unfreeze() 172 | lrs = learn.lr_range(slice(startlr[4]/(1.6**15), startlr[4])) 173 | learn.fit_one_cycle(1, lrs, moms=(0.8, 0.7)) 174 | 175 | if do_eval: 176 | res = learn.validate(test_dl, metrics=metrics) 177 | met_res = [f'{m.__name__}: {r}' for m, r in zip(metrics, res[1:])] 178 | print(f'Validation on TEST SET:\nloss {res[0]}, {met_res}') 179 | results.append([ 180 | 'val', '-', res[1], res[0], '-','-' 181 | ]) 182 | 183 | with open(log_dir / (name+'.csv'), 'a') as resultFile: 184 | wr = csv.writer(resultFile) 185 | wr.writerows(results) 186 | 187 | if __name__ == '__main__': 188 | fire.Fire(run_ner) 189 | -------------------------------------------------------------------------------- /bert/optimizer.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """PyTorch optimization for BERT model.""" 16 | 17 | import math 18 | 19 | import torch 20 | from torch.nn.utils import clip_grad_norm_ 21 | from torch.optim import Optimizer 22 | from torch.optim.optimizer import required 23 | 24 | 25 | def warmup_cosine(x, warmup=0.002): 26 | if x < warmup: 27 | return x/warmup 28 | return 0.5 * (1.0 + torch.cos(math.pi * x)) 29 | 30 | def warmup_constant(x, warmup=0.002): 31 | if x < warmup: 32 | return x/warmup 33 | return 1.0 34 | 35 | def warmup_linear(x, warmup=0.002): 36 | if x < warmup: 37 | return x/warmup 38 | return 1.0 - x 39 | 40 | SCHEDULES = { 41 | 'warmup_cosine':warmup_cosine, 42 | 'warmup_constant':warmup_constant, 43 | 'warmup_linear':warmup_linear, 44 | } 45 | 46 | def initBertAdam(params, lr, warmup=-1, t_total=-1, schedule='warmup_linear', 47 | betas=(0.9, 0.999), e=1e-6, weight_decay=0.01, max_grad_norm=1.0): 48 | return BertAdam(params, lr, warmup, t_total, schedule, betas, e, weight_decay, max_grad_norm) 49 | 50 | class BertAdam(Optimizer): 51 | """Implements BERT version of Adam algorithm with weight decay fix. 52 | Params: 53 | lr: learning rate 54 | warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1 55 | t_total: total number of training steps for the learning 56 | rate schedule, -1 means constant learning rate. Default: -1 57 | schedule: schedule to use for the warmup (see above). Default: 'warmup_linear' 58 | b1: Adams b1. Default: 0.9 59 | b2: Adams b2. 
Default: 0.999 60 | e: Adams epsilon. Default: 1e-6 61 | weight_decay: Weight decay. Default: 0.01 62 | max_grad_norm: Maximum norm for the gradients (-1 means no clipping). Default: 1.0 63 | """ 64 | def __init__(self, params, lr=required, warmup=-1, t_total=-1, schedule='warmup_linear', 65 | betas=(0.9, 0.999), e=1e-6, weight_decay=0.01, max_grad_norm=1.0): 66 | if lr is not required and lr < 0.0: 67 | raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr)) 68 | if schedule not in SCHEDULES: 69 | raise ValueError("Invalid schedule parameter: {}".format(schedule)) 70 | if not 0.0 <= warmup < 1.0 and not warmup == -1: 71 | raise ValueError("Invalid warmup: {} - should be in [0.0, 1.0[ or -1".format(warmup)) 72 | if not 0.0 <= betas[0] < 1.0: 73 | raise ValueError("Invalid betas[0] parameter: {} - should be in [0.0, 1.0[".format(betas[0])) 74 | if not 0.0 <= betas[1] < 1.0: 75 | raise ValueError("Invalid betas[1] parameter: {} - should be in [0.0, 1.0[".format(betas[1])) 76 | if not e >= 0.0: 77 | raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(e)) 78 | defaults = dict(lr=lr, schedule=schedule, warmup=warmup, t_total=t_total, 79 | betas=betas, e=e, weight_decay=weight_decay, 80 | max_grad_norm=max_grad_norm) 81 | 82 | super(BertAdam, self).__init__(params, defaults) 83 | 84 | def get_lr(self): 85 | lr = [] 86 | for group in self.param_groups: 87 | for p in group['params']: 88 | state = self.state[p] 89 | if len(state) == 0: 90 | return [0] 91 | if group['t_total'] != -1: 92 | schedule_fct = SCHEDULES[group['schedule']] 93 | lr_scheduled = group['lr'] * schedule_fct(state['step']/group['t_total'], group['warmup']) 94 | else: 95 | lr_scheduled = group['lr'] 96 | lr.append(lr_scheduled) 97 | return lr 98 | 99 | def step(self, closure=None): 100 | """Performs a single optimization step. 101 | Arguments: 102 | closure (callable, optional): A closure that reevaluates the model 103 | and returns the loss. 104 | """ 105 | loss = None 106 | if closure is not None: 107 | loss = closure() 108 | 109 | for group in self.param_groups: 110 | for p in group['params']: 111 | if p.grad is None: 112 | continue 113 | grad = p.grad.data 114 | if grad.is_sparse: 115 | raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') 116 | 117 | state = self.state[p] 118 | 119 | # State initialization 120 | if len(state) == 0: 121 | state['step'] = 0 122 | # Exponential moving average of gradient values 123 | state['next_m'] = torch.zeros_like(p.data) 124 | # Exponential moving average of squared gradient values 125 | state['next_v'] = torch.zeros_like(p.data) 126 | 127 | next_m, next_v = state['next_m'], state['next_v'] 128 | beta1, beta2 = group['betas'] 129 | 130 | # Add grad clipping 131 | if group['max_grad_norm'] > 0: 132 | clip_grad_norm_(p, group['max_grad_norm']) 133 | 134 | # Decay the first and second moment running average coefficient 135 | # In-place operations to update the averages at the same time 136 | next_m.mul_(beta1).add_(1 - beta1, grad) 137 | next_v.mul_(beta2).addcmul_(1 - beta2, grad, grad) 138 | update = next_m / (next_v.sqrt() + group['e']) 139 | 140 | # Just adding the square of the weights to the loss function is *not* 141 | # the correct way of using L2 regularization/weight decay with Adam, 142 | # since that will interact with the m and v parameters in strange ways. 143 | # 144 | # Instead we want to decay the weights in a manner that doesn't interact 145 | # with the m/v parameters. 
This is equivalent to adding the square 146 | # of the weights to the loss with plain (non-momentum) SGD. 147 | if group['weight_decay'] > 0.0: 148 | update += group['weight_decay'] * p.data 149 | 150 | if group['t_total'] != -1: 151 | schedule_fct = SCHEDULES[group['schedule']] 152 | lr_scheduled = group['lr'] * schedule_fct(state['step']/group['t_total'], group['warmup']) 153 | else: 154 | lr_scheduled = group['lr'] 155 | 156 | update_with_lr = lr_scheduled * update 157 | p.data.add_(-update_with_lr) 158 | 159 | state['step'] += 1 160 | 161 | # step_size = lr_scheduled * math.sqrt(bias_correction2) / bias_correction1 162 | # No bias correction 163 | # bias_correction1 = 1 - beta1 ** state['step'] 164 | # bias_correction2 = 1 - beta2 ** state['step'] 165 | 166 | return loss 167 | -------------------------------------------------------------------------------- /bert/ner_data.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import logging 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | 7 | import torch 8 | from pytorch_pretrained_bert.tokenization import BertTokenizer 9 | from torch.utils.data import Dataset 10 | 11 | PAD = '[PAD]' 12 | VOCAB = (PAD, 'O', 'I-LOC', 'B-PER', 'I-PER', 'I-ORG', 'I-MISC', 'B-MISC', 'B-LOC', 'B-ORG') 13 | label2idx = {tag: idx for idx, tag in enumerate(VOCAB)} 14 | idx2label = {idx: tag for idx, tag in enumerate(VOCAB)} 15 | b2i = {'B-PER':'I-PER', 'B-LOC':'I-LOC','B-ORG':'I-ORG', 'B-MISC':'I-MISC'} 16 | 17 | TRAIN = 'train' 18 | DEV = 'dev' 19 | TEST = 'test' 20 | 21 | class NerDataset(Dataset): 22 | """ 23 | creates a conll Dataset 24 | filepath: path to conll file 25 | tokenizer: default is BertTokenizer from pytorch pretrained bert 26 | max_seq_len: max length for examples, shorter ones are padded longer discarded 27 | ds_size: for debug peruses: truncates the dataset to ds_size examples 28 | mask: experiment with the masking type mask[0] is the input mask, mask[1] the label mask 29 | """ 30 | def __init__(self, filepath, bert_model, max_seq_len=512, ds_size=None, train=False, mask=('s','s')): 31 | self.xmask, self.ymask = mask 32 | self.train= train 33 | self.max_seq_len = max_seq_len 34 | self.tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=False) 35 | 36 | #data = read_conll_data(filepath) 37 | data = open(filepath, 'r').read().strip().split("\n\n") 38 | if ds_size: data = data[:ds_size] 39 | size = len(data) 40 | skipped=0 41 | sents, labels = [],[] 42 | 43 | for entry in data: 44 | words = [line.split()[0] for line in entry.splitlines()] #words.split() 45 | tags = ([line.split()[-1] for line in entry.splitlines()]) #tags.split() 46 | 47 | if words[0]=='-DOCSTART-': continue 48 | # account for [cls] [sep] token 49 | #if (len(tokens)+2) > max_seq_len: 50 | # skipped +=1 51 | # continue 52 | 53 | sents.append(["[CLS]"]+words+["[SEP]"]) 54 | labels.append([PAD]+tags+[PAD]) 55 | 56 | org_size = len(sents) 57 | self.labels, self.sents = labels, sents 58 | print() 59 | print(filepath) 60 | print(f'lines {size} sents {org_size} style: x={self.xmask} y={self.ymask}') 61 | # print(f'Truncated examples: {(skipped/org_size)*100:.2}% => {skipped}/{org_size} ') 62 | 63 | def __len__(self): 64 | return len(self.sents) 65 | 66 | def __getitem__(self, index): 67 | text, labels = self.sents[index], self.labels[index] 68 | 69 | x, y = [], [] 70 | # is heads counts the words (disregarding worpiece sub tokens) 71 | is_heads, is_labels = [], [] 72 | 73 | for w, t in zip(text, labels): 74 
| tokens = self.tokenizer.tokenize(w) if w not in ("[CLS]", "[SEP]") else [w] 75 | xx = self.tokenizer.convert_tokens_to_ids(tokens) 76 | 77 | is_head = [1] + [0]*(len(tokens) - 1) 78 | 79 | t = [t] + [PAD] * (len(tokens) - 1) # : no decision 80 | yy = [label2idx[each] for each in t] # (T,) 81 | is_label = [1] if yy[0]>0 else [0] 82 | is_label += [0] * (len(tokens)-1) 83 | 84 | x.extend(xx) 85 | y.extend(yy) 86 | is_heads.extend(is_head) 87 | is_labels.extend(is_label) 88 | 89 | one_hot_labels = np.eye(len(label2idx), dtype=np.float32)[y] 90 | 91 | seqlen = len(y) 92 | segment_ids = [0] * seqlen 93 | seq_mask = [1] * seqlen 94 | masks = {'s':seq_mask, 'h':is_heads, 'l': is_labels} 95 | x_mask = masks[self.xmask] 96 | y_mask = masks[self.ymask] 97 | assert_str = f"len(x)={len(x)}, len(y)={len(y)}, len(x_mask)={len(x_mask)}, len(y_mask)={len(y_mask)}," 98 | assert len(x)==len(y)==len(x_mask)==len(y_mask), assert_str 99 | 100 | xb = (x, segment_ids, x_mask) 101 | yb = (one_hot_labels, y, y_mask) 102 | 103 | return xb, yb 104 | 105 | def pad(batch, bertmax=512, train=False): 106 | ''' Function to pad samples in batch to the same length ''' 107 | seqlens = [len(x[0]) for x,_ in batch] 108 | maxlen = np.array(seqlens).max() 109 | 110 | pad_fun = lambda sample: (sample+[0]*(maxlen-len(sample))) 111 | t = torch.tensor 112 | 113 | input_ids, segment_ids, input_mask, texts = [],[],[], [] 114 | label_ids, label_mask, one_hot_labels, labels = [],[],[],[] 115 | 116 | for x, y in batch: 117 | input_ids.append( pad_fun(x[0]) ) 118 | segment_ids.append( pad_fun(x[1])) 119 | input_mask.append( pad_fun(x[2])) 120 | 121 | label_id = pad_fun(y[1]) 122 | label_ids.append(label_id) 123 | label_mask.append( pad_fun(y[2])) 124 | one_hot_labels.append(np.eye(len(label2idx), dtype=np.float32)[label_id]) 125 | x = ( t(input_ids), t(segment_ids), t(input_mask)) 126 | # add labels to x if they are used in the sample. 
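# (added note) x produced by NerDataset.__getitem__ is the 3-tuple (input_ids, segment_ids, x_mask); a sample whose x carries a fourth element is assumed to also include label ids, which are then padded and returned as part of the model input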
127 | if len(batch[0][0])>3: x = ( t(input_ids), t(segment_ids), t(input_mask), t(label_ids)) 128 | y = ( t(one_hot_labels), t(label_ids), t(label_mask).byte()) 129 | 130 | return x,y 131 | 132 | # from https://github.com/sberbank-ai/ner-bert/blob/master/examples/conll-2003.ipynb 133 | def read_conll_data(input_file:str): 134 | """Read CONLL-2003 format data.""" 135 | with codecs.open(input_file, "r", encoding="utf-8") as f: 136 | lines = [] 137 | words = [] 138 | labels = [] 139 | for line in f: 140 | contends = line.strip() 141 | word = line.strip().split(' ')[0] 142 | label = line.strip().split(' ')[-1] 143 | if contends.startswith("-DOCSTART-"): 144 | words.append('') 145 | continue 146 | 147 | if len(contends) == 0 and words[-1] == '.': 148 | l = ' '.join([label for label in labels if len(label) > 0]) 149 | w = ' '.join([word for word in words if len(word) > 0]) 150 | lines.append([l, w]) 151 | words = [] 152 | labels = [] 153 | continue 154 | words.append(word) 155 | labels.append(label) 156 | return lines 157 | 158 | def conll_to_docs(input_file:str, output_file:str): 159 | ''' 160 | Converts a conll-style file to a doc file used to generate the pre-training data 161 | input_file: Input file - must follow the conll data format 162 | output_file: the resulting file, collecting the sentences/documents 163 | ''' 164 | with codecs.open(output_file, 'w', encoding="utf-8") as outfile: 165 | data = open(input_file, 'r').read().strip().split("\n\n") 166 | for entry in data: 167 | words = [line.split()[0] for line in entry.splitlines()] 168 | if words[0]=='-DOCSTART-': 169 | outfile.write("\n") 170 | continue 171 | else: 172 | w = ' '.join([word for word in words if len(word) > 0]) 173 | outfile.write(w+"\n") 174 | 175 | def conll_to_csv(file:str): 176 | """Write CONLL-2003 to csv""" 177 | csv_dir = Path('./csv') 178 | csv_dir.mkdir(parents=True, exist_ok=True) 179 | 180 | filepath = Path(file) 181 | if(filepath.is_file()): 182 | data = read_conll_data(filepath) 183 | df = pd.DataFrame(data, columns=['labels', 'text']) 184 | 185 | csv_path = csv_dir / (filepath.name + '.csv') 186 | df.to_csv(csv_path, index=False) 187 | logging.info(f'Wrote {csv_path}') 188 | return csv_path 189 | else: 190 | raise ValueError(f'{file} does not exist, or is not a file') 191 | -------------------------------------------------------------------------------- /bert/run_ner.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import logging 3 | import random 4 | from functools import partial 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | 9 | import fire 10 | import torch 11 | from fastai.basic_data import DataBunch, DatasetType 12 | from fastai.basic_train import Learner 13 | from fastai.callback import OptimWrapper 14 | from fastai.metrics import fbeta 15 | from fastai.torch_core import flatten_model, to_device 16 | from fastai.train import to_fp16 17 | from learner import Conll_F1, conll_f1, ner_loss_func 18 | from ner_data import VOCAB, NerDataset, idx2label, pad 19 | from optimizer import BertAdam, initBertAdam 20 | from pytorch_pretrained_bert import BertForTokenClassification 21 | from torch.utils.data import DataLoader 22 | 23 | NER = 'conll-2003' 24 | 25 | def init_logger(log_dir, name): 26 | logging.basicConfig(filename=log_dir / (name+'.log'), 27 | filemode='w', 28 | format='%(asctime)s, %(message)s', 29 | datefmt='%H:%M%S', 30 | level=logging.INFO 31 | ) 32 | 33 | def apply_freez(learn, layers, lay): 34 | if lay==0: learn.freeze() 35 | if 
lay==layers: learn.unfreeze() 36 | else: learn.freeze_to(lay) 37 | print('Freezing layers ', lay, ' off ', layers) 38 | 39 | def bert_layer_list(model): 40 | ''' Break a bert base model in to a list of layers''' 41 | ms = torch.nn.ModuleList() 42 | 43 | flm = flatten_model(model) 44 | # embedding = [0:5] layer 45 | ms.append(torch.nn.ModuleList(flm[0:5])) 46 | # encoder (12 layers) = [5:16] [16:27] ... [126:136] 47 | bert_layergroup_size = 11#33 48 | for i in range(5, 137, bert_layergroup_size): 49 | ms.append(torch.nn.ModuleList(flm[i: i+bert_layergroup_size])) 50 | # pooling layer = [137:139] 51 | ms.append(torch.nn.ModuleList(flm[-4:-2])) 52 | # head = [-2:] 53 | ms.append(torch.nn.ModuleList(flm[-2:])) 54 | return ms 55 | 56 | def run_ner(lang:str='eng', 57 | log_dir:str='logs', 58 | task:str=NER, 59 | batch_size:int=1, 60 | lr:float=5e-5, 61 | epochs:int=1, 62 | dataset:str='data/conll-2003/', 63 | loss:str='cross', 64 | max_seq_len:int=128, 65 | do_lower_case:bool=False, 66 | warmup_proportion:float=0.1, 67 | grad_acc_steps:int=1, 68 | rand_seed:int=None, 69 | fp16:bool=False, 70 | loss_scale:float=None, 71 | ds_size:int=None, 72 | data_bunch_path:str='data/conll-2003/db', 73 | bertAdam:bool=False, 74 | freez:bool=False, 75 | one_cycle:bool=False, 76 | discr:bool=False, 77 | lrm:int=2.6, 78 | div:int=None, 79 | tuned_learner:str=None, 80 | do_train:str=False, 81 | do_eval:str=False, 82 | save:bool=False, 83 | name:str='ner', 84 | mask:tuple=('s','s'), 85 | ): 86 | name = "_".join(map(str,[name,task, lang, mask[0],mask[1], loss, batch_size, lr, max_seq_len,do_train, do_eval])) 87 | 88 | log_dir = Path(log_dir) 89 | log_dir.mkdir(parents=True, exist_ok=True) 90 | init_logger(log_dir, name) 91 | 92 | if rand_seed: 93 | random.seed(rand_seed) 94 | np.random.seed(rand_seed) 95 | torch.manual_seed(rand_seed) 96 | if torch.cuda.is_available(): 97 | torch.cuda.manual_seed_all(rand_seed) 98 | 99 | trainset = dataset + lang + '/train.txt' 100 | devset = dataset +lang + '/dev.txt' 101 | testset = dataset + lang + '/test.txt' 102 | 103 | bert_model = 'bert-base-cased' if lang=='eng' else 'bert-base-multilingual-cased' 104 | print(f'Lang: {lang}\nModel: {bert_model}\nRun: {name}') 105 | model = BertForTokenClassification.from_pretrained(bert_model, num_labels=len(VOCAB), cache_dir='bertm') 106 | 107 | model = torch.nn.DataParallel(model) 108 | model_lr_group = bert_layer_list(model) 109 | layers = len(model_lr_group) 110 | kwargs = {'max_seq_len':max_seq_len, 'ds_size':ds_size, 'mask':mask} 111 | 112 | train_dl = DataLoader( 113 | dataset=NerDataset(trainset,bert_model,train=True, **kwargs), 114 | batch_size=batch_size, 115 | shuffle=True, 116 | collate_fn=partial(pad, train=True) 117 | ) 118 | 119 | dev_dl = DataLoader( 120 | dataset=NerDataset(devset, bert_model, **kwargs), 121 | batch_size=batch_size, 122 | shuffle=False, 123 | collate_fn=pad 124 | ) 125 | 126 | test_dl = DataLoader( 127 | dataset=NerDataset(testset, bert_model, **kwargs), 128 | batch_size=batch_size, 129 | shuffle=False, 130 | collate_fn=pad 131 | ) 132 | 133 | data = DataBunch( 134 | train_dl= train_dl, 135 | valid_dl= dev_dl, 136 | test_dl = test_dl, 137 | collate_fn=pad, 138 | path = Path(data_bunch_path) 139 | ) 140 | 141 | loss_fun = ner_loss_func if loss=='cross' else partial(ner_loss_func, zero=True) 142 | metrics = [Conll_F1()] 143 | 144 | learn = Learner(data, model, BertAdam, 145 | loss_func=loss_fun, 146 | metrics=metrics, 147 | true_wd=False, 148 | layer_groups=None if not freez else model_lr_group, 149 | 
path='learn', 150 | ) 151 | 152 | # initialise bert adam optimiser 153 | train_opt_steps = int(len(train_dl.dataset) / batch_size) * epochs 154 | optim = BertAdam(model.parameters(), 155 | lr=lr, 156 | warmup=warmup_proportion, 157 | t_total=train_opt_steps) 158 | 159 | if bertAdam: learn.opt = OptimWrapper(optim) 160 | else: print("No Bert Adam") 161 | 162 | # load fine-tuned learner 163 | if tuned_learner: 164 | print('Loading pretrained learner: ', tuned_learner) 165 | learn.load(tuned_learner) 166 | 167 | # Uncomment to graph learning rate plot 168 | # learn.lr_find() 169 | # learn.recorder.plot(skip_end=15) 170 | 171 | # set lr (discriminative learning rates) 172 | if div: layers=div 173 | lrs = lr if not discr else learn.lr_range(slice(lr/lrm**(layers), lr)) 174 | 175 | results = [['epoch', 'lr', 'f1', 'val_loss', 'train_loss', 'train_losses']] 176 | 177 | if do_train: 178 | for epoch in range(epochs): 179 | if freez: 180 | lay= (layers//(epochs-1)) * epoch * -1 181 | if lay==0:print('Freeze'); learn.freeze() 182 | elif lay==layers: print('unfreeze');learn.unfreeze() 183 | else: print('freeze2');learn.freeze_to(lay) 184 | print('Freezing layers ', lay, ' off ', layers) 185 | 186 | # Fit Learner - eg train model 187 | if one_cycle: learn.fit_one_cycle(1, lrs, moms=(0.8, 0.7)) 188 | else: learn.fit(1, lrs) 189 | 190 | results.append([ 191 | epoch, lrs, 192 | learn.recorder.metrics[0][0], 193 | learn.recorder.val_losses[0], 194 | np.array(learn.recorder.losses).mean(), 195 | learn.recorder.losses, 196 | ]) 197 | 198 | if save: 199 | m_path = learn.save(f"{lang}_{epoch}_model", return_path=True) 200 | print(f'Saved model to {m_path}') 201 | if save: learn.export(f'{lang}.pkl') 202 | 203 | if do_eval: 204 | res = learn.validate(test_dl, metrics=metrics) 205 | met_res = [f'{m.__name__}: {r}' for m, r in zip(metrics, res[1:])] 206 | print(f'Validation on TEST SET:\nloss {res[0]}, {met_res}') 207 | results.append([ 208 | 'val', '-', res[1], res[0], '-','-' 209 | ]) 210 | 211 | with open(log_dir / (name+'.csv'), 'a') as resultFile: 212 | wr = csv.writer(resultFile) 213 | wr.writerows(results) 214 | 215 | if __name__ == '__main__': 216 | fire.Fire(run_ner) 217 | -------------------------------------------------------------------------------- /bert/bert_train_data.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Source: https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/examples/lm_finetuning/pregenerate_training_data.py 3 | ''' 4 | import json 5 | import shelve 6 | from argparse import ArgumentParser 7 | from pathlib import Path 8 | from random import choice, randint, random, sample, shuffle 9 | from tempfile import TemporaryDirectory 10 | 11 | import numpy as np 12 | from tqdm import tqdm, trange 13 | 14 | from pytorch_pretrained_bert.tokenization import BertTokenizer 15 | 16 | 17 | class DocumentDatabase: 18 | def __init__(self, reduce_memory=False): 19 | if reduce_memory: 20 | self.temp_dir = TemporaryDirectory() 21 | self.working_dir = Path(self.temp_dir.name) 22 | self.document_shelf_filepath = self.working_dir / 'shelf.db' 23 | self.document_shelf = shelve.open(str(self.document_shelf_filepath), 24 | flag='n', protocol=-1) 25 | self.documents = None 26 | else: 27 | self.documents = [] 28 | self.document_shelf = None 29 | self.document_shelf_filepath = None 30 | self.temp_dir = None 31 | self.doc_lengths = [] 32 | self.doc_cumsum = None 33 | self.cumsum_max = None 34 | self.reduce_memory = reduce_memory 35 | 36 | def 
add_document(self, document): 37 | if self.reduce_memory: 38 | current_idx = len(self.doc_lengths) 39 | self.document_shelf[str(current_idx)] = document 40 | else: 41 | self.documents.append(document) 42 | self.doc_lengths.append(len(document)) 43 | 44 | def _precalculate_doc_weights(self): 45 | self.doc_cumsum = np.cumsum(self.doc_lengths) 46 | self.cumsum_max = self.doc_cumsum[-1] 47 | 48 | def sample_doc(self, current_idx, sentence_weighted=True): 49 | # Uses the current iteration counter to ensure we don't sample the same doc twice 50 | sampled_doc_index = current_idx 51 | while sampled_doc_index == current_idx: 52 | if sentence_weighted: 53 | # With sentence weighting, we sample docs proportionally to their sentence length 54 | if self.doc_cumsum is None or len(self.doc_cumsum) != len(self.doc_lengths): 55 | self._precalculate_doc_weights() 56 | rand_start = self.doc_cumsum[current_idx] 57 | rand_end = rand_start + self.cumsum_max - self.doc_lengths[current_idx] 58 | sentence_index = randint(rand_start, rand_end) % self.cumsum_max 59 | sampled_doc_index = np.searchsorted(self.doc_cumsum, sentence_index, side='right') 60 | else: 61 | # If we don't use sentence weighting, then every doc has an equal chance to be chosen 62 | sampled_doc_index = current_idx + randint(1, len(self.doc_lengths)-1) 63 | assert sampled_doc_index != current_idx 64 | if self.reduce_memory: 65 | return self.document_shelf[str(sampled_doc_index)] 66 | else: 67 | return self.documents[sampled_doc_index] 68 | 69 | def __len__(self): 70 | return len(self.doc_lengths) 71 | 72 | def __getitem__(self, item): 73 | if self.reduce_memory: 74 | return self.document_shelf[str(item)] 75 | else: 76 | return self.documents[item] 77 | 78 | def __enter__(self): 79 | return self 80 | 81 | def __exit__(self, exc_type, exc_val, traceback): 82 | if self.document_shelf is not None: 83 | self.document_shelf.close() 84 | if self.temp_dir is not None: 85 | self.temp_dir.cleanup() 86 | 87 | 88 | def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens): 89 | """Truncates a pair of sequences to a maximum sequence length. Lifted from Google's BERT repo.""" 90 | while True: 91 | total_length = len(tokens_a) + len(tokens_b) 92 | if total_length <= max_num_tokens: 93 | break 94 | 95 | trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b 96 | assert len(trunc_tokens) >= 1 97 | 98 | # We want to sometimes truncate from the front and sometimes from the 99 | # back to add more randomness and avoid biases. 100 | if random() < 0.5: 101 | del trunc_tokens[0] 102 | else: 103 | trunc_tokens.pop() 104 | 105 | 106 | def create_masked_lm_predictions(tokens, masked_lm_prob, max_predictions_per_seq, vocab_list): 107 | """Creates the predictions for the masked LM objective. 
This is mostly copied from the Google BERT repo, but 108 | with several refactors to clean it up and remove a lot of unnecessary variables.""" 109 | cand_indices = [] 110 | for (i, token) in enumerate(tokens): 111 | if token == "[CLS]" or token == "[SEP]": 112 | continue 113 | cand_indices.append(i) 114 | 115 | num_to_mask = min(max_predictions_per_seq, 116 | max(1, int(round(len(tokens) * masked_lm_prob)))) 117 | shuffle(cand_indices) 118 | mask_indices = sorted(sample(cand_indices, num_to_mask)) 119 | masked_token_labels = [] 120 | for index in mask_indices: 121 | # 80% of the time, replace with [MASK] 122 | if random() < 0.8: 123 | masked_token = "[MASK]" 124 | else: 125 | # 10% of the time, keep original 126 | if random() < 0.5: 127 | masked_token = tokens[index] 128 | # 10% of the time, replace with random word 129 | else: 130 | masked_token = choice(vocab_list) 131 | masked_token_labels.append(tokens[index]) 132 | # Once we've saved the true label for that token, we can overwrite it with the masked version 133 | tokens[index] = masked_token 134 | 135 | return tokens, mask_indices, masked_token_labels 136 | 137 | 138 | def create_instances_from_document( 139 | doc_database, doc_idx, max_seq_length, short_seq_prob, 140 | masked_lm_prob, max_predictions_per_seq, vocab_list): 141 | """This code is mostly a duplicate of the equivalent function from Google BERT's repo. 142 | However, we make some changes and improvements. Sampling is improved and no longer requires a loop in this function. 143 | Also, documents are sampled proportionally to the number of sentences they contain, which means each sentence 144 | (rather than each document) has an equal chance of being sampled as a false example for the NextSentence task.""" 145 | document = doc_database[doc_idx] 146 | # Account for [CLS], [SEP], [SEP] 147 | max_num_tokens = max_seq_length - 3 148 | 149 | # We *usually* want to fill up the entire sequence since we are padding 150 | # to `max_seq_length` anyways, so short sequences are generally wasted 151 | # computation. However, we *sometimes* 152 | # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter 153 | # sequences to minimize the mismatch between pre-training and fine-tuning. 154 | # The `target_seq_length` is just a rough target however, whereas 155 | # `max_seq_length` is a hard limit. 156 | target_seq_length = max_num_tokens 157 | if random() < short_seq_prob: 158 | target_seq_length = randint(2, max_num_tokens) 159 | 160 | # We DON'T just concatenate all of the tokens from a document into a long 161 | # sequence and choose an arbitrary split point because this would make the 162 | # next sentence prediction task too easy. Instead, we split the input into 163 | # segments "A" and "B" based on the actual "sentences" provided by the user 164 | # input. 165 | instances = [] 166 | current_chunk = [] 167 | current_length = 0 168 | i = 0 169 | while i < len(document): 170 | segment = document[i] 171 | current_chunk.append(segment) 172 | current_length += len(segment) 173 | if i == len(document) - 1 or current_length >= target_seq_length: 174 | if current_chunk: 175 | # `a_end` is how many segments from `current_chunk` go into the `A` 176 | # (first) sentence. 
177 | a_end = 1 178 | if len(current_chunk) >= 2: 179 | a_end = randint(1, len(current_chunk) - 1) 180 | 181 | tokens_a = [] 182 | for j in range(a_end): 183 | tokens_a.extend(current_chunk[j]) 184 | 185 | tokens_b = [] 186 | 187 | # Random next 188 | if len(current_chunk) == 1 or random() < 0.5: 189 | is_random_next = True 190 | target_b_length = target_seq_length - len(tokens_a) 191 | 192 | # Sample a random document, with longer docs being sampled more frequently 193 | random_document = doc_database.sample_doc(current_idx=doc_idx, sentence_weighted=True) 194 | 195 | random_start = randint(0, len(random_document) - 1) 196 | for j in range(random_start, len(random_document)): 197 | tokens_b.extend(random_document[j]) 198 | if len(tokens_b) >= target_b_length: 199 | break 200 | # We didn't actually use these segments so we "put them back" so 201 | # they don't go to waste. 202 | num_unused_segments = len(current_chunk) - a_end 203 | i -= num_unused_segments 204 | # Actual next 205 | else: 206 | is_random_next = False 207 | for j in range(a_end, len(current_chunk)): 208 | tokens_b.extend(current_chunk[j]) 209 | truncate_seq_pair(tokens_a, tokens_b, max_num_tokens) 210 | 211 | assert len(tokens_a) >= 1 212 | assert len(tokens_b) >= 1 213 | 214 | tokens = ["[CLS]"] + tokens_a + ["[SEP]"] + tokens_b + ["[SEP]"] 215 | # The segment IDs are 0 for the [CLS] token, the A tokens and the first [SEP] 216 | # They are 1 for the B tokens and the final [SEP] 217 | segment_ids = [0 for _ in range(len(tokens_a) + 2)] + [1 for _ in range(len(tokens_b) + 1)] 218 | 219 | tokens, masked_lm_positions, masked_lm_labels = create_masked_lm_predictions( 220 | tokens, masked_lm_prob, max_predictions_per_seq, vocab_list) 221 | 222 | instance = { 223 | "tokens": tokens, 224 | "segment_ids": segment_ids, 225 | "is_random_next": is_random_next, 226 | "masked_lm_positions": masked_lm_positions, 227 | "masked_lm_labels": masked_lm_labels} 228 | instances.append(instance) 229 | current_chunk = [] 230 | current_length = 0 231 | i += 1 232 | 233 | return instances 234 | 235 | 236 | def main(): 237 | parser = ArgumentParser() 238 | parser.add_argument('--train_corpus', type=Path, required=True) 239 | parser.add_argument("--output_dir", type=Path, required=True) 240 | parser.add_argument("--bert_model", type=str, required=True, 241 | choices=["bert-base-uncased", "bert-large-uncased", "bert-base-cased", 242 | "bert-base-multilingual-cased", "bert-base-chinese"]) 243 | parser.add_argument("--do_lower_case", action="store_true") 244 | 245 | parser.add_argument("--reduce_memory", action="store_true", 246 | help="Reduce memory usage for large datasets by keeping data on disc rather than in memory") 247 | 248 | parser.add_argument("--epochs_to_generate", type=int, default=3, 249 | help="Number of epochs of data to pregenerate") 250 | parser.add_argument("--max_seq_len", type=int, default=128) 251 | parser.add_argument("--short_seq_prob", type=float, default=0.1, 252 | help="Probability of making a short sentence as a training example") 253 | parser.add_argument("--masked_lm_prob", type=float, default=0.15, 254 | help="Probability of masking each token for the LM task") 255 | parser.add_argument("--max_predictions_per_seq", type=int, default=20, 256 | help="Maximum number of tokens to mask in each sequence") 257 | 258 | args = parser.parse_args() 259 | 260 | tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) 261 | vocab_list = list(tokenizer.vocab.keys()) 262 | with 
DocumentDatabase(reduce_memory=args.reduce_memory) as docs: 263 | with args.train_corpus.open() as f: 264 | doc = [] 265 | for line in tqdm(f, desc="Loading Dataset", unit=" lines"): 266 | line = line.strip() 267 | if line == "": 268 | docs.add_document(doc) 269 | doc = [] 270 | else: 271 | tokens = tokenizer.tokenize(line) 272 | doc.append(tokens) 273 | 274 | args.output_dir.mkdir(exist_ok=True) 275 | for epoch in trange(args.epochs_to_generate, desc="Epoch"): 276 | epoch_filename = args.output_dir / f"epoch_{epoch}.json" 277 | num_instances = 0 278 | with epoch_filename.open('w') as epoch_file: 279 | for doc_idx in trange(len(docs), desc="Document"): 280 | doc_instances = create_instances_from_document( 281 | docs, doc_idx, max_seq_length=args.max_seq_len, short_seq_prob=args.short_seq_prob, 282 | masked_lm_prob=args.masked_lm_prob, max_predictions_per_seq=args.max_predictions_per_seq, 283 | vocab_list=vocab_list) 284 | doc_instances = [json.dumps(instance) for instance in doc_instances] 285 | for instance in doc_instances: 286 | epoch_file.write(instance + '\n') 287 | num_instances += 1 288 | metrics_file = args.output_dir / f"epoch_{epoch}_metrics.json" 289 | with metrics_file.open('w') as metrics_file: 290 | metrics = { 291 | "num_training_examples": num_instances, 292 | "max_seq_len": args.max_seq_len 293 | } 294 | metrics_file.write(json.dumps(metrics)) 295 | 296 | 297 | if __name__ == '__main__': 298 | main() 299 | -------------------------------------------------------------------------------- /bert/lm_finetune.py: -------------------------------------------------------------------------------- 1 | """ 2 | modefified from: https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/examples/lm_finetuning/finetune_on_pregenerated.py#L281 3 | """ 4 | import json 5 | import logging 6 | import random 7 | from argparse import ArgumentParser 8 | from collections import namedtuple 9 | from pathlib import Path 10 | from tempfile import TemporaryDirectory 11 | 12 | import numpy as np 13 | from tqdm import tqdm 14 | 15 | import fastai.train 16 | import torch 17 | from fastai.basic_data import DataBunch 18 | from fastai.basic_train import Learner 19 | from fastai.callback import Callback 20 | from fastai.torch_core import flatten_model, split_no_wd_params, to_device 21 | from optimizer import BertAdam 22 | from pytorch_pretrained_bert.modeling import BertForPreTraining 23 | from pytorch_pretrained_bert.optimization import warmup_linear # ,BertAdam 24 | from pytorch_pretrained_bert.tokenization import BertTokenizer 25 | from torch.utils.data import DataLoader, Dataset, RandomSampler 26 | from torch.utils.data.distributed import DistributedSampler 27 | 28 | InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next") 29 | 30 | log_format = '%(asctime)-10s: %(message)s' 31 | logging.basicConfig(level=logging.INFO, format=log_format) 32 | 33 | def bert_layer_list(model): 34 | ''' 35 | Get Layers for BERT WITH LM OBJECTIVE 36 | ''' 37 | ms = torch.nn.ModuleList() 38 | 39 | flm = flatten_model(model) 40 | print(f'Modules Len : {len(flm)}') 41 | # embedding = [0:5] layer 42 | ms.append(torch.nn.ModuleList(flm[0:5])) 43 | # encoder (12 layers) = [5:16] [16:27] ... 
[126:136] 44 | bert_layergroup_size = 11#33 45 | for i in range(5, 137, bert_layergroup_size): 46 | ms.append(torch.nn.ModuleList(flm[i: i+bert_layergroup_size])) 47 | # pooling layer = [137:139] 48 | ms.append(torch.nn.ModuleList(flm[137:139])) 49 | # head = [-2:] 50 | #ms.append(torch.nn.Sequential(flm[139:-2]+[flm[-1]])) 51 | return ms 52 | 53 | # this PR for ref https://github.com/fastai/fastai/commit/16a858751fc3bd37153a8ada434042f7a53df111 54 | class PregeneratedData(Callback): 55 | ''' 56 | Change traing dataset at every epoch 57 | ''' 58 | 59 | def __init__(self, path, tokenizer, epochs, batch_size, epoch=0): 60 | self.path = path 61 | self.tokenizer = tokenizer 62 | self.epochs = epochs 63 | self.batch_size = batch_size 64 | data = PregeneratedDataset( 65 | epoch=epoch, 66 | training_path=self.path, 67 | tokenizer=self.tokenizer, 68 | num_data_epochs=self.epochs) 69 | self.dataset = data 70 | 71 | def __len__(self): return len(self.dataset) 72 | def __getattr(self, k:str): return getattr(self.dataset, k) 73 | 74 | def on_epoch_begin(self, **kwargs): 75 | epoch = kwargs['epoch'] 76 | print(epoch) 77 | data = PregeneratedDataset( 78 | epoch=epoch, 79 | training_path=self.path, 80 | tokenizer=self.tokenizer, 81 | num_data_epochs=self.epochs) 82 | self.dataset = data 83 | 84 | def __getitem__(self, idx:int): 85 | return self.dataset[idx] 86 | 87 | def convert_example_to_features(example, tokenizer, max_seq_length): 88 | tokens = example["tokens"] 89 | segment_ids = example["segment_ids"] 90 | is_random_next = example["is_random_next"] 91 | masked_lm_positions = example["masked_lm_positions"] 92 | masked_lm_labels = example["masked_lm_labels"] 93 | 94 | assert len(tokens) == len(segment_ids) <= max_seq_length # The preprocessed data should be already truncated 95 | input_ids = tokenizer.convert_tokens_to_ids(tokens) 96 | masked_label_ids = tokenizer.convert_tokens_to_ids(masked_lm_labels) 97 | 98 | input_array = np.zeros(max_seq_length, dtype=np.int) 99 | input_array[:len(input_ids)] = input_ids 100 | 101 | mask_array = np.zeros(max_seq_length, dtype=np.bool) 102 | mask_array[:len(input_ids)] = 1 103 | 104 | segment_array = np.zeros(max_seq_length, dtype=np.bool) 105 | segment_array[:len(segment_ids)] = segment_ids 106 | 107 | lm_label_array = np.full(max_seq_length, dtype=np.int, fill_value=-1) 108 | lm_label_array[masked_lm_positions] = masked_label_ids 109 | 110 | features = InputFeatures(input_ids=input_array, 111 | input_mask=mask_array, 112 | segment_ids=segment_array, 113 | lm_label_ids=lm_label_array, 114 | is_next=is_random_next) 115 | return features 116 | 117 | 118 | class PregeneratedDataset(Dataset): 119 | def __init__(self, training_path, epoch, tokenizer, num_data_epochs, reduce_memory=False): 120 | self.vocab = tokenizer.vocab 121 | self.tokenizer = tokenizer 122 | self.epoch = epoch 123 | self.data_epoch = epoch % num_data_epochs 124 | data_file = training_path / f"epoch_{self.data_epoch}.json" 125 | metrics_file = training_path / f"epoch_{self.data_epoch}_metrics.json" 126 | assert data_file.is_file() and metrics_file.is_file() 127 | metrics = json.loads(metrics_file.read_text()) 128 | num_samples = metrics['num_training_examples'] 129 | seq_len = metrics['max_seq_len'] 130 | self.temp_dir = None 131 | self.working_dir = None 132 | if reduce_memory: 133 | self.temp_dir = TemporaryDirectory() 134 | self.working_dir = Path(self.temp_dir.name) 135 | input_ids = np.memmap(filename=self.working_dir/'input_ids.memmap', 136 | mode='w+', dtype=np.int32, shape=(num_samples, 
seq_len)) 137 | input_masks = np.memmap(filename=self.working_dir/'input_masks.memmap', 138 | shape=(num_samples, seq_len), mode='w+', dtype=np.bool) 139 | segment_ids = np.memmap(filename=self.working_dir/'input_masks.memmap', 140 | shape=(num_samples, seq_len), mode='w+', dtype=np.bool) 141 | lm_label_ids = np.memmap(filename=self.working_dir/'lm_label_ids.memmap', 142 | shape=(num_samples, seq_len), mode='w+', dtype=np.int32) 143 | lm_label_ids[:] = -1 144 | is_nexts = np.memmap(filename=self.working_dir/'is_nexts.memmap', 145 | shape=(num_samples,), mode='w+', dtype=np.bool) 146 | else: 147 | input_ids = np.zeros(shape=(num_samples, seq_len), dtype=np.int32) 148 | input_masks = np.zeros(shape=(num_samples, seq_len), dtype=np.bool) 149 | segment_ids = np.zeros(shape=(num_samples, seq_len), dtype=np.bool) 150 | lm_label_ids = np.full(shape=(num_samples, seq_len), dtype=np.int32, fill_value=-1) 151 | is_nexts = np.zeros(shape=(num_samples,), dtype=np.bool) 152 | logging.info(f"Loading training examples for epoch {epoch}") 153 | with data_file.open() as f: 154 | for i, line in enumerate(tqdm(f, total=num_samples, desc="Training examples")): 155 | line = line.strip() 156 | example = json.loads(line) 157 | features = convert_example_to_features(example, tokenizer, seq_len) 158 | input_ids[i] = features.input_ids 159 | segment_ids[i] = features.segment_ids 160 | input_masks[i] = features.input_mask 161 | lm_label_ids[i] = features.lm_label_ids 162 | is_nexts[i] = features.is_next 163 | 164 | inlen = len(input_ids[0]) 165 | for x in input_ids: 166 | assert len(x) == inlen 167 | 168 | assert i == num_samples - 1 # Assert that the sample count metric was true 169 | logging.info("Loading complete!") 170 | self.num_samples = num_samples 171 | self.seq_len = seq_len 172 | self.input_ids = input_ids 173 | self.input_masks = input_masks 174 | self.segment_ids = segment_ids 175 | self.lm_label_ids = lm_label_ids 176 | self.is_nexts = is_nexts 177 | 178 | def __len__(self): 179 | return self.num_samples 180 | 181 | def __getitem__(self, item): 182 | return ((torch.tensor(self.input_ids[item].astype(np.int64)), 183 | torch.tensor(self.input_masks[item].astype(np.int64)), 184 | torch.tensor(self.segment_ids[item].astype(np.int64)), 185 | torch.tensor(self.lm_label_ids[item].astype(np.int64)), 186 | torch.tensor(self.is_nexts[item].astype(np.int64))), 187 | torch.tensor([0.0])) 188 | 189 | 190 | def main(): 191 | parser = ArgumentParser() 192 | parser.add_argument('--pregenerated_data', type=Path, required=True) 193 | parser.add_argument('--output_dir', type=Path, required=True) 194 | parser.add_argument("--bert_model", type=str, required=True, 195 | choices=["bert-base-uncased", "bert-large-uncased", "bert-base-cased", 196 | "bert-base-multilingual-cased", "bert-base-chinese"]) 197 | parser.add_argument("--do_lower_case", action="store_true") 198 | parser.add_argument("--reduce_memory", action="store_true", 199 | help="Store training data as on-disc memmaps to massively reduce memory usage") 200 | 201 | parser.add_argument("--epochs", type=int, default=3, help="Number of epochs to train for") 202 | parser.add_argument("--local_rank", 203 | type=int, 204 | default=-1, 205 | help="local_rank for distributed training on gpus") 206 | parser.add_argument("--no_cuda", 207 | action='store_true', 208 | help="Whether not to use CUDA when available") 209 | parser.add_argument('--gradient_accumulation_steps', 210 | type=int, 211 | default=1, 212 | help="Number of updates steps to accumulate before performing a 
backward/update pass.") 213 | parser.add_argument("--train_batch_size", 214 | default=16, 215 | type=int, 216 | help="Total batch size for training.") 217 | parser.add_argument('--fp16', 218 | action='store_true', 219 | help="Whether to use 16-bit float precision instead of 32-bit") 220 | parser.add_argument('--loss_scale', 221 | type=float, default=0, 222 | help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n" 223 | "0 (default value): dynamic loss scaling.\n" 224 | "Positive power of 2: static loss scaling value.\n") 225 | parser.add_argument("--warmup_proportion", 226 | default=0.1, 227 | type=float, 228 | help="Proportion of training to perform linear learning rate warmup for. " 229 | "E.g., 0.1 = 10%% of training.") 230 | parser.add_argument("--learning_rate", 231 | default=3e-5, 232 | type=float, 233 | help="The initial learning rate for Adam.") 234 | parser.add_argument('--seed', 235 | type=int, 236 | default=None, 237 | help="random seed for initialization") 238 | args = parser.parse_args() 239 | 240 | assert args.pregenerated_data.is_dir(), \ 241 | "--pregenerated_data should point to the folder of files made by pregenerate_training_data.py!" 242 | 243 | samples_per_epoch = [] 244 | for i in range(args.epochs): 245 | epoch_file = args.pregenerated_data / f"epoch_{i}.json" 246 | metrics_file = args.pregenerated_data / f"epoch_{i}_metrics.json" 247 | if epoch_file.is_file() and metrics_file.is_file(): 248 | metrics = json.loads(metrics_file.read_text()) 249 | samples_per_epoch.append(metrics['num_training_examples']) 250 | else: 251 | if i == 0: 252 | exit("No training data was found!") 253 | print(f"Warning! There are fewer epochs of pregenerated data ({i}) than training epochs ({args.epochs}).") 254 | print("This script will loop over the available data, but training diversity may be negatively impacted.") 255 | num_data_epochs = i 256 | break 257 | else: 258 | num_data_epochs = args.epochs 259 | print(samples_per_epoch) 260 | 261 | if args.gradient_accumulation_steps < 1: 262 | raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( 263 | args.gradient_accumulation_steps)) 264 | 265 | args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps 266 | 267 | if args.seed: 268 | random.seed(args.seed) 269 | np.random.seed(args.seed) 270 | torch.manual_seed(args.seed) 271 | if n_gpu > 0: 272 | torch.cuda.manual_seed_all(args.seed) 273 | 274 | if args.output_dir.is_dir() and list(args.output_dir.iterdir()): 275 | logging.warning(f"Output directory ({args.output_dir}) already exists and is not empty!") 276 | args.output_dir.mkdir(parents=True, exist_ok=True) 277 | 278 | tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) 279 | 280 | total_train_examples = 0 281 | for i in range(args.epochs): 282 | # The modulo takes into account the fact that we may loop over limited epochs of data 283 | total_train_examples += samples_per_epoch[i % len(samples_per_epoch)] 284 | 285 | num_train_optimization_steps = int( 286 | total_train_examples / args.train_batch_size / args.gradient_accumulation_steps) 287 | 288 | # Prepare model 289 | model = BertForPreTraining.from_pretrained(args.bert_model) 290 | model = torch.nn.DataParallel(model) 291 | 292 | # Prepare optimizer 293 | optimizer = BertAdam 294 | 295 | train_dataloader = DataLoader( 296 | PregeneratedData(args.pregenerated_data, tokenizer,args.epochs, args.train_batch_size), 297 | 
batch_size=args.train_batch_size, 298 | ) 299 | 300 | data = DataBunch(train_dataloader,train_dataloader) 301 | global_step = 0 302 | logging.info("***** Running training *****") 303 | logging.info(f" Num examples = {total_train_examples}") 304 | logging.info(" Batch size = %d", args.train_batch_size) 305 | logging.info(" Num steps = %d", num_train_optimization_steps) 306 | def loss(x, y): 307 | return x.mean() 308 | 309 | learn = Learner(data, model, optimizer, 310 | loss_func=loss, 311 | true_wd=False, 312 | path='learn', 313 | layer_groups=bert_layer_list(model), 314 | ) 315 | 316 | lr= args.learning_rate 317 | layers = len(bert_layer_list(model)) 318 | lrs = learn.lr_range(slice(lr/(2.6**4), lr)) 319 | for epoch in range(args.epochs): 320 | learn.fit_one_cycle(1, lrs, wd=0.01) 321 | # save model at half way point 322 | if epoch == args.epochs//2: 323 | savem = learn.model.module.bert if hasattr(learn.model, 'module') else learn.model.bert 324 | output_model_file = args.output_dir / (f"pytorch_fastai_model_{args.bert_model}_{epoch}.bin") 325 | torch.save(savem.state_dict(), str(output_model_file)) 326 | print(f'Saved bert to {output_model_file}') 327 | 328 | savem = learn.model.module.bert if hasattr(learn.model, 'module') else learn.model.bert 329 | output_model_file = args.output_dir / (f"pytorch_fastai_model_{args.bert_model}_{args.epochs}.bin") 330 | torch.save(savem.state_dict(), str(output_model_file)) 331 | print(f'Saved bert to {output_model_file}') 332 | 333 | if __name__ == '__main__': 334 | main() 335 | -------------------------------------------------------------------------------- /bert/eng-tune-ex-i.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%reload_ext autoreload\n", 10 | "%autoreload 2\n", 11 | "%matplotlib inline" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "/home/abaumann/anaconda3/envs/cuda/bin/jupyter\r\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "! 
which jupyter" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": { 35 | "scrolled": true 36 | }, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "import logging\n", 48 | "import random\n", 49 | "from functools import partial\n", 50 | "from pathlib import Path\n", 51 | "import pandas as pd\n", 52 | "import numpy as np\n", 53 | "\n", 54 | "import fire\n", 55 | "import torch\n", 56 | "from fastai.basic_data import DataBunch\n", 57 | "from fastai.basic_train import Learner\n", 58 | "from fastai.metrics import fbeta\n", 59 | "from fastai.train import to_fp16\n", 60 | "from learner import (OneHotCallBack, conll_f1, create_fp16_cb,\n", 61 | " ner_loss_func, Conll_F1)\n", 62 | "from ner_data import NerDataset, pad\n", 63 | "from optimizer import BertAdam\n", 64 | "from pytorch_pretrained_bert import BertForTokenClassification\n", 65 | "from torch.utils.data import DataLoader\n", 66 | "from fastai.torch_core import flatten_model" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "# Initialisation\n", 74 | "\n", 75 | "Define Values for the Run" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 24, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "lang='eng'\n", 85 | "ds_size=None\n", 86 | "name='jpei'\n", 87 | "batch_size:int=16\n", 88 | "csvn = 'logs/eng-tune'+name+'.csv'\n", 89 | "datapath = 'data/conll-2003-I/'\n", 90 | "pretrain=False\n", 91 | "pretrained_lm = 'pretrain/pytorch_fastai_model_bert-base-casedm_10.bin'" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "log_dir:str='logs'\n", 101 | "lr:float=5e-5\n", 102 | "epochs:int=1\n", 103 | "trainset:str=datapath\n", 104 | "devset:str=datapath\n", 105 | "testset:str=datapath\n", 106 | "max_seq_len:int=128\n", 107 | "do_lower_case:bool=False\n", 108 | "warmup_proportion:float=0.1\n", 109 | "data_bunch_path:str='data/conll-2003/db'" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "bert_model = 'bert-base-cased' if lang=='eng' else 'bert-base-multilingual-cased'" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "## Dataset\n", 126 | "Load the dataset and define a databunch" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 7, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "trainset += lang + '/train.txt'\n", 136 | "devset += lang + '/dev.txt'\n", 137 | "testset += lang + '/test.txt'" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 9, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "\n", 150 | "data/conll-2003/eng/train.txt\n", 151 | "lines 14041 sents 14041\tstyle: x=s y=s\n", 152 | "\n", 153 | "data/conll-2003/eng/dev.txt\n", 154 | "lines 3250 sents 3250\tstyle: x=s y=s\n", 155 | "\n", 156 | "data/conll-2003/eng/test.txt\n", 157 | "lines 3453 sents 3453\tstyle: x=s y=s\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "train_dl = DataLoader(\n", 163 | " dataset=NerDataset(trainset, bert_model, max_seq_len=max_seq_len, ds_size=ds_size),\n", 164 | " 
batch_size=batch_size,\n", 165 | " shuffle=True,\n", 166 | " collate_fn=pad\n", 167 | ")\n", 168 | "\n", 169 | "dev_dl = DataLoader(\n", 170 | " dataset=NerDataset(devset, bert_model, max_seq_len=max_seq_len, ds_size=ds_size),\n", 171 | " batch_size=batch_size,\n", 172 | " shuffle=False,\n", 173 | " collate_fn=pad\n", 174 | ")\n", 175 | "\n", 176 | "test_dl = DataLoader(\n", 177 | " dataset=NerDataset(testset, bert_model, max_seq_len=max_seq_len, ds_size=ds_size),\n", 178 | " batch_size=batch_size,\n", 179 | " shuffle=False,\n", 180 | " collate_fn=pad\n", 181 | ")\n", 182 | "\n", 183 | "data = DataBunch(\n", 184 | " train_dl= train_dl,\n", 185 | " valid_dl= dev_dl,\n", 186 | " test_dl = test_dl,\n", 187 | " collate_fn=pad,\n", 188 | " path = Path(data_bunch_path)\n", 189 | ")" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Model & Learner\n", 197 | "define BERT model\n", 198 | "define Learner object" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 11, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "def bert_layer_list(model):\n", 208 | " ms = torch.nn.ModuleList()\n", 209 | "\n", 210 | " flm = flatten_model(model)\n", 211 | " # embedding = [0:5] layer\n", 212 | " ms.append(torch.nn.ModuleList(flm[0:5]))\n", 213 | " # encoder (12 layers) = [5:16] [16:27] ... [126:136]\n", 214 | " for i in range(5, 137, 11):\n", 215 | " ms.append(torch.nn.ModuleList(flm[i: i+11]))\n", 216 | " # pooling layer = [137:139]\n", 217 | " ms.append(torch.nn.ModuleList(flm[-4:-2]))\n", 218 | " ms.append(torch.nn.ModuleList(flm[-2:]))\n", 219 | " # head = [-2:]\n", 220 | " return ms" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 12, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "model = BertForTokenClassification.from_pretrained(bert_model, num_labels=10, cache_dir='bertm')" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 13, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "if pretrain:\n", 239 | " model.bert.load_state_dict(torch.load(pretrained_lm))" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 14, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "15" 251 | ] 252 | }, 253 | "execution_count": 14, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "model = torch.nn.DataParallel(model)\n", 260 | "num_layers = len(bert_layer_list(model))\n", 261 | "num_layers" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 15, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "optim = BertAdam\n", 271 | "loss_fun = ner_loss_func" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 16, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "metrics = [Conll_F1()]" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 17, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "learn = Learner(data, model, optim,\n", 290 | " loss_func=loss_fun,\n", 291 | " metrics=metrics,\n", 292 | " true_wd=False,\n", 293 | " layer_groups= bert_layer_list(model),\n", 294 | " path='learn',\n", 295 | " )" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 18, 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "data": { 305 | "text/plain": [ 306 | "16" 307 | ] 308 | }, 309 | 
"execution_count": 18, 310 | "metadata": {}, 311 | "output_type": "execute_result" 312 | } 313 | ], 314 | "source": [ 315 | "batch_size" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 19, 321 | "metadata": {}, 322 | "outputs": [ 323 | { 324 | "data": { 325 | "text/plain": [ 326 | "'eng'" 327 | ] 328 | }, 329 | "execution_count": 19, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "lang" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 30, 348 | "metadata": { 349 | "scrolled": true 350 | }, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "[3e-05,\n", 356 | " 3e-05,\n", 357 | " 3e-05,\n", 358 | " 3e-05,\n", 359 | " 3e-05,\n", 360 | " 3e-05,\n", 361 | " 3e-05,\n", 362 | " 3e-05,\n", 363 | " 3e-05,\n", 364 | " 3e-05,\n", 365 | " 3e-05,\n", 366 | " 3e-05,\n", 367 | " 3e-05,\n", 368 | " 3e-05,\n", 369 | " 5e-05]" 370 | ] 371 | }, 372 | "execution_count": 30, 373 | "metadata": {}, 374 | "output_type": "execute_result" 375 | } 376 | ], 377 | "source": [ 378 | "chtw = ([3e-5]*14+[3e-4], 0)\n", 379 | "chtw2 = ([3e-5]*2 +[5e-5]*3+[1e-4]*3+[3e-4]*3+[5e-4]*3 +[5e-3], 0)" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 20, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "def test_lrs(stage, all_lrs, runs=2):\n", 389 | " res = []\n", 390 | " for i,lrs in enumerate(all_lrs):\n", 391 | " lrs,lrm = lrs\n", 392 | " print(i, lrm, lrs)\n", 393 | " i_res = {'lrs': lrs, 'lrm':lrm, 'f1s':[], 'vals':[], 'train':[]}\n", 394 | " for _ in range(runs):\n", 395 | " learn.load(stage)\n", 396 | " learn.fit_one_cycle(1, lrs, moms=(0.8,0.7))\n", 397 | " \n", 398 | " i_res['f1s'].append(learn.recorder.metrics[0][0])\n", 399 | " i_res['vals'].append(learn.recorder.val_losses[0])\n", 400 | " i_res['train'].append(learn.recorder.losses)\n", 401 | " i_res['val'] = sum(i_res['vals'])/runs\n", 402 | " i_res['f1'] = sum(i_res['f1s'])/runs\n", 403 | " res.append(i_res)\n", 404 | " return res" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 21, 410 | "metadata": { 411 | "scrolled": false 412 | }, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/html": [], 417 | "text/plain": [ 418 | "" 419 | ] 420 | }, 421 | "metadata": {}, 422 | "output_type": "display_data" 423 | }, 424 | { 425 | "name": "stdout", 426 | "output_type": "stream", 427 | "text": [ 428 | "LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.\n" 429 | ] 430 | }, 431 | { 432 | "data": { 433 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd8XNWZ//HPo25Vy5bcLdvYYIrBgGUwmFDCLi0hwAYWSIDQ4jiUwC/AkoXdkA2b7BI2EDa0mBIgIRDAhgBJKMkaTLEBN9ypso2rJBc1W/35/TEjIYQsybbu3Bnp+3695uWZO2fmfkeW9Ojec8855u6IiIgAJIUdQERE4oeKgoiItFJREBGRVioKIiLSSkVBRERaqSiIiEgrFQUREWmloiAiIq1UFEREpFVK2AF2V0FBgY8ePTrsGCIiCWXBggXl7l7YVbuEKwqjR49m/vz5YccQEUkoZramO+10+khERFqpKIiISCsVBRERaaWiICIirVQURESklYqCiIi0UlEQEZFWKgoiIgngrr99xBsflQW+HxUFEZE419Ts3PX3D3m3ZGvg+1JREBGJc1tr6ml2KMxJD3xfgRUFMxtpZrPNbIWZLTezazpok2dmL5jZ+9E2lwSVR0QkUZVV1QFQkB18UQhy7qNG4Dp3X2hmOcACM3vV3Ve0aXMlsMLdTzezQuADM3vc3esDzCUiklDKq2NXFAI7UnD3je6+MHq/ClgJDG/fDMgxMwOyga1EiomIiES1FIVYnD6KySypZjYaOAx4p91TdwPPAxuAHOBcd2+ORSYRkUTx+emjtMD3FXhHs5llAzOBa929st3TJwOLgWHAocDdZpbbwXtMM7P5Zja/rCz4S7JEROJJeXUdGalJZKcH/3d8oEXBzFKJFITH3X1WB00uAWZ5xMdACbB/+0buPsPdi929uLCwyzUiRER6lbKqOgqy04mcaQ9WkFcfGfAQsNLd79hFs7XAidH2g4HxwKdBZRIRSUTl1fUx6WSGYPsUpgIXAkvNbHF0201AEYC73w/cCjxiZksBA2509/IAM4mIJJzy6jpGDsiMyb4CKwru/iaRX/SdtdkAnBRUBhGR3qCsqo7DivJjsi+NaBYRiWONTc1s3VEfk8tRQUVBRCSuba2pxx0KY3A5KqgoiIjEtbIYDlwDFQURkbgWy3mPQEVBRCSulVdHpoJTURARkZjOewQqCiIica2sqo5+qclkxWCKC1BREBGJa+XVdTE7SgAVBRGRuBaZ9yg2l6OCioKISFzTkYKIiLSK5WR4oKIgIhK3Gpqa2VqjoiAiIkSmuIDYXY4KKgoiInEr1qOZQUVBRCRuxXreI1BREBGJW+XRI4XC3nCkYGYjzWy2ma0ws+Vmds0u2h1vZoujbV4PKo+ISKJpOVIoyIndOIUgx003Ate5+0IzywEWmNmr7r6ipYGZ9QfuBU5x97VmNijAPCIiCaW8qp6stGQy02IzxQUEeKTg7hvdfWH0fhWwEhjertm3gFnuvjbarjSoPCIiiaasuo6CGPYnQIz6FMxsNHAY8E67p/YD8s3sNTNbYGYXxSKPiEgiKK+qi2l/AgR7+ggAM8sGZgLXuntlB/ufBJwI9APmmtk8d/+w3XtMA6YBFBUVBR1ZRCQulFfXMbYwO6b7DPRIwcxSiRSEx919VgdN1gEvu3uNu5cDc4CJ7Ru5+wx3L3b34sLCwiAji4jEjbIYz3sEwV59ZMBDwEp3v2MXzf4EHGNmKWaWCRxJpO9BRKRPq29sZvuOhpgOXINgTx9NBS4ElprZ4ui2m4AiAHe/391XmtlLwBKgGXjQ3ZcFmElEJCFsqYn9wDUIsCi4+5uAdaPd7cDtQeUQEUlE5VUtazPHbowCaESziEhcKm8duNZL+hRERGTPlYUwxQWoKIiIxKUwJsMDFQURkbhUVlVHTnoKGanJMd2vioKISBwqD2GKC1BREBGJS+XVsZ/iAlQURETiUllVXUynzG6hoiAiEodKq+oYlJMR8/2qKIiIxJnahiaqahtjfuURqCiIiMSd0spwLkcFFQURkbhTVl0LwCAVBRERaTlSUJ+CiIhQGp3iYlCujhRERPq80qpakpOMAZm6JFVEpM8rrayjIDuNpKQuVx/ocUGuvDbSzGab2QozW25m13TSdrKZNZrZ2UHlERFJFGXV4YxRgGBXXmsErnP3hWaWAywws1fdfUXbRmaWDNwGvBJgFhGRhFFaWcfQvHCKQmBHCu6+0d0XRu9XEVl7eXgHTa8GZgKlQWUREUkkpVV1oXQyQ4z6FMxsNHAY8E677cOBs4D7YpFDRCTeNTY1s6UmnMnwIAZFwcyyiRwJXOvule2e/hVwo7s3d/Ee08xsvpnNLysrCyqqiEjottTU4w6Fub2vTwEzSyVSEB5391kdNCkGnjQzgALgNDNrdPfn2jZy9xnADIDi4mIPMrOISJhaluEMYzQzBFgULPKb/iFgpbvf0VEbdx/Tpv0jwIvtC4KISF9SWhXeFBcQ7JHCVOBCYKmZLY5uuwkoAnD3+wPct4hIQmqd4qK3nT5y9zeBbo+8cPeLg8oiIpIoWqa4KMiO/Whm0IhmEZG4UlZVR//MVNJTkkPZv4qCiEgcKa2qDa0/AVQURETiSljLcLZQURARiSOllXU6UhAREXB3yqrqQlmGs4WKgohInKjc2Uh9U7OKgoiItBm4FtIYBVBREBGJG6UhT3EBKgoiInEj7CkuQEVBRCRutExxoT4FERGhrKqOfqnJZKcHOoF1p1QURETiRMuKa9HlBEKhoiAiEifCnuICVBREROJG2FNcgIqCiEjcKKsMdzQzqCiIiMSFnfVNVNU19t6iYGYjzWy2ma0ws+Vmdk0Hbb5tZkvMbKmZvW1mE4PKIyISz8Jem7lFkNc9NQLXuftCM8sBFpjZq+6+ok2bEuA4d99mZqcCM4AjA8wkIhKX4mGKCwh2Oc6NwMbo/SozWwkMB1a0afN2m5fMA0YElUdEJJ7FwxQXEKM+BTMbDRwGvNNJs8uAv+7i9dPMbL6ZzS8rK+v5gCIiISutjBwp9No+hRZmlg3MBK5198pdtDmBSFG4saPn3X2Guxe7e3FhYWFwYUVEQlJWXUdKkjEgMy3UHIGOpTazVCIF4XF3n7WLNocADwKnuvuWIPOIiMSr0so6CrLTSUoKbzQzBHv1kQEPASvd/Y5dtCkCZgEXuvuHQWUREYl3Gyp2MiQv3E5mCPZIYSpwIbDUzBZHt90EFAG4+/3Aj4GBwL3RuT4a3b04wEwiInGppKyGI/cZGHaMQK8+ehPo9DjI3S8HLg8qg4hIIqhtaGJDRS1jCrLCjqIRzSIiYVu9pQaA0SoKIiKyujxSFMYMVFEQEenzSsp3ADC6IDPkJCoKIiKhKymvpiA7nZyM1LCjBDtOIZ6s3FjJc4vW0+xOs0Oze2SgSFY6BdlpFGSnk52R0tozbgZ1Dc1U1jZSVdtAVW1j9Ba5X13XSG6/VAbnpjMkN4PBuRnk9kslNyMl+m8q/dKSdztnTV0jpVV1NLuTnpJEek
oyqclGVW0j23c0sG1HPZW1DQAkmZFkkJKURF5mKv37pUb/TSMtRfVeJFGsLt/BPnHQnwDdLApmNhZY5+51ZnY8cAjwmLtvDzJcT1pdXsOjc1eTZIYR+YVa39RMXWPzbr1PZloyORkpZKWnULmzgfLq+k7bFmRHik5ORiqNzc00NDkNTc00N3ukUXTZvaraBkor66iua9zDT/hFORkpDMxKY2B0/0NyMxicl8GQ3Azys9LITk8hKy2FnIwUCnPSyUjd/QImIj2jZEsNJ4yPj9kaunukMBMoNrNxRGYy/RPwB+C0oIL1tFMPHsqpBw/90vYd9Y1sqa6nrLqOmugvZHdwID0liZyMFHLSUyP/ZqSQkvzFv8DrG5spraplc2Udla1HFA1s39HA1pp6yqvrKK+uY/uOelKSk0hNNrLTU0iOjlps2dfw/hkct18hg3IyGJSTTkqyUdcYKVr1jc3kpKfQPzOV/Kw08vqlYkCzQ1NzpMhURve5fWcD22rq2VpTz5aaesqr6vikrIa3P95C1S4KjhkMy+vHmIIsRg3MZGheBoNyIzkG52YwLK8fuf1SQl03VqS3qqptoKyqLi6uPILuF4Vmd280s7OAX7v7r81sUZDBYiUzLYXMASmMHLBnHTxpKUmMyM9kRH74HURdqalrZHNlLdt3NlBT10hNXSOVtY2s37aTNVtqKNmygz8v3cj2HQ1fem1mWjJD8zIYnp/JPgVZjCnIYp/CyL9D8/q1FjkR2T1rtkQ6mRPq9BHQYGbnA98BTo9uC79HRHZLVnoK+xRmd9mutqGJsqo6NlfWsqmylk0VtWzYXsuG7TtZt30HC1Zvpaa+qbV9WnISIwb0Y/TALMYNyubg4XkcMiKPogGZOroQ6UJJefyMUYDuF4VLgOnAz9y9xMzGAL8LLpaEKSM1mZEDMnd59OTulEVPS5WU17Bmaw1rt+ygpLyGNz8qp74p0k+T1y+ViSP7Uzwqn+JR+Uwc2Z+s9D5zbYNIt7QUhVEDEqgoRFdL+wGAmeUDOe5+W5DBJH6ZWaTPITeDo8Z+ca6W+sZmPtxcxdL1FSxZt51Fa7dz598+xB2Sk4wDhuYwqSifw0flM2lUfkKcdhMJ0uryGoblZezR1YpB6O7VR68B34i2XwCUmtlb7v7DALNJAkpLSWLC8DwmDM/j/COKAKjY2cCitdtYsGYbC9du4+kF63h07hoARg/M5PjxgzhufCFH7TNQV0FJn/NpeU3cnDqC7p8+ynP3SjO7nMilqLeY2ZIgg0nvkdcvlePHD+L48YMAaGxq5oPNVbxbspU5H5bx5HtreeTt1WSkJnHawUM5b3IRk0fnqz9C+oTVW2o4rYMrI8PS3aKQYmZDgX8Gbg4wj/QBKclJHDQsj4OG5XHJ1DHUNjTxTslWXl6+iecXb2DWwvXsU5jFeZNHcs6kkeRnhbsSlUhQttXUs31HQ9xceQTdLwo/BV4G3nL398xsH+Cj4GJJX5KRmsxx+xVy3H6F/NvXDuDPSzbyx/c+4+d/WcUvX/mQ0ycO46KjRnHIiP5hRxXpUSUts6PGwUR4Lbrb0fw08HSbx58C3+zsNWY2EngMGExkfNYMd7+rXRsD7iIyCG4HcLG7L9ydDyC9S2ZaCucUj+Sc4pGs2lTJ7+au4dlF63lmwTomjuzPpVNHc9rBQ0lN1jQekvhWx9nlqNDNCfHMbISZPWtmpdHbTDMb0cXLGoHr3P1AYApwpZkd2K7NqcC+0ds04L7dzC+92P5DcvnZWQcz76YT+cnpB1K5s4FrnlzMsb+YzX2vfUJFB4PsRBLJ6vIakgyK9nDwbBC6++fWb4HngWHR2wvRbbvk7htb/up39ypgJTC8XbMziHRcu7vPA/pH+y5EWuVmpHLx1DH8/YfH8dB3ihlTkMVtL63iqP/+Oz99YQXrt+8MO6LIHvm0vIYR+ZlxNYFld/sUCt29bRF4xMyu7e5OzGw0cBjwTrunhgOftXm8LrptY3ffW/qOpCTjxAMGc+IBg1mxoZIH3viUR+eu5rG5q/nGxGFcccJYxg3KCTumSLet3hJfl6NC948UtpjZBWaWHL1dAGzpzgvNLJvIhHrXunvlnoQ0s2lmNt/M5peVle3JW0gvc+CwXO4891Bev+F4LjxqFH9dtomT7pzDj2YuYVNFbdjxRLrk7nE1ZXaL7haFS4lcjrqJyF/xZwMXd/UiM0slUhAed/dZHTRZD4xs83hEdNsXuPsMdy929+LCwviYXlbiw4j8TG45/SDevPEELj56DDMXruO422dz20urWtedEIlHZdWRqfJHD4yf/gToZlFw9zXu/g13L3T3Qe5+Jl1ffWTAQ8BKd79jF82eBy6yiClAhbvr1JHstoHZ6fz49AP5v+uO57SDh3L/65/wj3e8zsvLN4UdTaRDq1uX4EzMI4WOdDXFxVTgQuCrZrY4ejvNzKab2fRom78AnwIfAw8AV+xFHhFGDsjkznMP5bkrppKfmcb3freA6b9bwOZKnVKS+FJSXg3APgVdz1wcS3szZWWncxC4+5vdaOPAlXuRQaRDE0f254Wrj+GBNz7lrr99xFt3lPOT0w/inw4frukzJC6s2bKDlCRjWP+MsKN8wd4cKXiPpRAJQGpyElccP46Xrz2WA4bkct3T73PlHxayrWbXS6iKxMrGilqG5GV8aTXHsHWaxsyqzKyyg1sVkfEKInFvdEEWT0ybwo2n7M+rKzZz8q/mMOdDXcUm4dqwfSdD8+LrKAG6KArunuPuuR3cctxdq6VIwkhOMr5//FievWIqef1Suejhd/nPF1dQ39gcdjTpozZV1jIkr1/YMb4kvo5bRAI2YXgeL1x9DBdOGcWDb5Zw9v1vt84/IxIr7s7GilqGJdqRgkhvlJGazK1nTuD+CyaxZssOvva/b/Dcoi8NjxEJzNaaeuobmxmioiASP06ZMIS/XPMVDhyWy7V/XKxOaImZjdFR9wnXpyDS2w3v348nvjuFG04ezyvLN3HSr+Ywe1Vp2LGkl2uZikV9CiJxKCU5iStPGMdzV05lQGYalzzyHv/23FJ1QktgNkYHU6pPQSSOHTQsjz9dNZXvfmUMv5+3lgseeoetOp0kAdi4fScpScbA7PSwo3yJioJIGxmpydz8tQO567xDWfzZds685y0+2lwVdizpZTZV1DI4N4PkpPgbXa+iINKBMw4dzh+nTWFHfRNn3fu2+hmkR7WMZo5HKgoiu3BYUT7PXzWVUQMzuezR93jozRIi03WJ7J2NFfE5mhlUFEQ6Nax/P56efhQnHTiEW19cwU3PqgNa9k7LwDUVBZEElZmWwr3fPpwrTxjLE+9+xkUPv6PxDLLHtu9ooK6xOS4vRwUVBZFuSUoybjh5f+48dyIL12znnN/MZWPFzrBjSQKK54FrEGBRMLOHzazUzJbt4vk8M3vBzN43s+VmdklQWUR6ylmHjeCxy45gU0UtZ983lxLNmyS7qeWPiT5XFIBHgFM6ef5KYIW7TwSOB35pZmkB5hHpEVP2GciT06aws6GJc+5/m+UbKsKOJAnk8yOFPnb6yN3nA
Fs7awLkRNdyzo62bQwqj0hPmjA8j6e+dxRpyUmcN2Me81d39q0u8rlNFbUkJxmFOfE3cA3C7VO4GzgA2AAsBa5xd13WIQlj3KBsnv7+0RRmp3PBQ+/w2gcayyBd21Cxk8E56XE5cA3CLQonA4uJrOB2KHC3meV21NDMppnZfDObX1amFbMkfgzv34+nph/FPgXZfPex+fx5ycawI0mc2xTHA9cg3KJwCTDLIz4GSoD9O2ro7jPcvdjdiwsLC2MaUqQrBdnpPDFtCoeO7M/VTyzkyXfXhh1J4timitq47U+AcIvCWuBEADMbDIwHPg0xj8gey+uXymOXHslX9i3kR7OW8stXPqC5WaOf5YtaBq71ySMFM3sCmAuMN7N1ZnaZmU03s+nRJrcCR5vZUuDvwI3uXh5UHpGg9UtL5oGLivnn4hH8+v8+5nu/X0B1na6dkM9V7GxgZ0NT3F6OCpAS1Bu7+/ldPL8BOCmo/YuEIS0lidu+eQgHDs3l1j+v5J/ufYsHLipm1MCssKNJHIj3y1FBI5pFepyZcfHUMTx26RGUVtVxxj1vsXDttrBjSRz4fMW1+D1SUFEQCcjUcQU8f+Ux9O+XygUPvsObH+nsaF+3Ic5HM4OKgkigigZm8tT0oygakMmlj7zHS8t0yWpftqmiliSDQXE6cA1UFEQCNygngz9OO4oJw3O54vGFPPXeZ2FHkpBsrKhlUE4GKcnx+6s3fpOJ9CJ5man8/vIjmTqugH+ZuYT/eGG51mXog+J94BqoKIjETGZaCg9fPJlLp47ht2+t5vwH5rV2PErfsCGOV1xroaIgEkOpyUn8+PQD+fX5h7FyYyVf//UbzP1kS9ixJAbcPe5HM4OKgkgoTp84jOevmkpev1S+89t3NZleH1BZ28iO+vgeuAYqCiKhGTcoh5nfP5pxhdlM+90C5nyoyR57s0QYowAqCiKh6p+ZxuOXH8nYwsgsq298pMLQW7WMURjWX0VBRDqRnxUpDGMKsrj8URWG3mpNdOnWYf3VpyAiXRjQpjBc9sh8XlyyIexI0sPeW7ONoXkZDMnVkYKIdMPA7HT+OO0oDhmRx9VPLOLRt1eHHUl6iLvzXslWJo8eQGQF4viloiASR1oGuZ24/2BueX45t7+8Cnety5Do1m7dQWlVHZPHDAg7SpdUFETiTEZqMvdfcDjnHzGSe2Z/wg+eXKx1GRLcuyVbAThidPwXhcDWUxCRPZeSnMTPzzqYkQMy+Z+XP2D5hgru+/Ykxg/JCTua7IH3Vm8lr18q+w7KDjtKl4Jcee1hMys1s2WdtDnezBab2XIzez2oLCKJyMy44vhx/P7yI6nc2cgZ97zJMwvWhR1L9sB7q7cxeXQ+SUnx3Z8AwZ4+egQ4ZVdPmll/4F7gG+5+EHBOgFlEEtbRYwv4yzXHcOjI/lz/9Pvc9pL6GRJJaVUtJeU1HJEA/QkQYFFw9znA1k6afAuY5e5ro+01zl9kFwblZPD7y47kW0cWcd9rn/CzP69UYUgQ75VEVt2bnAD9CRBun8J+QKqZvQbkAHe5+2MdNTSzacA0gKKiopgFFIknKclJ/OzMCaQlJ/HgmyU0NDVzy+kHJcQpib7svdVb6ZeazITheWFH6ZYwi0IKMAk4EegHzDWzee7+YfuG7j4DmAFQXFysP4+kzzIzbjn9QFKTjQfeKKG+yfnZmRNUGOLYuyVbOayoP6lxvLBOW2EWhXXAFnevAWrMbA4wEfhSURCRz5kZN512AGkpSdwz+xNqG5q4/exD4no1r76qsraBlZsq+cFX9w07SreF+V30J+AYM0sxs0zgSGBliHlEEoaZcf1J47nh5PE8u2g93398IbUNTWHHknYWrNmGOwnTyQwBHimY2RPA8UCBma0DbgFSAdz9fndfaWYvAUuAZuBBd9/l5asi8kVmxpUnjCM7PYVbnl/OZY++x4wLi8lK1/CjePFeyVZSkozDivqHHaXbAvvucffzu9HmduD2oDKI9AXfOXo02ekp3PDM+1zw0Ds8cvER5GWmhh1LiHQyTxieR2Za4hRqnYQU6QW+OWkE9357EsvXV3LujLmUVmnt57DVNjTx/mcVCXXqCFQURHqNUyYM4eGLJ7N26w7OuX8un23dEXakPm3Z+grqm5opHpUfdpTdoqIg0oscs28Bv7/8SLbvaODs+9/mo81VYUfqs5asqwDg0JGJ058AKgoivc7hRfn88XtTaHY4+/65vP1JediR+qRl6ysYlJPOoDhfVKc9FQWRXmj/IbnMnH40g3LSueihd3n8nTVhR+pzlq6v4OAEGcXcloqCSC9VNDCTmVcczTH7FnDzs8v4yfPLaWxqDjtWn7CjvpFPyqo5SEVBROJJbkYqD31nMpcdM4ZH3l7N5Y/NZ0e9FuwJ2sqNlTQ7OlIQkfiTnGT8+9cP5OdnHcycD8u44MF32L6jPuxYvdrSaCezioKIxK1vHVnEvd8+nGXrK/nn38xlU4XGMgRl6fpKCrLTGJybHnaU3aaiINKHnDJhKI9cOpkN22v55n1v80lZddiReqVl6yuYMDwPs8SbvVZFQaSPOXpsAU9Om0JtQxPf+PWbzFq4Tgv29KCd9U18VFqVkKeOQEVBpE+aMDyP568+hoOG5/HDp97n2j8uprK2IexYvcLKTZFO5kRZVKc9FQWRPmp4/3488d0pXPeP+/Hiko2cdtcbLFq7LexYCW/Z+kgns4qCiCSc5CTj6hP35anvHQXAub+Zxx/fWxtyqsS2dF0FA7LSGJaXWCOZW6goiAiTRuXzwlXHcOQ+A7hx5lJufnYp9Y0a6LYnlm2oTNhOZgiwKJjZw2ZWamadLpxjZpPNrNHMzg4qi4h0LT8rjUcuOYLpx43l8XfWcv4D8yit1GWru6O2oYmPNlcxYVhu2FH2WJBHCo8Ap3TWwMySgduAVwLMISLdlJxk/OjU/bnnW4ezcmMlX//1myxYszXsWAlj1aYqGps9Ya88ggCLgrvPAbr6broamAmUBpVDRHbf1w4ZyrNXTKVfWjLnzZjH7+et0WWr3bA0wTuZIcQ+BTMbDpwF3BdWBhHZtfFDcnj+ymOYOq6Af3tuGf/yzBIt3NOF5esr6J+Zyoj8fmFH2WNhLhz6K+BGd2/uqkPGzKYB0wCKiopiEE1EAPIyIxPq3fnqh9w9+2OeXrCOQ0bk8bWDh/K1Q4YyIj8z7IhxZen6CiYMS9xOZgj36qNi4EkzWw2cDdxrZmd21NDdZ7h7sbsXFxYWxjKjSJ+XnGRcf/J43viXE/jRqfsD8F9/XcXxt7/GPbM/pqlZp5UAKmsb+HBzVUKfOoIQjxTcfUzLfTN7BHjR3Z8LK4+IdG7kgEymHzeW6ceNZe2WHdz28ipuf/kDZq8q5c5zD2XkgL571NDU7FzzxCLcI2tlJ7IgL0l9ApgLjDezdWZ2mZlNN7PpQe1TRGKjaGAmd59/GHeeO5EPNlVxyq/m8PT8z/psZ/QvXlrF7A/KuOUbByXcmsztBXak4O7n70bbi4PKISLBMDPOOmwEk0cP4Lqn3ueGZ5bwt5Wb+flZBzMwO/Gm
jN5Tsxau4zdzPuWCKUVcOGVU2HH2mkY0i8heGZGfyR++O4WbTtuf2avKOPlXb/D3lZvDjhUTi9Zu40ezljJlnwHccvpBYcfpESoKIrLXkpOMaceO5U9XTaUgO43LHp3PD59azPrtO8OOFpgt1XV873cLGJybzr3fnkRqcu/4ddo7PoWIxIUDhubyp6um8v3jx/Li+xs54X9e49YXV7Clui7saD3K3bn+6ffZvrOB31xQzICstLAj9RgVBRHpUekpydx4yv783/XHccbEYfz2rRKO/cVs/uuvK1m3rXcMfvvtW6uZ/UEZN592AAcm8DxHHbFEu1qguLjY58+fH3YMEemmj0uruPNvH/HXpRsBOPGAwVx01CiOGVeQkIO8lq2v4J/ufZtj9yvggYuKE+YzmNkCdy/uql2YI5pFpA8YNyiHe751OOu37+QP76zhyXc/49UVm5kTCgjQAAAL20lEQVQ8Op//PPNgxg/JCTtit9XUNfKDJxaRn5XKL86emDAFYXfo9JGIxMTw/v244eT9eftfv8rPzzqYj0urOe1/3+Dnf1lJTV1j2PG6tLWmnmueXEzJlhruPPfQXtWP0JaOFEQkptJTkvnWkUWcOmEIt720ihlzPuXF9zdw29mH8JV9428am+Zm56n5n/HfL62iuraRm087gKPHFoQdKzDqUxCRUC1Ys40fzVzCx2XVfO/YsVx30n6hX965s76J1Vtq+KSsmoffLGHh2u0cMXoAt545IaFOd7WlPgURSQiTRuXz/FXH8NMXV3D/658w99Mt/Pq8wygaGJu5lDZs38mSdRUs31DBsvUVrNpUxcaKz1ecG5iVxi/Pmcg/HT68V/YhtKcjBRGJG39espEfzVqCO0w7dh8umTqanIzUwPb3xLtr+ddZS4HIALxxhdkcMDSHsYXZjC7IYkxBFuMGZZORmhxYhljp7pGCioKIxJXPtu7gpy+u4NUVm+mfmcr048Zy0VGjyEzr2RMbKzdWcsY9b3HE6AFcf/J49h+S0yt++e+KioKIJLT3P9vOHa9+yOsfllGQnc41J47jvCOKeqS/YUd9I9+4+y0qdzbwl2u+QkEfmMCvu0VBl6SKSFyaOLI/j156BM9MP4p9CrL49z8t5x/veJ0Xl2zY6ym6/+P5FXxSVs2vzj20TxSE3aEjBRGJe+7O7A9Kue2vH/DB5ioG56ZTkJ1OXr9UcjNSGZybzpiCLMYUZjNmYBYNzc1s2L6TDdt3sqmijsG56ew/NJf9Bmfzt5Wl/OCJRVx1wjiuP3l82B8tZkK/+sjMHga+DpS6+4QOnv82cCNgQBXwfXd/P6g8IpK4zIyv7j+Y4/YbxHOL1vPGR2VU1TZSsbOBT8qqeevjcqq6MQDODJLNKB6Vz7X/sG8MkieeIC9JfQS4G3hsF8+XAMe5+zYzOxWYARwZYB4RSXDJScY3J43gm5NGfGG7u1NeXU9JeQ2ry2tIS0liaF4Gw/r3Y1BuOpsr6li5qZJVG6tYv30H1/7DfqT0kqmue1qgp4/MbDSRtZe/dKTQrl0+sMzdh3f1njp9JCKy+xKto/ky4K9hhxAR6etCH9FsZicQKQrHdNJmGjANoKioKEbJRET6nlCPFMzsEOBB4Ax337Krdu4+w92L3b24sDD+JswSEektQisKZlYEzAIudPcPw8ohIiKfC/KS1CeA44ECM1sH3AKkArj7/cCPgYHAvdFJphq70wkiIiLBCawouPv5XTx/OXB5UPsXEZHdFy9XH4mISBxQURARkVYJN/eRmVUAH3XwVB5Q0cm29s+3PO6oTQFQvocRO8rRnee7yt/+cUf3lT8+8sOef4au8nfWprO87R/3xvxt78dD/s5ytn0cq99Bo9y968s33T2hbsCM7m5vu6398y2PO2oDzO/pfHubv7PP0/6zKH+4+ffmM3SVf3c+Q1/L3xPfQz2Zv7OcnXzdA/8Z6OqWiKePXtiN7S908vwL3WizJ7p6jz3N3/5xR/eVv/fn76xNZ3nbP+6N+bu7/870ZP722+Lld1CnEu70USyY2XxP4MtjlT98if4ZlD9cYeZPxCOFWJgRdoC9pPzhS/TPoPzhCi2/jhRERKSVjhRERKRVry8KZvawmZWa2bI9eO0kM1tqZh+b2f9adD6O6HNXm9kqM1tuZr/o2dRfyNDj+c3sJ2a23swWR2+n9Xzy1gyBfP2jz19nZm5mBT2X+EsZgvj632pmS6Jf+1fMbFjPJ2/NEET+26Pf+0vM7Fkz69/zyb+QI4jPcE70Z7fZzHr83P3eZN7F+33HzD6K3r7TZnunPyN7ZG8uXUqEG3AscDiRRXx297XvAlOILBn6V+DU6PYTgL8B6dHHgxIs/0+A6xP16x99biTwMrAGKEik/EBumzY/AO5PsPwnASnR+7cBtyXa9xBwADAeeA0ojpfM0Tyj220bAHwa/Tc/ej+/s8+3N7def6Tg7nOArW23mdlYM3vJzBaY2Rtmtn/715nZUCI/vPM88tV/DDgz+vT3gf9297roPkoTLH/MBJj/TuBfgEA7xYLI7+6VbZpmEeBnCCj/K+7esiDyPGBE+9cnwGdY6e4fxFvmXTgZeNXdt7r7NuBV4JSgfsZ7fVHYhRnA1e4+CbgeuLeDNsOBdW0er4tuA9gP+IqZvWNmr5vZ5EDTftne5ge4Knr4/7BFlkONpb3Kb2ZnAOvd/f2gg+7CXn/9zexnZvYZ8G0iMwbHUk98/7S4lHBWTezJzxAr3cnckeHAZ20et3yOQD5f6CuvxZqZZQNHA0+3Of2Wvptvk0LkUG4KMBl4ysz2iVbrQPVQ/vuAW4n8hXor8EsiP9yB29v8ZpYJ3ETkFEbM9dDXH3e/GbjZzP4VuIrI1PKB66n80fe6GWgEHu+ZdN3eb499hljpLLOZXQJcE902DviLmdUDJe5+Vqyz9rmiQOToaLu7H9p2o5klAwuiD58n8ouz7WHxCGB99P46YFa0CLxrZs1E5iopCzJ41F7nd/fNbV73APBikIHb2dv8Y4ExwPvRH64RwEIzO8LdNwWcHXrm+6etx4G/EKOiQA/lN7OLga8DJ8bij6F2evr/IBY6zAzg7r8FfgtgZq8BF7v76jZN1hNZm6bFCCJ9D+sJ4vP1dAdLPN6A0bTp8AHeBs6J3jdg4i5e174T57To9unAT6P39yNyaGcJlH9omzb/D3gykb7+7dqsJsCO5oC+/vu2aXM18EyC5T8FWAEUBpk7Ft9DBNTRvKeZ2XVHcwmRTub86P0B3fl8e5Q7Vv+pYd2AJ4CNQAORv/AvI/KX5kvA+9Fv7h/v4rXFwDLgE+BuPh/slwb8PvrcQuCrCZb/d8BSYAmRv6iGJlL+dm1WE+zVR0F8/WdGty8hMk/N8ATL/zGRP4QWR2+BXT0V4Gc4K/pedcBm4OV4yEwHRSG6/dLo1/1j4JLd+RnZ3ZtGNIuISKu+evWRiIh0QEVBRERaqSiIiEgrFQUREWmloiAiIq1UFKRXMLPqGO/vQTM7sIfeq8kiM6YuM7MXupp11Mz6m9kVPbFvkfZ0Sar0CmZW7e7ZPfh+Kf75pG+
BapvdzB4FPnT3n3XSfjTwortPiEU+6Vt0pCC9lpkVmtlMM3svepsa3X6Emc01s0Vm9raZjY9uv9jMnjez/wP+bmbHm9lrZvaMRdYPeLxlvvro9uLo/eroBHfvm9k8Mxsc3T42+nipmf1nN49m5vL5xH/ZZvZ3M1sYfY8zom3+GxgbPbq4Pdr2huhnXGJm/9GDX0bpY1QUpDe7C7jT3ScD3wQejG5fBXzF3Q8jMkPpz9u85nDgbHc/Lvr4MOBa4EBgH2BqB/vJAua5+0RgDvDdNvu/y90P5ouzWXYoOnfPiURGmQPUAme5++FE1vD4ZbQo/Qj4xN0PdfcbzOwkYF/gCOBQYJKZHdvV/kQ60hcnxJO+4x+AA9vMSpkbna0yD3jUzPYlMlNsapvXvOrubefBf9fd1wGY2WIi89m82W4/9Xw+qeAC4B+j94/i8/nt/wD8zy5y9ou+93BgJZH58iEyn83Po7/gm6PPD+7g9SdFb4uij7OJFIk5u9ifyC6pKEhvlgRMcffathvN7G5gtrufFT0//1qbp2vavUddm/tNdPwz0+Cfd87tqk1ndrr7odFpwV8GrgT+l8haC4XAJHdvMLPVQEYHrzfgv9z9N7u5X5Ev0ekj6c1eITILKQBm1jJtcR6fTzF8cYD7n0fktBXAeV01dvcdRJbnvM7MUojkLI0WhBOAUdGmVUBOm5e+DFwaPQrCzIab2aAe+gzSx6goSG+RaWbr2tx+SOQXbHG083UFkSnPAX4B/JeZLSLYo+VrgR+a2RIii6dUdPUCd19EZPbU84mstVBsZkuBi4j0heDuW4C3opew3u7urxA5PTU32vYZvlg0RLpNl6SKBCR6Ominu7uZnQec7+5ndPU6kTCpT0EkOJOAu6NXDG0nRkueiuwNHSmIiEgr9SmIiEgrFQUREWmloiAiIq1UFEREpJWKgoiItFJREBGRVv8fJUCa7fU8AMMAAAAASUVORK5CYII=\n", 434 | "text/plain": [ 435 | "
" 436 | ] 437 | }, 438 | "metadata": { 439 | "needs_background": "light" 440 | }, 441 | "output_type": "display_data" 442 | } 443 | ], 444 | "source": [ 445 | "# learn.freeze()\n", 446 | "# learn.save(f'{name}x')\n", 447 | "# learn.lr_find()\n", 448 | "# learn.recorder.plot()" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 43, 454 | "metadata": { 455 | "scrolled": true 456 | }, 457 | "outputs": [ 458 | { 459 | "name": "stdout", 460 | "output_type": "stream", 461 | "text": [ 462 | "0 1 0.01\n" 463 | ] 464 | }, 465 | { 466 | "data": { 467 | "text/html": [ 468 | "\n", 469 | "
\n", 470 | " \n", 482 | " \n", 483 | " 0.00% [0/1 00:00<00:00]\n", 484 | "
\n", 485 | " \n", 486 | "\n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | "
      epoch  train_loss  valid_loss  Total F1  time

\n", 499 | "\n", 500 | "

\n", 501 | " \n", 513 | " \n", 514 | " Interrupted\n", 515 | "
\n", 516 | " " 517 | ], 518 | "text/plain": [ 519 | "" 520 | ] 521 | }, 522 | "metadata": {}, 523 | "output_type": "display_data" 524 | }, 525 | { 526 | "ename": "KeyboardInterrupt", 527 | "evalue": "", 528 | "output_type": "error", 529 | "traceback": [ 530 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 531 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 532 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m ]\n\u001b[1;32m 4\u001b[0m \u001b[0mlrs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlrs\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mstage1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtest_lrs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'{name}x'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mruns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 533 | "\u001b[0;32m\u001b[0m in \u001b[0;36mtest_lrs\u001b[0;34m(stage, all_lrs, runs)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mruns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mlearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mlearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_one_cycle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmoms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.8\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0.7\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mi_res\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'f1s'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecorder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 534 | "\u001b[0;32m~/anaconda3/envs/cuda/lib/python3.7/site-packages/fastai/train.py\u001b[0m in \u001b[0;36mfit_one_cycle\u001b[0;34m(learn, cyc_len, max_lr, moms, div_factor, pct_start, final_div, wd, callbacks, tot_epochs, start_epoch)\u001b[0m\n\u001b[1;32m 20\u001b[0m callbacks.append(OneCycleScheduler(learn, max_lr, moms=moms, div_factor=div_factor, pct_start=pct_start,\n\u001b[1;32m 21\u001b[0m final_div=final_div, tot_epochs=tot_epochs, start_epoch=start_epoch))\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mlearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcyc_len\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mmax_lr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwd\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwd\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mlr_find\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlearn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mLearner\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstart_lr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mFloats\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1e-7\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend_lr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mFloats\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_it\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstop_div\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwd\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 535 | "\u001b[0;32m~/anaconda3/envs/cuda/lib/python3.7/site-packages/fastai/basic_train.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, epochs, lr, wd, callbacks)\u001b[0m\n\u001b[1;32m 194\u001b[0m \u001b[0mcallbacks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mcb\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcb\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_fns\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mlistify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdefaults\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextra_callbacks\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mcallbacks\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mdefaults\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextra_callbacks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 196\u001b[0;31m \u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 197\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcreate_opt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mFloats\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwd\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mFloats\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m->\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 536 | 
"\u001b[0;32m~/anaconda3/envs/cuda/lib/python3.7/site-packages/fastai/basic_train.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(epochs, learn, callbacks, metrics)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mxb\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0myb\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mprogress_bar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_dl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpbar\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mxb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0myb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcb_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0myb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloss_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0myb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_func\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcb_handler\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 101\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcb_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_batch_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 537 | "\u001b[0;32m~/anaconda3/envs/cuda/lib/python3.7/site-packages/fastai/basic_train.py\u001b[0m in \u001b[0;36mloss_batch\u001b[0;34m(model, xb, yb, loss_func, opt, cb_handler)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mskip_bwd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcb_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_backward_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mskip_bwd\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 34\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcb_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_backward_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mopt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 35\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcb_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_step_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m 
\u001b[0mopt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 538 | "\u001b[0;32m~/anaconda3/envs/cuda/lib/python3.7/site-packages/fastai/callback.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpg2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'params'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmul_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mwd\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mlr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_val\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'weight_decay'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlistify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_wd\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 56\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m->\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 539 | "\u001b[0;32m~/fyp/bert/optimizer.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0;31m# Add grad clipping\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 131\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgroup\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'max_grad_norm'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 132\u001b[0;31m \u001b[0mclip_grad_norm_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroup\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'max_grad_norm'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 133\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;31m# Decay the first and second moment running average coefficient\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 540 | "\u001b[0;32m~/anaconda3/envs/cuda/lib/python3.7/site-packages/torch/nn/utils/clip_grad.py\u001b[0m in \u001b[0;36mclip_grad_norm_\u001b[0;34m(parameters, max_norm, norm_type)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0mtotal_norm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m 
\u001b[0mparam_norm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnorm_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0mtotal_norm\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mparam_norm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0mnorm_type\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0mtotal_norm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtotal_norm\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m1.\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mnorm_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 541 | "\u001b[0;32m~/anaconda3/envs/cuda/lib/python3.7/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mnorm\u001b[0;34m(self, p, dim, keepdim)\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"fro\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeepdim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 251\u001b[0m \u001b[0;34mr\"\"\"See :func: `torch.norm`\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 252\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdim\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeepdim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 253\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mbtrifact\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minfo\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpivot\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 542 | "\u001b[0;32m~/anaconda3/envs/cuda/lib/python3.7/site-packages/torch/functional.py\u001b[0m in \u001b[0;36mnorm\u001b[0;34m(input, p, dim, keepdim, out)\u001b[0m\n\u001b[1;32m 713\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_VariableFunctions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrobenius_norm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 714\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mp\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m\"nuc\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 715\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_VariableFunctions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 716\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 717\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mp\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"fro\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 543 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 544 | ] 545 | } 546 | ], 547 | "source": [ 548 | "# lrs = [\n", 549 | "# 1e-02, 2e-02, 8e-3, 5e-3, 5e-4,\n", 550 | "# ]\n", 551 | "# lrs = [(i, 1) for i in lrs]\n", 552 | "# stage1 = test_lrs(f'{name}x', lrs)" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": 27, 558 | "metadata": { 559 | "scrolled": true 560 | }, 561 | "outputs": [ 562 | { 563 | "data": { 564 | "text/html": [ 565 | "
\n", 566 | "\n", 579 | "\n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | "
    f1        f1s                                        lrm  lrs    train                                               val       vals
2   0.873526  [0.875592416561694, 0.8714589230127644]    1    0.008  [[tensor(2.1103), tensor(1.9529), tensor(1.762...  0.122910  [0.12528615, 0.1205342]
3   0.870685  [0.869699672411258, 0.8716701270587013]    1    0.005  [[tensor(2.1416), tensor(2.0134), tensor(1.872...  0.126849  [0.12991036, 0.12378812]
0   0.870211  [0.8718372156604313, 0.8685855840228345]   1    0.010  [[tensor(2.1820), tensor(1.9064), tensor(1.698...  0.128747  [0.12960954, 0.12788524]
1   0.868124  [0.8678646929460414, 0.8683840744413774]   1    0.020  [[tensor(2.1505), tensor(1.7335), tensor(1.451...  0.154693  [0.15259233, 0.1567946]
\n", 635 | "
" 636 | ], 637 | "text/plain": [ 638 | " f1 f1s lrm lrs \\\n", 639 | "2 0.873526 [0.875592416561694, 0.8714589230127644] 1 0.008 \n", 640 | "3 0.870685 [0.869699672411258, 0.8716701270587013] 1 0.005 \n", 641 | "0 0.870211 [0.8718372156604313, 0.8685855840228345] 1 0.010 \n", 642 | "1 0.868124 [0.8678646929460414, 0.8683840744413774] 1 0.020 \n", 643 | "\n", 644 | " train val \\\n", 645 | "2 [[tensor(2.1103), tensor(1.9529), tensor(1.762... 0.122910 \n", 646 | "3 [[tensor(2.1416), tensor(2.0134), tensor(1.872... 0.126849 \n", 647 | "0 [[tensor(2.1820), tensor(1.9064), tensor(1.698... 0.128747 \n", 648 | "1 [[tensor(2.1505), tensor(1.7335), tensor(1.451... 0.154693 \n", 649 | "\n", 650 | " vals \n", 651 | "2 [0.12528615, 0.1205342] \n", 652 | "3 [0.12991036, 0.12378812] \n", 653 | "0 [0.12960954, 0.12788524] \n", 654 | "1 [0.15259233, 0.1567946] " 655 | ] 656 | }, 657 | "execution_count": 27, 658 | "metadata": {}, 659 | "output_type": "execute_result" 660 | } 661 | ], 662 | "source": [ 663 | "# df = pd.DataFrame(stage1).sort_values(by=['f1', 'val'],ascending=[False, True])\n", 664 | "# df.to_csv(csvn, mode='a')\n", 665 | "# df" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 25, 671 | "metadata": {}, 672 | "outputs": [ 673 | { 674 | "data": { 675 | "text/html": [ 676 | "Total time: 03:52

\n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | "
epoch  train_loss  valid_loss  Total F1  time
0      0.209884    0.160633    0.874354  03:52
" 696 | ], 697 | "text/plain": [ 698 | "" 699 | ] 700 | }, 701 | "metadata": {}, 702 | "output_type": "display_data" 703 | } 704 | ], 705 | "source": [ 706 | "# learn.load(f'{name}x')\n", 707 | "# lr0 = df.iloc[0]['lrs']\n", 708 | "# learn.fit_one_cycle(1, lr0, moms=(0.8,0.7))\n", 709 | "# #learn.fit_one_cycle(1, 0.008, moms=(0.8,0.7))" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": 27, 715 | "metadata": { 716 | "scrolled": true 717 | }, 718 | "outputs": [ 719 | { 720 | "data": { 721 | "text/html": [], 722 | "text/plain": [ 723 | "" 724 | ] 725 | }, 726 | "metadata": {}, 727 | "output_type": "display_data" 728 | }, 729 | { 730 | "name": "stdout", 731 | "output_type": "stream", 732 | "text": [ 733 | "LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.\n" 734 | ] 735 | }, 736 | { 737 | "data": { 738 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8lOW99/HPLysBQgiENWEJq4KASABRcamoqKeibR+3alurta1a28fT8xx7bG2PraetHu2mbbXntNa9atWiYtVatG4IAWTfEcgCEpawZs/v+WPu6JAmTEIymcnk+3695sXMvcz8chvnm/u67vu6zN0RERE5mqRYFyAiIvFPYSEiIhEpLEREJCKFhYiIRKSwEBGRiBQWIiISkcJCREQiUliIiEhECgsREYkoJdYFtJecnBwfPnx4rMsQEelUFi9evMvd+0XaLmHCYvjw4RQWFsa6DBGRTsXMtrZkOzVDiYhIRAoLERGJSGEhIiIRKSxERCQihYWIiESksBARkYgUFiIiEpHCQkSkE3t2STFPFRZF/XMUFiIinZS7c9/8jTy/tCTqn6WwEBHppNbuOMDmskNcOHFQ1D9LYSEi0knNW7GdJIPzxg+M+mcpLEREOiF356UV2zl5RF9yeqZH/fMUFiIinVBHNkGBwkJEpFPqyCYoUFiIiHQ6Hd0EBQoLEZFOp6EJ6oIJHdMEBQoLEZFOp6EJavYJHdMEBQoLEZFOJRZNUKCwEBHpVGLRBAUKCxGRTiUWTVCgsBAR6TRi1QQFCgsRkU4jVk1QoLAQEek0YtUEBQoLEZFOIZZNUBDlsDCz2Wa2zsw2mtmtTaz/mpmtMLMPzOxtMxsXtu47wX7rzOy8aNYpIhLvYtkEBVEMCzNLBu4HzgfGAVeEh0HgcXef4O4nAncB9wb7jgMuB8YDs4FfB+8nItIlxbIJCqJ7ZjEN2Ojum929GngSmBO+gbvvD3vZA/Dg+RzgSXevcvcPgY3B+4mIdDmxboKC6IZFLhA+MWxxsOwIZnajmW0idGZxcyv3vd7MCs2ssKysrN0KFxGJJ7FugoI46OB29/vdfSTw78B3W7nvg+5e4O4F/fr1i06BIiIxFusmKIhuWJQAQ8Je5wXLmvMkcPEx7isikpDioQkKohsWi4DRZpZvZmmEOqznhm9gZqPDXl4IbAiezwUuN7N0M8sHRgMLo1iriEhcWvdR7JugAFKi9cbuXmtmNwGvAMnA7919lZndARS6+1zgJjObBdQAe4EvBvuuMrOngNVALXCju9dFq1YRkXi1vGgfADNH58S0jqiFBYC7zwPmNVp2e9jzbx5l3zuBO6NXnYhI/Cspr8AMBmVlxLSOmHdwi4hI80rLK+ifmU5aSmy/rhUWIiJxrHRfBYN7x/asAhQWIiJxrbS8UmEhIiLNc3dKyivIU1iIiEhzdh+qprq2XmcWIiLSvNLyCgCFhYiINO+TsOgW40oUFiIicaukvBKAXJ1ZiIhIc0rLK+ielkxWRmqsS1FYiIjEq5K9oXsszCzWpSgsRETiVbzckAcKCxGRuFVaXkFuHHRug8JCRCQuVdbUsetgNYNjPIBgA4WFiEgc2r4vdCWUmqFERKRZ8XRDHigsRETiUkkQFvFwjwUoLERE4lJpMOnRgKzYzbsdTmEhIhKHSssr6NcznfSU5FiXAigsRETiUrzMY9FAYSEiEodC91goLEREpBkNkx7Fw2izDRQWIiJxZs+haqpq63VmISIizSstj68b8kBhISISd0ri7IY8UFiIiMSd0ji7IQ8UFiIicae0vIKM1GR6d4/9pEcNohoWZjbbzNaZ2UYzu7WJ9beY2WozW25mr5vZsLB1d5nZKjNbY2a/tHiY/UNEpAOE5rHoFheTHjWIWliYWTJwP3A+MA64wszGNdpsKVDg7hOBZ4C7gn1PAU4FJgInAFOBM6JVq4hIPGmYIS+eRPPMYhqw0d03u3s18CQwJ3wDd5/v7oeDlwuAvIZVQDcgDUgHUoGPoliriEjcKCmvjKv+CohuWOQCRWGvi4NlzbkWeBnA3d8D5gPbg8cr7r6m8Q5mdr2ZFZpZYVlZWbsVLiISK6FJj6q61JlFi5nZVUABcHfwehRwPKEzjVzgU2Y2s/F+7v6guxe4e0G/fv06smQRkajYEWeTHjWIZliUAEPCXucFy45gZrOA24CL3L0qWHwJsMDdD7r7QUJnHDOiWKuISFz4ZNKj+BnqA6IbFouA0WaWb2ZpwOXA3PANzGwy8AChoNgZtmobcIaZpZhZKqHO7X9qhhIRSTTxNulRg6iFhbvXAjcBrxD6on/K3VeZ2R1mdlGw2d1AT+BpM/vAzBrC5BlgE7ACWAYsc/cXolWriEi8aBjqY2BWfJ1ZpETzzd19HjCv0bLbw57Pama/OuCr0axNRCQelZZX0C8zfiY9ahAXHdwiIhISuiEvvpqgQGEhIhJXSsoryI2zzm1QWIiIxA13p7S8gsFZOrMQEZFm7D1cQ2VNvZqhRESkeR8PTZ6tsBARkWbE6z0WoLAQEYkbpXE4Q14DhYWISJwoLa+gW2oS2XE06VEDhYWISJwoLa9kcO+MuJr0qIHCQkQkThSXV8RlfwUoLERE4ka83mMBCgsRkbhQVVtH2YH4m/SogcJCRCQOfDLpUfwN9QE
[... base64-encoded PNG data truncated: learn.recorder.plot() output from learn.lr_find(), loss vs. learning rate ...]", 739 | "text/plain": [ 740 | "

" 741 | ] 742 | }, 743 | "metadata": { 744 | "needs_background": "light" 745 | }, 746 | "output_type": "display_data" 747 | } 748 | ], 749 | "source": [ 750 | "# learn.freeze_to(-3)\n", 751 | "# learn.save(f'{name}x3')\n", 752 | "# learn.lr_find()\n", 753 | "# learn.recorder.plot()" 754 | ] 755 | }, 756 | { 757 | "cell_type": "code", 758 | "execution_count": null, 759 | "metadata": {}, 760 | "outputs": [], 761 | "source": [ 762 | "# lrs =[]\n", 763 | "# lrms = [2.6, 1.6, 1]\n", 764 | "# lrs.append(chtw2)\n", 765 | "# lrs.append(chtw)\n", 766 | "# for lrm in lrms:\n", 767 | "# div = (lrm**num_layers)\n", 768 | "# lrs.append((learn.lr_range(slice(2e-4/div, 2e-4)),lrm))\n", 769 | "# lrs.append((learn.lr_range(slice(3e-4/div, 3e-4)),lrm))\n", 770 | "# lrs.append((learn.lr_range(slice(5e-4/div, 5e-4)),lrm))\n", 771 | "# if lrm!=1: lrs.append((learn.lr_range(slice(lr0/div, lr0)),lrm))\n", 772 | "# stage2 = test_lrs(f'{name}x3', lrs)" 773 | ] 774 | }, 775 | { 776 | "cell_type": "code", 777 | "execution_count": null, 778 | "metadata": {}, 779 | "outputs": [], 780 | "source": [ 781 | "# df = pd.DataFrame(stage2).sort_values(by=['f1', 'val'],ascending=[False, True])\n", 782 | "# df.to_csv(csvn, mode='a')\n", 783 | "# df" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": 27, 789 | "metadata": {}, 790 | "outputs": [ 791 | { 792 | "data": { 793 | "text/html": [ 794 | "Total time: 05:49

\n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | "
epoch  train_loss  valid_loss  Total F1  time
0      0.029114    0.051450    0.928314  05:49
" 814 | ], 815 | "text/plain": [ 816 | "" 817 | ] 818 | }, 819 | "metadata": {}, 820 | "output_type": "display_data" 821 | } 822 | ], 823 | "source": [ 824 | "# learn.load(f'{name}x3')\n", 825 | "# # lrs = learn.lr_range(slice(1e-5/(2.6**14),1e-5))\n", 826 | "# learn.fit_one_cycle(1,df.iloc[0]['lrs'], moms=(0.8,0.7))" 827 | ] 828 | }, 829 | { 830 | "cell_type": "code", 831 | "execution_count": null, 832 | "metadata": {}, 833 | "outputs": [], 834 | "source": [ 835 | "# learn.freeze_to(-6)\n", 836 | "# learn.save(f'{name}x6')\n", 837 | "# learn.lr_find()\n", 838 | "# learn.recorder.plot()\n" 839 | ] 840 | }, 841 | { 842 | "cell_type": "code", 843 | "execution_count": null, 844 | "metadata": {}, 845 | "outputs": [], 846 | "source": [ 847 | "# lrs =[]\n", 848 | "# lrms = [2.6, 1.6, 1]\n", 849 | "# lrs.append(chtw)\n", 850 | "# lrs.append(chtw2)\n", 851 | "# for lrm in lrms:\n", 852 | "# div = (lrm**num_layers)\n", 853 | "# if lrm!= 1: lrs.append((learn.lr_range(slice(lr0/div, lr0)),lrm))\n", 854 | "# lrs.append((learn.lr_range(slice(3e-4/div, 3e-4)),lrm))\n", 855 | "# lrs.append((learn.lr_range(slice(1e-4/div, 1e-4)),lrm))\n", 856 | "# lrs.append((learn.lr_range(slice(5e-5/div, 5e-5)),lrm))\n", 857 | "# stage3 = test_lrs(f'{name}x6', lrs)" 858 | ] 859 | }, 860 | { 861 | "cell_type": "code", 862 | "execution_count": null, 863 | "metadata": {}, 864 | "outputs": [], 865 | "source": [ 866 | "# df = pd.DataFrame(stage3).sort_values(by=['f1', 'val'],ascending=[False, True])\n", 867 | "# df.to_csv(csvn, mode='a')\n", 868 | "# df" 869 | ] 870 | }, 871 | { 872 | "cell_type": "code", 873 | "execution_count": null, 874 | "metadata": {}, 875 | "outputs": [], 876 | "source": [ 877 | "# learn.load(f'{name}x6')\n", 878 | "# learn.fit_one_cycle(1, df.iloc[0]['lrs'], moms=(0.8, 0.7))" 879 | ] 880 | }, 881 | { 882 | "cell_type": "code", 883 | "execution_count": null, 884 | "metadata": {}, 885 | "outputs": [], 886 | "source": [ 887 | "# learn.freeze_to(-12)\n", 888 | "# learn.save(f'{name}x12')\n", 889 | "# learn.lr_find()\n", 890 | "# learn.recorder.plot()" 891 | ] 892 | }, 893 | { 894 | "cell_type": "code", 895 | "execution_count": null, 896 | "metadata": {}, 897 | "outputs": [], 898 | "source": [ 899 | "lrs =[]\n", 900 | "lr0 = 0.01\n", 901 | "lrms = [2.6, 1.6, 1] # 2.6,\n", 902 | "lrs.append(chtw)\n", 903 | "lrs.append(chtw2)\n", 904 | "for lrm in lrms:\n", 905 | " div = (lrm**num_layers)\n", 906 | " if lrm!= 1: lrs.append((learn.lr_range(slice(lr0/div, lr0)),lrm))\n", 907 | " lrs.append((learn.lr_range(slice(3e-4/div, 3e-4)),lrm))\n", 908 | " lrs.append((learn.lr_range(slice(1e-4/div, 1e-4)),lrm))\n", 909 | " lrs.append((learn.lr_range(slice(5e-5/div, 5e-5)),lrm))\n", 910 | "print(lrs)" 911 | ] 912 | }, 913 | { 914 | "cell_type": "code", 915 | "execution_count": null, 916 | "metadata": {}, 917 | "outputs": [], 918 | "source": [ 919 | "stage12 = test_lrs(f'{name}x12', lrs)" 920 | ] 921 | }, 922 | { 923 | "cell_type": "code", 924 | "execution_count": null, 925 | "metadata": {}, 926 | "outputs": [], 927 | "source": [ 928 | "df = pd.DataFrame(stage12).sort_values(by=['f1', 'val'],ascending=[False, True])\n", 929 | "df.to_csv(csvn, mode='a')\n", 930 | "df" 931 | ] 932 | }, 933 | { 934 | "cell_type": "code", 935 | "execution_count": null, 936 | "metadata": {}, 937 | "outputs": [], 938 | "source": [] 939 | }, 940 | { 941 | "cell_type": "code", 942 | "execution_count": null, 943 | "metadata": {}, 944 | "outputs": [], 945 | "source": [ 946 | "learn.load(f'{name}x12')\n", 947 | "learn.fit_one_cycle(1, 
df.iloc[0]['lrs'], moms=(0.8, 0.7))\n", 948 | "learn.recorder.plot_losses()" 949 | ] 950 | }, 951 | { 952 | "cell_type": "code", 953 | "execution_count": null, 954 | "metadata": {}, 955 | "outputs": [], 956 | "source": [ 957 | "learn.unfreeze()\n", 958 | "learn.save(f'{name}x15')\n", 959 | "learn.lr_find()\n", 960 | "learn.recorder.plot()" 961 | ] 962 | }, 963 | { 964 | "cell_type": "code", 965 | "execution_count": null, 966 | "metadata": {}, 967 | "outputs": [], 968 | "source": [] 969 | }, 970 | { 971 | "cell_type": "code", 972 | "execution_count": null, 973 | "metadata": {}, 974 | "outputs": [], 975 | "source": [ 976 | "lrs =[]\n", 977 | "lrms = [2.6, 1.6, 1]#2.6, \n", 978 | "lrs.append(chtw)\n", 979 | "lrs.append(chtw2)\n", 980 | "for lrm in lrms:\n", 981 | " div = (lrm**num_layers)\n", 982 | " if lrm!= 1: lrs.append((learn.lr_range(slice(lr0/div, lr0)),lrm))\n", 983 | " lrs.append((learn.lr_range(slice(1e-4/div, 1e-4)),lrm))\n", 984 | " lrs.append((learn.lr_range(slice(2e-5/div, 2e-5)),lrm))\n", 985 | " lrs.append((learn.lr_range(slice(3e-5/div, 3e-5)),lrm))\n", 986 | " lrs.append((learn.lr_range(slice(5e-5/div, 5e-5)),lrm))" 987 | ] 988 | }, 989 | { 990 | "cell_type": "code", 991 | "execution_count": null, 992 | "metadata": {}, 993 | "outputs": [], 994 | "source": [ 995 | "stage15 = test_lrs(f'{name}x15', lrs)" 996 | ] 997 | }, 998 | { 999 | "cell_type": "code", 1000 | "execution_count": null, 1001 | "metadata": {}, 1002 | "outputs": [], 1003 | "source": [ 1004 | "df = pd.DataFrame(stage15).sort_values(by=['f1', 'val'],ascending=[False, True])\n", 1005 | "df.to_csv(csvn, mode='a')\n", 1006 | "df" 1007 | ] 1008 | }, 1009 | { 1010 | "cell_type": "code", 1011 | "execution_count": null, 1012 | "metadata": { 1013 | "scrolled": true 1014 | }, 1015 | "outputs": [], 1016 | "source": [ 1017 | "learn.load(f'{name}x15')\n", 1018 | "learn.fit_one_cycle(1, df.iloc[0]['lrs'], moms=(0.8, 0.7))\n", 1019 | "learn.recorder.plot_losses()" 1020 | ] 1021 | }, 1022 | { 1023 | "cell_type": "code", 1024 | "execution_count": null, 1025 | "metadata": { 1026 | "scrolled": true 1027 | }, 1028 | "outputs": [], 1029 | "source": [ 1030 | "learn.recorder.metrics\n", 1031 | "learn.recorder.val_losses\n", 1032 | "learn.recorder.losses" 1033 | ] 1034 | }, 1035 | { 1036 | "cell_type": "code", 1037 | "execution_count": null, 1038 | "metadata": {}, 1039 | "outputs": [], 1040 | "source": [] 1041 | }, 1042 | { 1043 | "cell_type": "code", 1044 | "execution_count": null, 1045 | "metadata": {}, 1046 | "outputs": [], 1047 | "source": [ 1048 | "met = learn.validate(test_dl, metrics=metrics)\n", 1049 | "met" 1050 | ] 1051 | }, 1052 | { 1053 | "cell_type": "code", 1054 | "execution_count": null, 1055 | "metadata": {}, 1056 | "outputs": [], 1057 | "source": [ 1058 | "df = pd.DataFrame(met)\n", 1059 | "df.to_csv(csvn, mode='a')\n", 1060 | "df" 1061 | ] 1062 | }, 1063 | { 1064 | "cell_type": "code", 1065 | "execution_count": null, 1066 | "metadata": {}, 1067 | "outputs": [], 1068 | "source": [] 1069 | } 1070 | ], 1071 | "metadata": { 1072 | "kernelspec": { 1073 | "display_name": "cuda", 1074 | "language": "python", 1075 | "name": "cuda" 1076 | }, 1077 | "language_info": { 1078 | "codemirror_mode": { 1079 | "name": "ipython", 1080 | "version": 3 1081 | }, 1082 | "file_extension": ".py", 1083 | "mimetype": "text/x-python", 1084 | "name": "python", 1085 | "nbconvert_exporter": "python", 1086 | "pygments_lexer": "ipython3", 1087 | "version": "3.7.3" 1088 | } 1089 | }, 1090 | "nbformat": 4, 1091 | "nbformat_minor": 2 1092 | } 1093 | 
--------------------------------------------------------------------------------