├── run_tagger.sh ├── README.md ├── run_influence.sh ├── data ├── negated_small_heuristics_evaluation_set.txt └── small_heuristics_evaluation_set.txt ├── bert_tagger.py ├── bert_token_influence.py ├── bert_influence.py ├── bert_loo_tagger.py ├── bert_util.py ├── NLI_mask_token_analysis.ipynb ├── SA_mask_token_analysis.ipynb └── NLI_analysis.ipynb /run_tagger.sh: -------------------------------------------------------------------------------- 1 | # tagger 2 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2019 --do_lower_case --num_train_samples=10000 --task="SA" --output_dir="SA_tagger_output_bert_e3/" 3 | 4 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2019 --do_lower_case --num_train_samples=10000 --task="NLI" --output_dir="NLI_tagger_output_bert_e3/" 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Influence function analysis 2 | 3 | The main environment requirements for this project are `python 3.6`, `pytorch 1.2.0`, and `pytorch-pretrained-bert 0.6.1`. 4 | 5 | Datasets used in the paper can be found in `data/`, except for `mnli_train.tsv` (too large), which can be downloaded from https://gluebenchmark.com/tasks. 6 | 7 | `run_tagger.sh`: finetune a BERT model for sentiment analysis (SA) and natural language inference (NLI). 8 | 9 | `run_influence.sh`: generate influence function results in the paper, including influence score calculation, leave-one-out (LOO) training, and token removals. 10 | 11 | `SA_analysis.ipynb`: see influential examples in SA (Figure 1 in the paper). 12 | 13 | `NLI_analysis.ipynb`: see influential examples in NLI (Table 5 in the paper). 14 | 15 | `LOO.ipynb`: see LOO training results for both SA and NLI (Table 1 and 2 in the paper). 16 | 17 | `SA_sal_if_direct_consistency.ipynb`, `NLI_sal_if_direct_consistency.ipynb`: Figure 2 and 3 in the paper. 18 | 19 | `SA_mask_token_analysis.ipynb`, `NLI_mask_token_analysis.ipynb`: Table 3 and 4 in the paper. 20 | 21 | `NLI_heuristic_analysis.ipynb`: Section 5.1 in the paper. 22 | 23 | If you have any questions, please email Xiaochuang Han at xiaochuang.han@gmail.com. Thank you! 24 | -------------------------------------------------------------------------------- /run_influence.sh: -------------------------------------------------------------------------------- 1 | # influence 2 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_influence.py --data_dir="data/" --bert_model="bert-base-uncased" --trained_model_dir="SA_tagger_output_bert_e3/" --max_seq_length=128 --train_batch_size=8 --seed=2019 --do_lower_case --num_train_samples=10000 --damping=3e-3 --lissa_repeat=1 --lissa_depth=0.25 --influence_on_decision --start_test_idx=1 --end_test_idx=50 --task="SA" --output_dir="SA_influence_output_bert_e3" 3 | 4 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_influence.py --data_dir="data/" --bert_model="bert-base-uncased" --trained_model_dir="NLI_tagger_output_bert_e3/" --max_seq_length=128 --train_batch_size=8 --seed=2019 --do_lower_case --num_train_samples=10000 --damping=3e-3 --lissa_repeat=1 --lissa_depth=0.25 --influence_on_decision --start_test_idx=1 --end_test_idx=30 --task="NLI" --output_dir="NLI_influence_output_bert_e3" 5 | 6 | # loo 7 | for i in {1..50} 8 | do 9 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_loo_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2019 --do_lower_case --num_train_samples=10000 --task="SA" --output_dir="SA_loo_tagger_output_bert_e3_s19_c0/" --test_idx=$i --influence_file_dir="SA_influence_output_bert_e3" --loo_percentage=0.1 10 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_loo_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2029 --do_lower_case --num_train_samples=10000 --task="SA" --output_dir="SA_loo_tagger_output_bert_e3_s29_c0/" --test_idx=$i --influence_file_dir="SA_influence_output_bert_e3" --loo_percentage=0.1 11 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_loo_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2039 --do_lower_case --num_train_samples=10000 --task="SA" --output_dir="SA_loo_tagger_output_bert_e3_s39_c0/" --test_idx=$i --influence_file_dir="SA_influence_output_bert_e3" --loo_percentage=0.1 12 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_loo_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2049 --do_lower_case --num_train_samples=10000 --task="SA" --output_dir="SA_loo_tagger_output_bert_e3_s49_c0/" --test_idx=$i --influence_file_dir="SA_influence_output_bert_e3" --loo_percentage=0.1 13 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_loo_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2059 --do_lower_case --num_train_samples=10000 --task="SA" --output_dir="SA_loo_tagger_output_bert_e3_s59_c0/" --test_idx=$i --influence_file_dir="SA_influence_output_bert_e3" --loo_percentage=0.1 14 | done 15 | 16 | for i in {1..30} 17 | do 18 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_loo_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2019 --do_lower_case --num_train_samples=10000 --task="NLI" --output_dir="NLI_loo_tagger_output_bert_e3_s19_c0/" --test_idx=$i --influence_file_dir="NLI_influence_output_bert_e3" --loo_percentage=0.1 19 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_loo_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2029 --do_lower_case --num_train_samples=10000 --task="NLI" --output_dir="NLI_loo_tagger_output_bert_e3_s29_c0/" --test_idx=$i --influence_file_dir="NLI_influence_output_bert_e3" --loo_percentage=0.1 20 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_loo_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2039 --do_lower_case --num_train_samples=10000 --task="NLI" --output_dir="NLI_loo_tagger_output_bert_e3_s39_c0/" --test_idx=$i --influence_file_dir="NLI_influence_output_bert_e3" --loo_percentage=0.1 21 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_loo_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2049 --do_lower_case --num_train_samples=10000 --task="NLI" --output_dir="NLI_loo_tagger_output_bert_e3_s49_c0/" --test_idx=$i --influence_file_dir="NLI_influence_output_bert_e3" --loo_percentage=0.1 22 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_loo_tagger.py --data_dir="data/" --bert_model="bert-base-uncased" --max_seq_length=128 --do_train --do_test --train_batch_size=32 --eval_batch_size=1 --learning_rate=5e-5 --num_train_epochs=3 --warmup_proportion=0.1 --seed=2059 --do_lower_case --num_train_samples=10000 --task="NLI" --output_dir="NLI_loo_tagger_output_bert_e3_s59_c0/" --test_idx=$i --influence_file_dir="NLI_influence_output_bert_e3" --loo_percentage=0.1 23 | done 24 | 25 | # token-level 26 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_token_influence.py --data_dir="data/" --bert_model="bert-base-uncased" --trained_model_dir="SA_tagger_output_bert_e3/" --max_seq_length=128 --train_batch_size=8 --seed=2019 --do_lower_case --num_train_samples=10000 --damping=3e-3 --lissa_repeat=1 --lissa_depth=0.25 --influence_on_decision --start_test_idx=1 --end_test_idx=50 --task="SA" --output_dir="SA_diff_influence_output_bert_e3" --mask_token 27 | 28 | CUDA_VISIBLE_DEVICES='0' python -W ignore bert_token_influence.py --data_dir="data/" --bert_model="bert-base-uncased" --trained_model_dir="NLI_tagger_output_bert_e3/" --max_seq_length=128 --train_batch_size=8 --seed=2019 --do_lower_case --num_train_samples=10000 --damping=3e-3 --lissa_repeat=1 --lissa_depth=0.25 --influence_on_decision --start_test_idx=1 --end_test_idx=30 --task="NLI" --output_dir="NLI_diff_influence_output_bert_e3" --mask_token 29 | -------------------------------------------------------------------------------- /data/negated_small_heuristics_evaluation_set.txt: -------------------------------------------------------------------------------- 1 | gold_label sentence1_binary_parse sentence2_binary_parse sentence1_parse sentence2_parse sentence1 sentence2 pairID heuristic subcase template 2 | non-entailment ( ( ( The tourist ) ( who ( ( the banker ) contacted ) ) ) ( ( called ( the lawyers ) ) . ) ) ( ( The banker ) ( ( contacted ( the tourist ) ) . ) ) (ROOT (S (NP (NP (DT The) (NN tourist)) (SBAR (WHNP (WP who)) (S (NP (DT the) (NN banker)) (VP (VBD contacted))))) (VP (VBD called) (NP (DT the) (NNS lawyers))) (. .))) (ROOT (S (NP (DT The) (NN banker)) (VP (VBD contacted) (NP (DT the) (NN tourist))) (. .))) The tourist who the banker contacted called the lawyers . The banker did not contact the tourist . ex5574 lexical_overlap le_relative_clause temp26 3 | non-entailment ( ( ( The athlete ) ( by ( the doctors ) ) ) ( ( encouraged ( the senator ) ) . ) ) ( ( The athlete ) ( ( encouraged ( the senator ) ) . ) ) (ROOT (S (NP (NP (DT The) (NN athlete)) (PP (IN by) (NP (DT the) (NNS doctors)))) (VP (VBD encouraged) (NP (DT the) (NN senator))) (. .))) (ROOT (S (NP (DT The) (NN athlete)) (VP (VBD encouraged) (NP (DT the) (NN senator))) (. .))) The athlete by the doctors encouraged the senator . The athlete did not encourage the senator . ex6983 lexical_overlap le_around_prepositional_phrase temp30 4 | non-entailment ( ( ( The actors ) ( who ( thanked ( the students ) ) ) ) ( ( admired ( the secretaries ) ) . ) ) ( ( The actors ) ( ( admired ( the secretaries ) ) . ) ) (ROOT (S (NP (NP (DT The) (NNS actors)) (SBAR (WHNP (WP who)) (S (VP (VBD thanked) (NP (DT the) (NNS students)))))) (VP (VBD admired) (NP (DT the) (NNS secretaries))) (. .))) (ROOT (S (NP (DT The) (NNS actors)) (VP (VBD admired) (NP (DT the) (NNS secretaries))) (. .))) The actors who thanked the students admired the secretaries . The actors did not admire the secretaries . ex7263 lexical_overlap le_around_relative_clause temp31 5 | non-entailment ( ( The actors ) ( ( recognized ( ( the lawyer ) ( and ( the professor ) ) ) ) . ) ) ( ( The actors ) ( ( recognized ( the professor ) ) . ) ) (ROOT (S (NP (DT The) (NNS actors)) (VP (VBD recognized) (NP (NP (DT the) (NN lawyer)) (CC and) (NP (DT the) (NN professor)))) (. .))) (ROOT (S (NP (DT The) (NNS actors)) (VP (VBD recognized) (NP (DT the) (NN professor))) (. .))) The actors recognized the lawyer and the professor . The actors did not recognize the professor . ex8976 lexical_overlap le_conjunction temp33 6 | non-entailment ( ( The manager ) ( ( was ( encouraged ( by ( the secretary ) ) ) ) . ) ) ( ( The secretary ) ( ( encouraged ( the manager ) ) . ) ) (ROOT (S (NP (DT The) (NN manager)) (VP (VBD was) (VP (VBN encouraged) (PP (IN by) (NP (DT the) (NN secretary))))) (. .))) (ROOT (S (NP (DT The) (NN secretary)) (VP (VBD encouraged) (NP (DT the) (NN manager))) (. .))) The manager was encouraged by the secretary . The secretary did not encourage the manager . ex9599 lexical_overlap le_passive temp35 7 | non-entailment ( ( ( The students ) ( and ( the secretaries ) ) ) ( ( mentioned ( the doctor ) ) . ) ) ( ( The secretaries ) ( ( mentioned ( the doctor ) ) . ) ) (ROOT (S (NP (NP (DT The) (NNS students)) (CC and) (NP (DT the) (NNS secretaries))) (VP (VBD mentioned) (NP (DT the) (NN doctor))) (. .))) (ROOT (S (NP (DT The) (NNS secretaries)) (VP (VBD mentioned) (NP (DT the) (NN doctor))) (. .))) The students and the secretaries mentioned the doctor . The secretaries did not mention the doctor . ex15274 subsequence se_conjunction temp44 8 | non-entailment ( ( Important athletes ) ( arrived . ) ) ( Athletes ( arrived . ) ) (ROOT (S (NP (JJ Important) (NNS athletes)) (VP (VBD arrived)) (. .))) (ROOT (S (NP (NNS Athletes)) (VP (VBD arrived)) (. .))) Important athletes arrived . Athletes did not arrive . ex16997 subsequence se_adjective temp46 9 | non-entailment ( ( The lawyer ) ( ( won ( the tournament ) ) . ) ) ( ( The lawyer ) ( won . ) ) (ROOT (S (NP (DT The) (NN lawyer)) (VP (VBD won) (NP (DT the) (NN tournament))) (. .))) (ROOT (S (NP (DT The) (NN lawyer)) (VP (VBD won)) (. .))) The lawyer won the tournament . The lawyer did not win . ex17081 subsequence se_understood_object temp47 10 | non-entailment ( ( The students ) ( ( saw ( ( the tourist ) ( that waited ) ) ) . ) ) ( ( The students ) ( ( saw ( the tourist ) ) . ) ) (ROOT (S (NP (DT The) (NNS students)) (VP (VBD saw) (NP (NP (DT the) (NN tourist)) (SBAR (WHNP (WDT that)) (S (VP (VBD waited)))))) (. .))) (ROOT (S (NP (DT The) (NNS students)) (VP (VBD saw) (NP (DT the) (NN tourist))) (. .))) The students saw the tourist that waited . The students did not see the tourist . ex18658 subsequence se_relative_clause_on_obj temp48 11 | non-entailment ( ( The lawyers ) ( ( saw ( ( the professor ) ( behind ( the bankers ) ) ) ) . ) ) ( ( The lawyers ) ( ( saw ( the professor ) ) . ) ) (ROOT (S (NP (DT The) (NNS lawyers)) (VP (VBD saw) (NP (NP (DT the) (NN professor)) (PP (IN behind) (NP (DT the) (NNS bankers))))) (. .))) (ROOT (S (NP (DT The) (NNS lawyers)) (VP (VBD saw) (NP (DT the) (NN professor))) (. .))) The lawyers saw the professor behind the bankers . The lawyers did not see the professor . ex19409 subsequence se_PP_on_obj temp49 12 | non-entailment ( ( While ( ( the scientists ) ( saw ( the tourists ) ) ) ) ( , ( ( ( the artist ) resigned ) . ) ) ) ( ( The scientists ) ( ( saw ( the tourists ) ) . ) ) (ROOT (S (SBAR (IN While) (S (NP (DT the) (NNS scientists)) (VP (VBD saw) (NP (DT the) (NN tourists))))) (, ,) (S (NP (DT the) (NN artist)) (VP (VBD resigned))) (. .))) (ROOT (S (NP (DT The) (NNS scientists)) (VP (VBD saw) (NP (DT the) (NN tourists))) (. .))) While the scientists saw the tourists , the artist resigned . The scientists did not see the tourists . ex25113 constituent ce_embedded_under_since temp59 13 | non-entailment ( ( Since ( ( the author ) ( introduced ( the actors ) ) ) ) ( , ( ( ( the senators ) ( called ( the tourists ) ) ) . ) ) ) ( ( The senators ) ( ( called ( the tourists ) ) . ) ) (ROOT (S (SBAR (IN Since) (S (NP (DT the) (NN author)) (VP (VBD introduced) (NP (DT the) (NN actors))))) (, ,) (S (NP (DT the) (NNS senators)) (VP (VBD called) (NP (DT the) (NN tourists)))) (. .))) (ROOT (S (NP (DT The) (NNS senators)) (VP (VBD called) (NP (DT the) (NN tourists))) (. .))) Since the author introduced the actors , the senators called the tourists . The senators did not call the tourists . ex26200 constituent ce_after_since_clause temp60 14 | non-entailment ( ( The athlete ) ( ( remembered ( that ( ( the artist ) ( introduced ( the doctor ) ) ) ) ) . ) ) ( ( The artist ) ( ( introduced ( the doctor ) ) . ) ) (ROOT (S (NP (DT The) (NN athlete)) (VP (VBD remembered) (SBAR (IN that) (S (NP (DT the) (NN artist)) (VP (VBD introduced) (NP (DT the) (NN doctor)))))) (. .))) (ROOT (S (NP (DT The) (NN artist)) (VP (VBD introduced) (NP (DT the) (NN doctor))) (. .))) The athlete remembered that the artist introduced the doctor . The artist did not introduce the doctor . ex27636 constituent ce_embedded_under_verb temp61 15 | non-entailment ( ( ( The bankers ) ( called ( the student ) ) ) ( , ( and ( ( ( the presidents ) ( recommended ( the secretaries ) ) ) . ) ) ) ) ( ( The presidents ) ( ( recommended ( the secretaries ) ) . ) ) (ROOT (S (S (NP (DT The) (NNS bankers)) (VP (VBD called) (NP (DT the) (NN student)))) (, ,) (CC and) (S (NP (DT the) (NNS presidents)) (VP (VBD recommended) (NP (DT the) (NN secretaries)))) (. .))) (ROOT (S (NP (DT The) (NNS presidents)) (VP (VBD recommended) (NP (DT the) (NN secretaries))) (. .))) The bankers called the student , and the presidents recommended the secretaries . The presidents did not recommend the secretaries . ex28532 constituent ce_conjunction temp63 16 | non-entailment ( Definitely ( ( ( the artists ) ( mentioned ( the professor ) ) ) . ) ) ( ( The artists ) ( ( mentioned ( the professor ) ) . ) ) (ROOT (S (ADVP (RB Definitely)) (S (NP (DT the) (NNS artists)) (VP (VBD mentioned) (NP (DT the) (NN professor)))) (. .))) (ROOT (S (NP (DT The) (NNS artists)) (VP (VBD mentioned) (NP (DT the) (NN professor))) (. .))) Definitely the artists mentioned the professor . The artists did not mention the professor . ex29388 constituent ce_adverb temp68 17 | -------------------------------------------------------------------------------- /data/small_heuristics_evaluation_set.txt: -------------------------------------------------------------------------------- 1 | gold_label sentence1_binary_parse sentence2_binary_parse sentence1_parse sentence2_parse sentence1 sentence2 pairID heuristic subcase template 2 | non-entailment ( ( The managers ) ( ( saw ( the secretaries ) ) . ) ) ( ( The secretaries ) ( ( saw ( the managers ) ) . ) ) (ROOT (S (NP (DT The) (NNS managers)) (VP (VBD saw) (NP (DT the) (NNS secretaries))) (. .))) (ROOT (S (NP (DT The) (NNS secretaries)) (VP (VBD saw) (NP (DT the) (NNS managers))) (. .))) The managers saw the secretaries . The secretaries saw the managers . ex6 lexical_overlap ln_subject/object_swap temp1 3 | non-entailment ( ( The banker ) ( ( recommended ( ( the judge ) ( by ( the presidents ) ) ) ) . ) ) ( ( The judge ) ( ( recommended ( the presidents ) ) . ) ) (ROOT (S (NP (DT The) (NN banker)) (VP (VBD recommended) (NP (NP (DT the) (NN judge)) (PP (IN by) (NP (DT the) (NNS presidents))))) (. .))) (ROOT (S (NP (DT The) (NN judge)) (VP (VBD recommended) (NP (DT the) (NNS presidents))) (. .))) The banker recommended the judge by the presidents . The judge recommended the presidents . ex1080 lexical_overlap ln_preposition temp6 4 | non-entailment ( ( ( The actors ) ( who ( recommended ( the senators ) ) ) ) ( ( called ( the artists ) ) . ) ) ( ( The artists ) ( ( called ( the senators ) ) . ) ) (ROOT (S (NP (NP (DT The) (NNS actors)) (SBAR (WHNP (WP who)) (S (VP (VBD recommended) (NP (DT the) (NNS senators)))))) (VP (VBD called) (NP (DT the) (NNS artists))) (. .))) (ROOT (S (NP (DT The) (NNS artists)) (VP (VBD called) (NP (DT the) (NNS senators))) (. .))) The actors who recommended the senators called the artists . The artists called the senators . ex2040 lexical_overlap ln_relative_clause temp13 5 | non-entailment ( ( The banker ) ( ( was ( encouraged ( by ( the athletes ) ) ) ) . ) ) ( ( The banker ) ( ( encouraged ( the athletes ) ) . ) ) (ROOT (S (NP (DT The) (NN banker)) (VP (VBD was) (VP (VBN encouraged) (PP (IN by) (NP (DT the) (NNS athletes))))) (. .))) (ROOT (S (NP (DT The) (NN banker)) (VP (VBD encouraged) (NP (DT the) (NNS athletes))) (. .))) The banker was encouraged by the athletes . The banker encouraged the athletes . ex3239 lexical_overlap ln_passive temp20 6 | non-entailment ( ( ( The scientist ) ( and ( the student ) ) ) ( ( called ( the athlete ) ) . ) ) ( ( The student ) ( ( called ( the scientist ) ) . ) ) (ROOT (S (NP (NP (DT The) (NN scientist)) (CC and) (NP (DT the) (NN student))) (VP (VBD called) (NP (DT the) (NN athlete))) (. .))) (ROOT (S (NP (DT The) (NN student)) (VP (VBD called) (NP (DT the) (NN scientist))) (. .))) The scientist and the student called the athlete . The student called the scientist . ex4995 lexical_overlap ln_conjunction temp23 7 | entailment ( ( ( The tourist ) ( who ( ( the banker ) contacted ) ) ) ( ( called ( the lawyers ) ) . ) ) ( ( The banker ) ( ( contacted ( the tourist ) ) . ) ) (ROOT (S (NP (NP (DT The) (NN tourist)) (SBAR (WHNP (WP who)) (S (NP (DT the) (NN banker)) (VP (VBD contacted))))) (VP (VBD called) (NP (DT the) (NNS lawyers))) (. .))) (ROOT (S (NP (DT The) (NN banker)) (VP (VBD contacted) (NP (DT the) (NN tourist))) (. .))) The tourist who the banker contacted called the lawyers . The banker contacted the tourist . ex5574 lexical_overlap le_relative_clause temp26 8 | entailment ( ( ( The athlete ) ( by ( the doctors ) ) ) ( ( encouraged ( the senator ) ) . ) ) ( ( The athlete ) ( ( encouraged ( the senator ) ) . ) ) (ROOT (S (NP (NP (DT The) (NN athlete)) (PP (IN by) (NP (DT the) (NNS doctors)))) (VP (VBD encouraged) (NP (DT the) (NN senator))) (. .))) (ROOT (S (NP (DT The) (NN athlete)) (VP (VBD encouraged) (NP (DT the) (NN senator))) (. .))) The athlete by the doctors encouraged the senator . The athlete encouraged the senator . ex6983 lexical_overlap le_around_prepositional_phrase temp30 9 | entailment ( ( ( The actors ) ( who ( thanked ( the students ) ) ) ) ( ( admired ( the secretaries ) ) . ) ) ( ( The actors ) ( ( admired ( the secretaries ) ) . ) ) (ROOT (S (NP (NP (DT The) (NNS actors)) (SBAR (WHNP (WP who)) (S (VP (VBD thanked) (NP (DT the) (NNS students)))))) (VP (VBD admired) (NP (DT the) (NNS secretaries))) (. .))) (ROOT (S (NP (DT The) (NNS actors)) (VP (VBD admired) (NP (DT the) (NNS secretaries))) (. .))) The actors who thanked the students admired the secretaries . The actors admired the secretaries . ex7263 lexical_overlap le_around_relative_clause temp31 10 | entailment ( ( The actors ) ( ( recognized ( ( the lawyer ) ( and ( the professor ) ) ) ) . ) ) ( ( The actors ) ( ( recognized ( the professor ) ) . ) ) (ROOT (S (NP (DT The) (NNS actors)) (VP (VBD recognized) (NP (NP (DT the) (NN lawyer)) (CC and) (NP (DT the) (NN professor)))) (. .))) (ROOT (S (NP (DT The) (NNS actors)) (VP (VBD recognized) (NP (DT the) (NN professor))) (. .))) The actors recognized the lawyer and the professor . The actors recognized the professor . ex8976 lexical_overlap le_conjunction temp33 11 | entailment ( ( The manager ) ( ( was ( encouraged ( by ( the secretary ) ) ) ) . ) ) ( ( The secretary ) ( ( encouraged ( the manager ) ) . ) ) (ROOT (S (NP (DT The) (NN manager)) (VP (VBD was) (VP (VBN encouraged) (PP (IN by) (NP (DT the) (NN secretary))))) (. .))) (ROOT (S (NP (DT The) (NN secretary)) (VP (VBD encouraged) (NP (DT the) (NN manager))) (. .))) The manager was encouraged by the secretary . The secretary encouraged the manager . ex9599 lexical_overlap le_passive temp35 12 | non-entailment ( ( The senators ) ( ( heard ( ( the athlete ) ( stopped ( the artists ) ) ) ) . ) ) ( ( The senators ) ( ( heard ( the athlete ) ) . ) ) (ROOT (S (NP (DT The) (NNS senators)) (VP (VBD heard) (SBAR (S (NP (DT the) (NN athlete)) (VP (VBD stopped) (NP (DT the) (NN artists)))))) (. .))) (ROOT (S (NP (DT The) (NNS senators)) (VP (VBD heard) (NP (DT the) (NN athlete))) (. .))) The senators heard the athlete stopped the artists . The senators heard the athlete . ex10402 subsequence sn_NP/S temp37 13 | non-entailment ( ( ( The doctor ) ( near ( the athlete ) ) ) ( ( avoided ( the lawyer ) ) . ) ) ( ( The athlete ) ( ( avoided ( the lawyer ) ) . ) ) (ROOT (S (NP (NP (DT The) (NN doctor)) (PP (IN near) (NP (DT the) (NN athlete)))) (VP (VBD avoided) (NP (DT the) (NN lawyer))) (. .))) (ROOT (S (NP (DT The) (NN athlete)) (VP (VBD avoided) (NP (DT the) (NN lawyer))) (. .))) The doctor near the athlete avoided the lawyer . The athlete avoided the lawyer . ex11465 subsequence sn_PP_on_subject temp38 14 | non-entailment ( ( ( The president ) ( who ( contacted ( the student ) ) ) ) ( shouted . ) ) ( ( The student ) ( shouted . ) ) (ROOT (S (NP (NP (DT The) (NN president)) (SBAR (WHNP (WP who)) (S (VP (VBD contacted) (NP (DT the) (NN student)))))) (VP (VBD shouted)) (. .))) (ROOT (S (NP (DT The) (NN student)) (VP (VBD shouted)) (. .))) The president who contacted the student shouted . The student shouted . ex12815 subsequence sn_relative_clause_on_subject temp39 15 | non-entailment ( ( ( The scientist ) ( presented ( in ( the museum ) ) ) ) ( ( contacted ( the secretaries ) ) . ) ) ( ( The scientist ) ( ( presented ( in ( the museum ) ) ) . ) ) (ROOT (S (NP (NP (DT The) (NN scientist)) (VP (VBN presented) (PP (IN in) (NP (DT the) (NN museum))))) (VP (VBD contacted) (NP (DT the) (NN secretaries))) (. .))) (ROOT (S (NP (DT The) (NN scientist)) (VP (VBD presented) (PP (IN in) (NP (DT the) (NN museum)))) (. .))) The scientist presented in the museum contacted the secretaries . The scientist presented in the museum . ex13050 subsequence sn_past_participle temp40 16 | non-entailment ( ( Before ( ( the managers ) presented ) ) ( ( the lawyers ) ( resigned . ) ) ) ( ( The managers ) ( ( presented ( the lawyers ) ) . ) ) (ROOT (S (SBAR (IN Before) (S (NP (DT the) (NNS managers)) (VP (VBD presented)))) (NP (DT the) (NNS lawyers)) (VP (VBD resigned)) (. .))) (ROOT (S (NP (DT The) (NNS managers)) (VP (VBD presented) (NP (DT the) (NNS lawyers))) (. .))) Before the managers presented the lawyers resigned . The managers presented the lawyers . ex14651 subsequence sn_NP/Z temp42 17 | entailment ( ( ( The students ) ( and ( the secretaries ) ) ) ( ( mentioned ( the doctor ) ) . ) ) ( ( The secretaries ) ( ( mentioned ( the doctor ) ) . ) ) (ROOT (S (NP (NP (DT The) (NNS students)) (CC and) (NP (DT the) (NNS secretaries))) (VP (VBD mentioned) (NP (DT the) (NN doctor))) (. .))) (ROOT (S (NP (DT The) (NNS secretaries)) (VP (VBD mentioned) (NP (DT the) (NN doctor))) (. .))) The students and the secretaries mentioned the doctor . The secretaries mentioned the doctor . ex15274 subsequence se_conjunction temp44 18 | entailment ( ( Important athletes ) ( arrived . ) ) ( Athletes ( arrived . ) ) (ROOT (S (NP (JJ Important) (NNS athletes)) (VP (VBD arrived)) (. .))) (ROOT (S (NP (NNS Athletes)) (VP (VBD arrived)) (. .))) Important athletes arrived . Athletes arrived . ex16997 subsequence se_adjective temp46 19 | entailment ( ( The lawyer ) ( ( won ( the tournament ) ) . ) ) ( ( The lawyer ) ( won . ) ) (ROOT (S (NP (DT The) (NN lawyer)) (VP (VBD won) (NP (DT the) (NN tournament))) (. .))) (ROOT (S (NP (DT The) (NN lawyer)) (VP (VBD won)) (. .))) The lawyer won the tournament . The lawyer won . ex17081 subsequence se_understood_object temp47 20 | entailment ( ( The students ) ( ( saw ( ( the tourist ) ( that waited ) ) ) . ) ) ( ( The students ) ( ( saw ( the tourist ) ) . ) ) (ROOT (S (NP (DT The) (NNS students)) (VP (VBD saw) (NP (NP (DT the) (NN tourist)) (SBAR (WHNP (WDT that)) (S (VP (VBD waited)))))) (. .))) (ROOT (S (NP (DT The) (NNS students)) (VP (VBD saw) (NP (DT the) (NN tourist))) (. .))) The students saw the tourist that waited . The students saw the tourist . ex18658 subsequence se_relative_clause_on_obj temp48 21 | entailment ( ( The lawyers ) ( ( saw ( ( the professor ) ( behind ( the bankers ) ) ) ) . ) ) ( ( The lawyers ) ( ( saw ( the professor ) ) . ) ) (ROOT (S (NP (DT The) (NNS lawyers)) (VP (VBD saw) (NP (NP (DT the) (NN professor)) (PP (IN behind) (NP (DT the) (NNS bankers))))) (. .))) (ROOT (S (NP (DT The) (NNS lawyers)) (VP (VBD saw) (NP (DT the) (NN professor))) (. .))) The lawyers saw the professor behind the bankers . The lawyers saw the professor . ex19409 subsequence se_PP_on_obj temp49 22 | non-entailment ( ( Whether ( or ( not ( ( the secretary ) ( stopped ( the actors ) ) ) ) ) ) ( , ( ( ( the artist ) ( helped ( the bankers ) ) ) . ) ) ) ( ( The secretary ) ( ( stopped ( the actors ) ) . ) ) (ROOT (S (SBAR (IN Whether) (CC or) (RB not) (S (NP (DT the) (NN secretary)) (VP (VBD stopped) (NP (DT the) (NN actors))))) (, ,) (S (NP (DT the) (NN artist)) (VP (VBD helped) (NP (DT the) (NN bankers)))) (. .))) (ROOT (S (NP (DT The) (NN secretary)) (VP (VBD stopped) (NP (DT the) (NN actors))) (. .))) Whether or not the secretary stopped the actors , the artist helped the bankers . The secretary stopped the actors . ex20080 constituent cn_embedded_under_if temp50 23 | non-entailment ( ( If ( ( the secretary ) ( helped ( the tourists ) ) ) ) ( , ( ( ( the student ) ( contacted ( the lawyers ) ) ) . ) ) ) ( ( The student ) ( ( contacted ( the lawyers ) ) . ) ) (ROOT (S (SBAR (IN If) (S (NP (DT the) (NN secretary)) (VP (VBD helped) (NP (DT the) (NN tourists))))) (, ,) (S (NP (DT the) (NN student)) (VP (VBD contacted) (NP (DT the) (NN lawyers)))) (. .))) (ROOT (S (NP (DT The) (NN student)) (VP (VBD contacted) (NP (DT the) (NN lawyers))) (. .))) If the secretary helped the tourists , the student contacted the lawyers . The student contacted the lawyers . ex21616 constituent cn_after_if_clause temp51 24 | non-entailment ( ( The president ) ( ( assumed ( that ( ( the senator ) resigned ) ) ) . ) ) ( ( The senator ) ( resigned . ) ) (ROOT (S (NP (DT The) (NN president)) (VP (VBD assumed) (SBAR (IN that) (S (NP (DT the) (NN senator)) (VP (VBD resigned))))) (. .))) (ROOT (S (NP (DT The) (NN senator)) (VP (VBD resigned)) (. .))) The president assumed that the senator resigned . The senator resigned . ex22930 constituent cn_embedded_under_verb temp52 25 | non-entailment ( ( ( The author ) ( helped ( the secretary ) ) ) ( , ( or ( ( ( the actor ) ( recognized ( the judges ) ) ) . ) ) ) ) ( ( The actor ) ( ( recognized ( the judges ) ) . ) ) (ROOT (S (S (NP (DT The) (NN author)) (VP (VBD helped) (NP (DT the) (NN secretary)))) (, ,) (CC or) (S (NP (DT the) (NN actor)) (VP (VBD recognized) (NP (DT the) (NN judges)))) (. .))) (ROOT (S (NP (DT The) (NN actor)) (VP (VBD recognized) (NP (DT the) (NN judges))) (. .))) The author helped the secretary , or the actor recognized the judges . The actor recognized the judges . ex23487 constituent cn_disjunction temp54 26 | non-entailment ( Maybe ( ( ( the artist ) slept ) . ) ) ( ( The artist ) ( slept . ) ) (ROOT (S (ADVP (RB Maybe)) (S (NP (DT the) (NN artist)) (VP (VBD slept))) (. .))) (ROOT (S (NP (DT The) (NN artist)) (VP (VBD slept)) (. .))) Maybe the artist slept . The artist slept . ex24289 constituent cn_adverb temp58 27 | entailment ( ( While ( ( the scientists ) ( saw ( the tourists ) ) ) ) ( , ( ( ( the artist ) resigned ) . ) ) ) ( ( The scientists ) ( ( saw ( the tourists ) ) . ) ) (ROOT (S (SBAR (IN While) (S (NP (DT the) (NNS scientists)) (VP (VBD saw) (NP (DT the) (NN tourists))))) (, ,) (S (NP (DT the) (NN artist)) (VP (VBD resigned))) (. .))) (ROOT (S (NP (DT The) (NNS scientists)) (VP (VBD saw) (NP (DT the) (NN tourists))) (. .))) While the scientists saw the tourists , the artist resigned . The scientists saw the tourists . ex25113 constituent ce_embedded_under_since temp59 28 | entailment ( ( Since ( ( the author ) ( introduced ( the actors ) ) ) ) ( , ( ( ( the senators ) ( called ( the tourists ) ) ) . ) ) ) ( ( The senators ) ( ( called ( the tourists ) ) . ) ) (ROOT (S (SBAR (IN Since) (S (NP (DT the) (NN author)) (VP (VBD introduced) (NP (DT the) (NN actors))))) (, ,) (S (NP (DT the) (NNS senators)) (VP (VBD called) (NP (DT the) (NN tourists)))) (. .))) (ROOT (S (NP (DT The) (NNS senators)) (VP (VBD called) (NP (DT the) (NN tourists))) (. .))) Since the author introduced the actors , the senators called the tourists . The senators called the tourists . ex26200 constituent ce_after_since_clause temp60 29 | entailment ( ( The athlete ) ( ( remembered ( that ( ( the artist ) ( introduced ( the doctor ) ) ) ) ) . ) ) ( ( The artist ) ( ( introduced ( the doctor ) ) . ) ) (ROOT (S (NP (DT The) (NN athlete)) (VP (VBD remembered) (SBAR (IN that) (S (NP (DT the) (NN artist)) (VP (VBD introduced) (NP (DT the) (NN doctor)))))) (. .))) (ROOT (S (NP (DT The) (NN artist)) (VP (VBD introduced) (NP (DT the) (NN doctor))) (. .))) The athlete remembered that the artist introduced the doctor . The artist introduced the doctor . ex27636 constituent ce_embedded_under_verb temp61 30 | entailment ( ( ( The bankers ) ( called ( the student ) ) ) ( , ( and ( ( ( the presidents ) ( recommended ( the secretaries ) ) ) . ) ) ) ) ( ( The presidents ) ( ( recommended ( the secretaries ) ) . ) ) (ROOT (S (S (NP (DT The) (NNS bankers)) (VP (VBD called) (NP (DT the) (NN student)))) (, ,) (CC and) (S (NP (DT the) (NNS presidents)) (VP (VBD recommended) (NP (DT the) (NN secretaries)))) (. .))) (ROOT (S (NP (DT The) (NNS presidents)) (VP (VBD recommended) (NP (DT the) (NN secretaries))) (. .))) The bankers called the student , and the presidents recommended the secretaries . The presidents recommended the secretaries . ex28532 constituent ce_conjunction temp63 31 | entailment ( Definitely ( ( ( the artists ) ( mentioned ( the professor ) ) ) . ) ) ( ( The artists ) ( ( mentioned ( the professor ) ) . ) ) (ROOT (S (ADVP (RB Definitely)) (S (NP (DT the) (NNS artists)) (VP (VBD mentioned) (NP (DT the) (NN professor)))) (. .))) (ROOT (S (NP (DT The) (NNS artists)) (VP (VBD mentioned) (NP (DT the) (NN professor))) (. .))) Definitely the artists mentioned the professor . The artists mentioned the professor . ex29388 constituent ce_adverb temp68 32 | -------------------------------------------------------------------------------- /bert_tagger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import argparse 4 | import csv 5 | import logging 6 | import os 7 | import random 8 | import sys 9 | import pickle 10 | 11 | # os.environ['CUDA_VISIBLE_DEVICES'] = "0" 12 | 13 | import numpy as np 14 | import torch 15 | from torch import nn 16 | from torch.nn import CrossEntropyLoss 17 | from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, 18 | TensorDataset) 19 | from torch.utils.data.distributed import DistributedSampler 20 | from tqdm import tqdm, trange 21 | 22 | from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE 23 | from pytorch_pretrained_bert.modeling import BertPreTrainedModel, BertModel, BertConfig, WEIGHTS_NAME, CONFIG_NAME 24 | from pytorch_pretrained_bert.tokenization import BertTokenizer 25 | from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear 26 | 27 | from bert_util import * 28 | 29 | logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', 30 | datefmt = '%m/%d/%Y %H:%M:%S', 31 | level = logging.INFO) 32 | logger = logging.getLogger(__name__) 33 | 34 | def main(): 35 | parser = argparse.ArgumentParser() 36 | 37 | ## Required parameters 38 | parser.add_argument("--data_dir", 39 | default=None, 40 | type=str, 41 | required=True, 42 | help="The input data dir. Should contain the .tsv files (or other data files) for the task.") 43 | parser.add_argument("--bert_model", default=None, type=str, required=True, 44 | help="Bert pre-trained model selected in the list: bert-base-uncased, " 45 | "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, " 46 | "bert-base-multilingual-cased, bert-base-chinese.") 47 | parser.add_argument("--output_dir", 48 | default=None, 49 | type=str, 50 | required=True, 51 | help="The output directory where the model predictions and checkpoints will be written.") 52 | parser.add_argument("--task", 53 | default=None, 54 | type=str, 55 | required=True, 56 | help="Sentiment analysis or natural language inference? (SA or NLI)") 57 | 58 | ## Other parameters 59 | parser.add_argument("--cache_dir", 60 | default="", 61 | type=str, 62 | help="Where do you want to store the pre-trained models downloaded from s3") 63 | parser.add_argument("--trained_model_dir", 64 | default="", 65 | type=str, 66 | help="Where is the fine-tuned (with the cloze-style LM objective) BERT model?") 67 | parser.add_argument("--max_seq_length", 68 | default=128, 69 | type=int, 70 | help="The maximum total input sequence length after WordPiece tokenization. \n" 71 | "Sequences longer than this will be truncated, and sequences shorter \n" 72 | "than this will be padded.") 73 | parser.add_argument("--do_train", 74 | action='store_true', 75 | help="Whether to run training.") 76 | parser.add_argument("--do_eval", 77 | action='store_true', 78 | help="Whether to run eval on the dev set.") 79 | parser.add_argument("--do_test", 80 | action='store_true', 81 | help="Whether to run eval on the test set.") 82 | parser.add_argument("--do_lower_case", 83 | action='store_true', 84 | help="Set this flag if you are using an uncased model.") 85 | parser.add_argument("--train_batch_size", 86 | default=32, 87 | type=int, 88 | help="Total batch size for training.") 89 | parser.add_argument("--eval_batch_size", 90 | default=8, 91 | type=int, 92 | help="Total batch size for eval.") 93 | parser.add_argument("--learning_rate", 94 | default=5e-5, 95 | type=float, 96 | help="The initial learning rate for Adam.") 97 | parser.add_argument("--num_train_epochs", 98 | default=3.0, 99 | type=float, 100 | help="Total number of training epochs to perform.") 101 | parser.add_argument("--warmup_proportion", 102 | default=0.1, 103 | type=float, 104 | help="Proportion of training to perform linear learning rate warmup for. " 105 | "E.g., 0.1 = 10%% of training.") 106 | parser.add_argument("--no_cuda", 107 | action='store_true', 108 | help="Whether not to use CUDA when available") 109 | parser.add_argument('--seed', 110 | type=int, 111 | default=42, 112 | help="random seed for initialization") 113 | parser.add_argument('--fp16', 114 | action='store_true', 115 | help="Whether to use 16-bit float precision instead of 32-bit") 116 | parser.add_argument('--loss_scale', 117 | type=float, default=0, 118 | help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n" 119 | "0 (default value): dynamic loss scaling.\n" 120 | "Positive power of 2: static loss scaling value.\n") 121 | parser.add_argument('--freeze_bert', 122 | action='store_true', 123 | help="Whether to freeze BERT") 124 | parser.add_argument('--full_bert', 125 | action='store_true', 126 | help="Whether to use full BERT") 127 | parser.add_argument('--num_train_samples', 128 | type=int, 129 | default=-1, 130 | help="-1 for full train set, otherwise please specify") 131 | args = parser.parse_args() 132 | 133 | device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") 134 | n_gpu = torch.cuda.device_count() 135 | 136 | random.seed(args.seed) 137 | np.random.seed(args.seed) 138 | torch.manual_seed(args.seed) 139 | if n_gpu > 0: 140 | torch.cuda.manual_seed_all(args.seed) 141 | 142 | if not args.do_train and not args.do_eval and not args.do_test: 143 | raise ValueError("At least one of `do_train` or `do_eval` or `do_test` must be True.") 144 | 145 | if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train: 146 | logger.info("WARNING: Output directory ({}) already exists and is not empty.".format(args.output_dir)) 147 | if not os.path.exists(args.output_dir): 148 | os.makedirs(args.output_dir) 149 | 150 | # Prepare data processor 151 | mnli_processor = MnliProcessor() 152 | hans_processor = HansProcessor() 153 | sst_processor = Sst2Processor() 154 | if args.task == "SA": 155 | label_list = sst_processor.get_labels() 156 | elif args.task == "NLI": 157 | label_list = mnli_processor.get_labels() 158 | else: 159 | raise ValueError("") 160 | num_labels = len(label_list) 161 | 162 | tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) 163 | 164 | # Prepare training data 165 | train_examples = None 166 | num_train_optimization_steps = None 167 | if args.do_train: 168 | if args.task == "SA": 169 | train_examples = sst_processor.get_train_examples(args.data_dir, args.num_train_samples) 170 | elif args.task == "NLI": 171 | train_examples = mnli_processor.get_train_examples(args.data_dir, args.num_train_samples) 172 | 173 | num_train_optimization_steps = int( 174 | len(train_examples) / args.train_batch_size) * args.num_train_epochs 175 | 176 | # Prepare model 177 | cache_dir = args.cache_dir if args.cache_dir else os.path.join(PYTORCH_PRETRAINED_BERT_CACHE, 'distributed_{}'.format(-1)) 178 | if args.trained_model_dir: # load in fine-tuned (with cloze-style LM objective) model 179 | if os.path.exists(os.path.join(args.output_dir, WEIGHTS_NAME)): 180 | previous_state_dict = torch.load(os.path.join(args.output_dir, WEIGHTS_NAME)) 181 | else: 182 | from collections import OrderedDict 183 | previous_state_dict = OrderedDict() 184 | distant_state_dict = torch.load(os.path.join(args.trained_model_dir, WEIGHTS_NAME)) 185 | previous_state_dict.update(distant_state_dict) # note that the final layers of previous model and distant model must have different attribute names! 186 | model = MyBertForSequenceClassification.from_pretrained(args.trained_model_dir, state_dict=previous_state_dict, num_labels=num_labels) 187 | else: 188 | model = MyBertForSequenceClassification.from_pretrained(args.bert_model, cache_dir=cache_dir, num_labels=num_labels) 189 | if args.fp16: 190 | model.half() 191 | model.to(device) 192 | if n_gpu > 1: 193 | model = torch.nn.DataParallel(model) 194 | 195 | # Prepare optimizer 196 | param_optimizer = list(model.named_parameters()) 197 | if args.freeze_bert: # freeze BERT if needed 198 | frozen = ['bert'] 199 | elif args.full_bert: 200 | frozen = [] 201 | else: 202 | frozen = ['bert.embeddings.', 203 | 'bert.encoder.layer.0.', 204 | 'bert.encoder.layer.1.', 205 | 'bert.encoder.layer.2.', 206 | 'bert.encoder.layer.3.', 207 | 'bert.encoder.layer.4.', 208 | 'bert.encoder.layer.5.', 209 | 'bert.encoder.layer.6.', 210 | 'bert.encoder.layer.7.', 211 | ] # *** change here to filter out params we don't want to track *** 212 | no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] 213 | optimizer_grouped_parameters = [ 214 | {'params': [p for n, p in param_optimizer if (not any(fr in n for fr in frozen)) and (not any(nd in n for nd in no_decay))], 'weight_decay': 0.01}, 215 | {'params': [p for n, p in param_optimizer if (not any(fr in n for fr in frozen)) and (any(nd in n for nd in no_decay))], 'weight_decay': 0.0} 216 | ] 217 | if args.fp16: 218 | raise ValueError("Not sure if FP16 precision works yet.") 219 | else: 220 | optimizer = BertAdam(optimizer_grouped_parameters, 221 | lr=args.learning_rate, 222 | warmup=args.warmup_proportion, 223 | t_total=num_train_optimization_steps) 224 | 225 | if args.do_train: 226 | global_step = 0 227 | train_features = convert_examples_to_features( 228 | train_examples, label_list, args.max_seq_length, tokenizer) 229 | logger.info("***** Running training *****") 230 | logger.info(" Num examples = %d", len(train_examples)) 231 | logger.info(" Batch size = %d", args.train_batch_size) 232 | logger.info(" Num steps = %d", num_train_optimization_steps) 233 | all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) 234 | all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) 235 | all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) 236 | all_label_id = torch.tensor([f.label_id for f in train_features], dtype=torch.long) 237 | train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_id) 238 | train_sampler = RandomSampler(train_data) 239 | train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size) 240 | 241 | model.train() 242 | # model.eval() # train in eval mode to avoid dropout 243 | for _ in trange(int(args.num_train_epochs), desc="Epoch"): 244 | epoch_loss = [] 245 | for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")): 246 | batch = tuple(t.to(device) for t in batch) 247 | input_ids, input_mask, segment_ids, label_ids = batch 248 | loss = model(input_ids, segment_ids, input_mask, label_ids) 249 | 250 | if n_gpu > 1: 251 | loss = loss.mean() # mean() to average on multi-gpu. 252 | if args.fp16: 253 | optimizer.backward(loss) 254 | else: 255 | loss.backward() 256 | if args.fp16: 257 | # modify learning rate with special warm up BERT uses 258 | # if args.fp16 is False, BertAdam is used that handles this automatically 259 | lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion) 260 | for param_group in optimizer.param_groups: 261 | param_group['lr'] = lr_this_step 262 | 263 | optimizer.step() 264 | optimizer.zero_grad() 265 | global_step += 1 266 | epoch_loss.append(loss.item()) 267 | logger.info(" epoch loss = %f", np.mean(epoch_loss)) 268 | 269 | if args.do_train: 270 | # Save a trained model and the associated configuration 271 | model_to_save = model.module if hasattr(model, 'module') else model # Only save the model it-self 272 | output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME) 273 | torch.save(model_to_save.state_dict(), output_model_file) 274 | output_config_file = os.path.join(args.output_dir, CONFIG_NAME) 275 | with open(output_config_file, 'w') as f: 276 | f.write(model_to_save.config.to_json_string()) 277 | 278 | if args.do_test: 279 | if args.task == "SA": 280 | test_examples = sst_processor.get_dev_examples(args.data_dir) 281 | elif args.task == "NLI": 282 | test_examples = mnli_processor.get_dev_examples(args.data_dir) 283 | 284 | test_features = convert_examples_to_features( 285 | test_examples, label_list, args.max_seq_length, tokenizer) 286 | logger.info("***** Running final test *****") 287 | logger.info(" Num examples = %d", len(test_examples)) 288 | logger.info(" Batch size = %d", args.eval_batch_size) 289 | all_input_ids = torch.tensor([f.input_ids for f in test_features], dtype=torch.long) 290 | all_input_mask = torch.tensor([f.input_mask for f in test_features], dtype=torch.long) 291 | all_segment_ids = torch.tensor([f.segment_ids for f in test_features], dtype=torch.long) 292 | all_label_id = torch.tensor([f.label_id for f in test_features], dtype=torch.long) 293 | all_guid = torch.tensor([f.guid for f in test_features], dtype=torch.long) 294 | test_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_id, all_guid) 295 | # Run prediction for full data 296 | test_sampler = SequentialSampler(test_data) 297 | test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=args.eval_batch_size) 298 | 299 | model.eval() 300 | test_loss, test_accuracy = 0, 0 301 | nb_test_steps, nb_test_examples = 0, 0 302 | wrong_list = [] 303 | 304 | for input_ids, input_mask, segment_ids, label_ids, guids in tqdm(test_dataloader, desc="Testing"): 305 | input_ids = input_ids.to(device) 306 | input_mask = input_mask.to(device) 307 | segment_ids = segment_ids.to(device) 308 | label_ids = label_ids.to(device) 309 | 310 | with torch.no_grad(): 311 | tmp_test_loss = model(input_ids, segment_ids, input_mask, label_ids) 312 | logits = model(input_ids, segment_ids, input_mask) 313 | 314 | logits = logits.detach().cpu().numpy() 315 | label_ids = label_ids.to('cpu').numpy() 316 | 317 | tmp_test_correct, tmp_test_total = accuracy(logits, label_ids) 318 | 319 | assert tmp_test_total == 1 320 | if tmp_test_correct == 0: 321 | wrong_list.append(guids[0].item()) 322 | 323 | test_loss += tmp_test_loss.mean().item() 324 | test_accuracy += tmp_test_correct 325 | 326 | nb_test_examples += tmp_test_total 327 | nb_test_steps += 1 328 | 329 | test_loss = test_loss / nb_test_steps 330 | test_accuracy = test_accuracy / nb_test_examples 331 | result = {'test_loss': test_loss, 332 | 'test_accuracy': test_accuracy} 333 | 334 | output_test_file = os.path.join(args.output_dir, "test_results.txt") 335 | with open(output_test_file, "w") as writer: 336 | logger.info("***** Test results *****") 337 | for key in sorted(result.keys()): 338 | logger.info(" %s = %s", key, str(result[key])) 339 | writer.write("%s = %s\n" % (key, str(result[key]))) 340 | 341 | # pickle.dump(wrong_list, open(os.path.join(args.output_dir, "wrong_pred_guid.txt"), "wb")) 342 | 343 | if __name__ == "__main__": 344 | main() 345 | -------------------------------------------------------------------------------- /bert_token_influence.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import argparse 4 | import csv 5 | import logging 6 | import os 7 | import random 8 | import sys 9 | import pickle 10 | import time 11 | import math 12 | 13 | # os.environ['CUDA_VISIBLE_DEVICES'] = "0" 14 | 15 | import numpy as np 16 | import torch 17 | from torch import nn 18 | from torch.nn import CrossEntropyLoss 19 | from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, 20 | TensorDataset) 21 | from torch.utils.data.distributed import DistributedSampler 22 | from tqdm import tqdm, trange 23 | 24 | from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE 25 | from pytorch_pretrained_bert.modeling import BertPreTrainedModel, BertModel, BertConfig, WEIGHTS_NAME, CONFIG_NAME 26 | from pytorch_pretrained_bert.tokenization import BertTokenizer 27 | from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear 28 | 29 | import torch.autograd as autograd 30 | 31 | from bert_util import * 32 | 33 | logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', 34 | datefmt = '%m/%d/%Y %H:%M:%S', 35 | level = logging.INFO) 36 | logger = logging.getLogger(__name__) 37 | 38 | def main(): 39 | parser = argparse.ArgumentParser() 40 | 41 | ## Required parameters 42 | parser.add_argument("--data_dir", 43 | default=None, 44 | type=str, 45 | required=True, 46 | help="The input data dir. Should contain the .tsv files (or other data files) for the task.") 47 | parser.add_argument("--bert_model", default=None, type=str, required=True, 48 | help="Bert pre-trained model selected in the list: bert-base-uncased, " 49 | "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, " 50 | "bert-base-multilingual-cased, bert-base-chinese.") 51 | parser.add_argument("--output_dir", 52 | default=None, 53 | type=str, 54 | required=True, 55 | help="The output directory where the model predictions and checkpoints will be written.") 56 | parser.add_argument("--task", 57 | default=None, 58 | type=str, 59 | required=True, 60 | help="Sentiment analysis or natural language inference? (SA or NLI)") 61 | 62 | ## Other parameters 63 | parser.add_argument("--cache_dir", 64 | default="", 65 | type=str, 66 | help="Where do you want to store the pre-trained models downloaded from s3") 67 | parser.add_argument("--trained_model_dir", 68 | default="", 69 | type=str, 70 | help="Where is the fine-tuned (with the cloze-style LM objective) BERT model?") 71 | parser.add_argument("--max_seq_length", 72 | default=128, 73 | type=int, 74 | help="The maximum total input sequence length after WordPiece tokenization. \n" 75 | "Sequences longer than this will be truncated, and sequences shorter \n" 76 | "than this will be padded.") 77 | parser.add_argument("--do_lower_case", 78 | action='store_true', 79 | help="Set this flag if you are using an uncased model.") 80 | parser.add_argument("--train_batch_size", 81 | default=32, 82 | type=int, 83 | help="Total batch size for training.") 84 | parser.add_argument("--eval_batch_size", 85 | default=8, 86 | type=int, 87 | help="Total batch size for eval.") 88 | parser.add_argument("--learning_rate", 89 | default=5e-5, 90 | type=float, 91 | help="The initial learning rate for Adam.") 92 | parser.add_argument("--num_train_epochs", 93 | default=3.0, 94 | type=float, 95 | help="Total number of training epochs to perform.") 96 | parser.add_argument("--warmup_proportion", 97 | default=0.1, 98 | type=float, 99 | help="Proportion of training to perform linear learning rate warmup for. " 100 | "E.g., 0.1 = 10%% of training.") 101 | parser.add_argument("--no_cuda", 102 | action='store_true', 103 | help="Whether not to use CUDA when available") 104 | parser.add_argument('--seed', 105 | type=int, 106 | default=42, 107 | help="random seed for initialization") 108 | parser.add_argument('--fp16', 109 | action='store_true', 110 | help="Whether to use 16-bit float precision instead of 32-bit") 111 | parser.add_argument('--loss_scale', 112 | type=float, default=0, 113 | help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n" 114 | "0 (default value): dynamic loss scaling.\n" 115 | "Positive power of 2: static loss scaling value.\n") 116 | parser.add_argument('--freeze_bert', 117 | action='store_true', 118 | help="Whether to freeze BERT") 119 | parser.add_argument('--full_bert', 120 | action='store_true', 121 | help="Whether to use full BERT") 122 | parser.add_argument('--num_train_samples', 123 | type=int, 124 | default=-1, 125 | help="-1 for full train set, otherwise please specify") 126 | parser.add_argument('--damping', 127 | type=float, 128 | default=0.0, 129 | help="probably need damping for deep models") 130 | parser.add_argument('--test_idx', 131 | type=int, 132 | default=1, 133 | help="test index we want to examine") 134 | parser.add_argument('--influence_on_decision', 135 | action='store_true', 136 | help="Whether to compute influence on decision (rather than influence on ground truth)") 137 | parser.add_argument("--if_compute_saliency", 138 | default=1, 139 | type=int) 140 | parser.add_argument('--start_test_idx', 141 | type=int, 142 | default=-1, 143 | help="when not -1, --test_idx will be disabled") 144 | parser.add_argument('--end_test_idx', 145 | type=int, 146 | default=-1, 147 | help="when not -1, --test_idx will be disabled") 148 | parser.add_argument("--lissa_repeat", 149 | default=1, 150 | type=int) 151 | parser.add_argument("--lissa_depth", 152 | default=1.0, 153 | type=float) 154 | parser.add_argument('--mask_token', 155 | action='store_true', 156 | help="mask token and compute influence") 157 | parser.add_argument('--wrt_token', 158 | action='store_true', 159 | help="compute influence w.r.t. token saliency") 160 | 161 | args = parser.parse_args() 162 | 163 | device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") 164 | 165 | random.seed(args.seed) 166 | np.random.seed(args.seed) 167 | torch.manual_seed(args.seed) 168 | 169 | if not args.influence_on_decision: 170 | raise ValueError("To use loss function w.r.t. the ground truth, manually disable this error in the code.") 171 | if args.if_compute_saliency == 0: 172 | raise ValueError("Must compute saliency for token level influence.") 173 | 174 | if os.path.exists(args.output_dir) and os.listdir(args.output_dir): 175 | #raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir)) 176 | logger.info("WARNING: Output directory already exists and is not empty.") 177 | if not os.path.exists(args.output_dir): 178 | os.makedirs(args.output_dir) 179 | 180 | mnli_processor = MnliProcessor() 181 | hans_processor = HansProcessor() 182 | sst_processor = Sst2Processor() 183 | if args.task == "SA": 184 | label_list = sst_processor.get_labels() 185 | elif args.task == "NLI": 186 | label_list = mnli_processor.get_labels() 187 | num_labels = len(label_list) 188 | 189 | tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) 190 | 191 | # Prepare model 192 | model = MyBertForSequenceClassification.from_pretrained(args.trained_model_dir, num_labels=num_labels) 193 | if args.fp16: 194 | raise ValueError("Not sure if FP16 precision works yet.") 195 | model.half() 196 | model.to(device) 197 | 198 | # Prepare optimizer 199 | param_optimizer = list(model.named_parameters()) 200 | # for n, p in param_optimizer: 201 | # print(n) 202 | # sys.exit() 203 | if args.freeze_bert: 204 | frozen = ['bert'] 205 | elif args.full_bert: 206 | frozen = [] 207 | else: 208 | frozen = ['bert.embeddings.', 209 | 'bert.encoder.layer.0.', 210 | 'bert.encoder.layer.1.', 211 | 'bert.encoder.layer.2.', 212 | 'bert.encoder.layer.3.', 213 | 'bert.encoder.layer.4.', 214 | 'bert.encoder.layer.5.', 215 | 'bert.encoder.layer.6.', 216 | 'bert.encoder.layer.7.', 217 | ] # *** change here to filter out params we don't want to track *** 218 | 219 | param_influence = [] 220 | for n, p in param_optimizer: 221 | if (not any(fr in n for fr in frozen)): 222 | param_influence.append(p) 223 | elif 'bert.embeddings.word_embeddings.' in n: 224 | pass # need gradients through embedding layer for computing saliency map 225 | else: 226 | p.requires_grad = False 227 | 228 | param_shape_tensor = [] 229 | param_size = 0 230 | for p in param_influence: 231 | tmp_p = p.clone().detach() 232 | param_shape_tensor.append(tmp_p) 233 | param_size += torch.numel(tmp_p) 234 | logger.info(" Parameter size = %d", param_size) 235 | 236 | if args.task == "SA": 237 | train_examples = sst_processor.get_train_examples(args.data_dir, args.num_train_samples) 238 | elif args.task == "NLI": 239 | train_examples = mnli_processor.get_train_examples(args.data_dir, args.num_train_samples) 240 | 241 | train_features = convert_examples_to_features( 242 | train_examples, label_list, args.max_seq_length, tokenizer) 243 | logger.info("***** Train set *****") 244 | logger.info(" Num examples = %d", len(train_examples)) 245 | all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) 246 | all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) 247 | all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) 248 | all_label_id = torch.tensor([f.label_id for f in train_features], dtype=torch.long) 249 | all_guids = torch.tensor([f.guid for f in train_features], dtype=torch.long) 250 | train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_id, all_guids) 251 | train_dataloader_wbatch = DataLoader(train_data, sampler=SequentialSampler(train_data), batch_size=args.train_batch_size) 252 | train_dataloader = DataLoader(train_data, sampler=SequentialSampler(train_data), batch_size=1) 253 | 254 | if args.task == "SA": 255 | test_examples = sst_processor.get_dev_examples(args.data_dir) 256 | elif args.task == "NLI": 257 | test_examples = hans_processor.get_test_examples(args.data_dir) 258 | 259 | test_features = convert_examples_to_features( 260 | test_examples, label_list, args.max_seq_length, tokenizer) 261 | logger.info("***** Test set *****") 262 | logger.info(" Num examples = %d", len(test_examples)) 263 | all_input_ids = torch.tensor([f.input_ids for f in test_features], dtype=torch.long) 264 | all_input_mask = torch.tensor([f.input_mask for f in test_features], dtype=torch.long) 265 | all_segment_ids = torch.tensor([f.segment_ids for f in test_features], dtype=torch.long) 266 | all_label_id = torch.tensor([f.label_id for f in test_features], dtype=torch.long) 267 | all_guids = torch.tensor([f.guid for f in test_features], dtype=torch.long) 268 | test_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_id, all_guids) 269 | test_dataloader = DataLoader(test_data, sampler=SequentialSampler(test_data), batch_size=1) 270 | 271 | damping = args.damping 272 | 273 | test_idx = args.test_idx 274 | start_test_idx = args.start_test_idx 275 | end_test_idx = args.end_test_idx 276 | 277 | for input_ids, input_mask, segment_ids, label_ids, guids in test_dataloader: 278 | model.eval() 279 | 280 | guid = guids[0].item() # test set loader must have a batch size of 1 now 281 | if start_test_idx != -1 and end_test_idx != -1: 282 | if guid < start_test_idx: 283 | continue 284 | if guid > end_test_idx: 285 | break 286 | else: 287 | if guid < test_idx: 288 | continue 289 | if guid > test_idx: 290 | break 291 | input_ids = input_ids.to(device) 292 | input_mask = input_mask.to(device) 293 | segment_ids = segment_ids.to(device) 294 | label_ids = label_ids.to(device) 295 | 296 | ######## GET TEST EXAMPLE DECISION ######## 297 | with torch.no_grad(): 298 | logits = model(input_ids, segment_ids, input_mask) 299 | logits = logits.detach().cpu().numpy() 300 | outputs = np.argmax(logits, axis=1) 301 | pred_label_ids = torch.from_numpy(outputs).long().to(device) 302 | if label_ids.item() == pred_label_ids.item(): 303 | test_pred_status = "correct" 304 | else: 305 | test_pred_status = "wrong" 306 | if args.influence_on_decision: 307 | label_ids = torch.from_numpy(outputs).long().to(device) 308 | ################ 309 | 310 | ######## TEST EXAMPLE SALIENCY MAP ######## 311 | if args.if_compute_saliency: 312 | saliency_scores = saliency_map(model, input_ids, segment_ids, input_mask, pred_label_ids) 313 | test_tok_sal_list = [] 314 | for tok, sal in zip(tokenizer.convert_ids_to_tokens(input_ids.view(-1).cpu().numpy()), saliency_scores): 315 | if tok == '[PAD]': 316 | break 317 | test_tok_sal_list.append((tok, sal)) 318 | pickle.dump((test_tok_sal_list, [], test_pred_status), open(os.path.join(args.output_dir, "saliency_test_" + str(guid) + ".pkl"), "wb")) 319 | ################ 320 | 321 | ######## COMPUTE INFLUENCE WITH TOKENS MASKED ######## 322 | if args.mask_token: 323 | diff_input_masks, mask_ix = get_diff_input_masks(input_mask, test_tok_sal_list) 324 | diff_influences_list = [] 325 | for tok_i, diff_input_mask in enumerate(diff_input_masks): 326 | model.eval() 327 | random.seed(args.seed) 328 | np.random.seed(args.seed) 329 | torch.manual_seed(args.seed) 330 | train_dataloader_lissa = DataLoader(train_data, batch_size=args.train_batch_size, shuffle=True, drop_last=True) 331 | 332 | logger.info("computing influence with token masked: " + str(tok_i)) 333 | with torch.no_grad(): 334 | logits = model(input_ids, segment_ids, diff_input_mask) 335 | logits = logits.detach().cpu().numpy() 336 | outputs = np.argmax(logits, axis=1) 337 | if args.influence_on_decision: 338 | label_ids = torch.from_numpy(outputs).long().to(device) 339 | 340 | model.zero_grad() 341 | test_loss = model(input_ids, segment_ids, diff_input_mask, label_ids) 342 | test_grads = autograd.grad(test_loss, param_influence) 343 | 344 | model.train() 345 | inverse_hvp = get_inverse_hvp_lissa(test_grads, model, device, param_influence, train_dataloader_lissa, damping=damping, num_samples=args.lissa_repeat, recursion_depth=int(len(train_examples)*args.lissa_depth)) 346 | 347 | diff_influences = np.zeros(len(train_dataloader.dataset)) 348 | for train_idx, (_input_ids, _input_mask, _segment_ids, _label_ids, _) in enumerate(tqdm(train_dataloader, desc="Train set index")): 349 | model.train() 350 | _input_ids = _input_ids.to(device) 351 | _input_mask = _input_mask.to(device) 352 | _segment_ids = _segment_ids.to(device) 353 | _label_ids = _label_ids.to(device) 354 | 355 | model.zero_grad() 356 | train_loss = model(_input_ids, _segment_ids, _input_mask, _label_ids) 357 | train_grads = autograd.grad(train_loss, param_influence) 358 | diff_influences[train_idx] = torch.dot(inverse_hvp, gather_flat_grad(train_grads)).item() 359 | diff_influences_list.append(diff_influences) 360 | 361 | if args.influence_on_decision: 362 | pickle.dump((diff_influences_list, mask_ix), open(os.path.join(args.output_dir, "diff_mask_influences_test_" + str(guid) + ".pkl"), "wb")) 363 | else: 364 | pickle.dump((diff_influences_list, mask_ix), open(os.path.join(args.output_dir, "diff_mask_influences_on_x_test_" + str(guid) + ".pkl"), "wb")) 365 | ################ 366 | 367 | 368 | if __name__ == "__main__": 369 | main() 370 | -------------------------------------------------------------------------------- /bert_influence.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import argparse 4 | import csv 5 | import logging 6 | import os 7 | import random 8 | import sys 9 | import pickle 10 | import time 11 | import math 12 | 13 | # os.environ['CUDA_VISIBLE_DEVICES'] = "0" 14 | 15 | import numpy as np 16 | import torch 17 | from torch import nn 18 | from torch.nn import CrossEntropyLoss 19 | from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, 20 | TensorDataset) 21 | from torch.utils.data.distributed import DistributedSampler 22 | from tqdm import tqdm, trange 23 | 24 | from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE 25 | from pytorch_pretrained_bert.modeling import BertPreTrainedModel, BertModel, BertConfig, WEIGHTS_NAME, CONFIG_NAME 26 | from pytorch_pretrained_bert.tokenization import BertTokenizer 27 | from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear 28 | 29 | import torch.autograd as autograd 30 | 31 | from bert_util import * 32 | 33 | logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', 34 | datefmt = '%m/%d/%Y %H:%M:%S', 35 | level = logging.INFO) 36 | logger = logging.getLogger(__name__) 37 | 38 | def main(): 39 | parser = argparse.ArgumentParser() 40 | 41 | ## Required parameters 42 | parser.add_argument("--data_dir", 43 | default=None, 44 | type=str, 45 | required=True, 46 | help="The input data dir. Should contain the .tsv files (or other data files) for the task.") 47 | parser.add_argument("--bert_model", default=None, type=str, required=True, 48 | help="Bert pre-trained model selected in the list: bert-base-uncased, " 49 | "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, " 50 | "bert-base-multilingual-cased, bert-base-chinese.") 51 | parser.add_argument("--output_dir", 52 | default=None, 53 | type=str, 54 | required=True, 55 | help="The output directory where the model predictions and checkpoints will be written.") 56 | parser.add_argument("--task", 57 | default=None, 58 | type=str, 59 | required=True, 60 | help="Sentiment analysis or natural language inference? (SA or NLI)") 61 | 62 | ## Other parameters 63 | parser.add_argument("--cache_dir", 64 | default="", 65 | type=str, 66 | help="Where do you want to store the pre-trained models downloaded from s3") 67 | parser.add_argument("--trained_model_dir", 68 | default="", 69 | type=str, 70 | help="Where is the fine-tuned (with the cloze-style LM objective) BERT model?") 71 | parser.add_argument("--max_seq_length", 72 | default=128, 73 | type=int, 74 | help="The maximum total input sequence length after WordPiece tokenization. \n" 75 | "Sequences longer than this will be truncated, and sequences shorter \n" 76 | "than this will be padded.") 77 | parser.add_argument("--do_lower_case", 78 | action='store_true', 79 | help="Set this flag if you are using an uncased model.") 80 | parser.add_argument("--train_batch_size", 81 | default=32, 82 | type=int, 83 | help="Total batch size for training.") 84 | parser.add_argument("--eval_batch_size", 85 | default=8, 86 | type=int, 87 | help="Total batch size for eval.") 88 | parser.add_argument("--learning_rate", 89 | default=5e-5, 90 | type=float, 91 | help="The initial learning rate for Adam.") 92 | parser.add_argument("--num_train_epochs", 93 | default=3.0, 94 | type=float, 95 | help="Total number of training epochs to perform.") 96 | parser.add_argument("--warmup_proportion", 97 | default=0.1, 98 | type=float, 99 | help="Proportion of training to perform linear learning rate warmup for. " 100 | "E.g., 0.1 = 10%% of training.") 101 | parser.add_argument("--no_cuda", 102 | action='store_true', 103 | help="Whether not to use CUDA when available") 104 | parser.add_argument('--seed', 105 | type=int, 106 | default=42, 107 | help="random seed for initialization") 108 | parser.add_argument('--fp16', 109 | action='store_true', 110 | help="Whether to use 16-bit float precision instead of 32-bit") 111 | parser.add_argument('--loss_scale', 112 | type=float, default=0, 113 | help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n" 114 | "0 (default value): dynamic loss scaling.\n" 115 | "Positive power of 2: static loss scaling value.\n") 116 | parser.add_argument('--freeze_bert', 117 | action='store_true', 118 | help="Whether to freeze BERT") 119 | parser.add_argument('--full_bert', 120 | action='store_true', 121 | help="Whether to use full BERT") 122 | parser.add_argument('--num_train_samples', 123 | type=int, 124 | default=-1, 125 | help="-1 for full train set, otherwise please specify") 126 | parser.add_argument('--damping', 127 | type=float, 128 | default=0.0, 129 | help="probably need damping for deep models") 130 | parser.add_argument('--test_idx', 131 | type=int, 132 | default=1, 133 | help="test index we want to examine") 134 | parser.add_argument('--influence_on_decision', 135 | action='store_true', 136 | help="Whether to compute influence on decision (rather than influence on ground truth)") 137 | parser.add_argument("--if_compute_saliency", 138 | default=1, 139 | type=int) 140 | parser.add_argument('--start_test_idx', 141 | type=int, 142 | default=-1, 143 | help="when not -1, --test_idx will be disabled") 144 | parser.add_argument('--end_test_idx', 145 | type=int, 146 | default=-1, 147 | help="when not -1, --test_idx will be disabled") 148 | parser.add_argument("--lissa_repeat", 149 | default=1, 150 | type=int) 151 | parser.add_argument("--lissa_depth", 152 | default=1.0, 153 | type=float) 154 | 155 | args = parser.parse_args() 156 | 157 | device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") 158 | 159 | random.seed(args.seed) 160 | np.random.seed(args.seed) 161 | torch.manual_seed(args.seed) 162 | 163 | if not args.influence_on_decision: 164 | raise ValueError("To use loss function w.r.t. the ground truth, manually disable this error in the code.") 165 | 166 | if os.path.exists(args.output_dir) and os.listdir(args.output_dir): 167 | #raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir)) 168 | logger.info("WARNING: Output directory already exists and is not empty.") 169 | if not os.path.exists(args.output_dir): 170 | os.makedirs(args.output_dir) 171 | 172 | mnli_processor = MnliProcessor() 173 | hans_processor = HansProcessor() 174 | sst_processor = Sst2Processor() 175 | if args.task == "SA": 176 | label_list = sst_processor.get_labels() 177 | elif args.task == "NLI": 178 | label_list = mnli_processor.get_labels() 179 | elif args.task == "NLI_negation": 180 | label_list = mnli_processor.get_labels() 181 | elif args.task == "NLI_natural": 182 | label_list = mnli_processor.get_labels() 183 | num_labels = len(label_list) 184 | 185 | tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) 186 | 187 | # Prepare model 188 | model = MyBertForSequenceClassification.from_pretrained(args.trained_model_dir, num_labels=num_labels) 189 | if args.fp16: 190 | raise ValueError("Not sure if FP16 precision works yet.") 191 | model.half() 192 | model.to(device) 193 | 194 | # Prepare optimizer 195 | param_optimizer = list(model.named_parameters()) 196 | # for n, p in param_optimizer: 197 | # print(n) 198 | # sys.exit() 199 | if args.freeze_bert: 200 | frozen = ['bert'] 201 | elif args.full_bert: 202 | frozen = [] 203 | else: 204 | frozen = ['bert.embeddings.', 205 | 'bert.encoder.layer.0.', 206 | 'bert.encoder.layer.1.', 207 | 'bert.encoder.layer.2.', 208 | 'bert.encoder.layer.3.', 209 | 'bert.encoder.layer.4.', 210 | 'bert.encoder.layer.5.', 211 | 'bert.encoder.layer.6.', 212 | 'bert.encoder.layer.7.', 213 | ] # *** change here to filter out params we don't want to track *** 214 | 215 | param_influence = [] 216 | for n, p in param_optimizer: 217 | if (not any(fr in n for fr in frozen)): 218 | param_influence.append(p) 219 | elif 'bert.embeddings.word_embeddings.' in n: 220 | pass # need gradients through embedding layer for computing saliency map 221 | else: 222 | p.requires_grad = False 223 | 224 | param_shape_tensor = [] 225 | param_size = 0 226 | for p in param_influence: 227 | tmp_p = p.clone().detach() 228 | param_shape_tensor.append(tmp_p) 229 | param_size += torch.numel(tmp_p) 230 | logger.info(" Parameter size = %d", param_size) 231 | 232 | if args.task == "SA": 233 | train_examples = sst_processor.get_train_examples(args.data_dir, args.num_train_samples) 234 | elif args.task == "NLI": 235 | train_examples = mnli_processor.get_train_examples(args.data_dir, args.num_train_samples) 236 | elif args.task == "NLI_negation": 237 | train_examples = mnli_processor.get_train_examples(args.data_dir, args.num_train_samples) 238 | elif args.task == "NLI_natural": 239 | train_examples = mnli_processor.get_train_examples(args.data_dir, args.num_train_samples) 240 | 241 | train_features = convert_examples_to_features( 242 | train_examples, label_list, args.max_seq_length, tokenizer) 243 | logger.info("***** Train set *****") 244 | logger.info(" Num examples = %d", len(train_examples)) 245 | all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) 246 | all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) 247 | all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) 248 | all_label_id = torch.tensor([f.label_id for f in train_features], dtype=torch.long) 249 | all_guids = torch.tensor([f.guid for f in train_features], dtype=torch.long) 250 | train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_id, all_guids) 251 | train_dataloader_wbatch = DataLoader(train_data, sampler=SequentialSampler(train_data), batch_size=args.train_batch_size) 252 | train_dataloader = DataLoader(train_data, sampler=SequentialSampler(train_data), batch_size=1) 253 | 254 | if args.task == "SA": 255 | test_examples = sst_processor.get_dev_examples(args.data_dir) 256 | elif args.task == "NLI": 257 | test_examples = hans_processor.get_test_examples(args.data_dir) 258 | elif args.task == "NLI_negation": 259 | test_examples = hans_processor.get_neg_test_examples(args.data_dir) 260 | elif args.task == "NLI_natural": 261 | test_examples = mnli_processor.get_dev_examples(args.data_dir) 262 | 263 | test_features = convert_examples_to_features( 264 | test_examples, label_list, args.max_seq_length, tokenizer) 265 | logger.info("***** Test set *****") 266 | logger.info(" Num examples = %d", len(test_examples)) 267 | all_input_ids = torch.tensor([f.input_ids for f in test_features], dtype=torch.long) 268 | all_input_mask = torch.tensor([f.input_mask for f in test_features], dtype=torch.long) 269 | all_segment_ids = torch.tensor([f.segment_ids for f in test_features], dtype=torch.long) 270 | all_label_id = torch.tensor([f.label_id for f in test_features], dtype=torch.long) 271 | all_guids = torch.tensor([f.guid for f in test_features], dtype=torch.long) 272 | test_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_id, all_guids) 273 | test_dataloader = DataLoader(test_data, sampler=SequentialSampler(test_data), batch_size=1) 274 | 275 | damping = args.damping 276 | 277 | test_idx = args.test_idx 278 | start_test_idx = args.start_test_idx 279 | end_test_idx = args.end_test_idx 280 | 281 | for input_ids, input_mask, segment_ids, label_ids, guids in test_dataloader: 282 | model.eval() 283 | random.seed(args.seed) 284 | np.random.seed(args.seed) 285 | torch.manual_seed(args.seed) 286 | train_dataloader_lissa = DataLoader(train_data, batch_size=args.train_batch_size, shuffle=True, drop_last=True) 287 | 288 | guid = guids[0].item() # test set loader must have a batch size of 1 now 289 | if start_test_idx != -1 and end_test_idx != -1: 290 | if guid < start_test_idx: 291 | continue 292 | if guid > end_test_idx: 293 | break 294 | else: 295 | if guid < test_idx: 296 | continue 297 | if guid > test_idx: 298 | break 299 | input_ids = input_ids.to(device) 300 | input_mask = input_mask.to(device) 301 | segment_ids = segment_ids.to(device) 302 | label_ids = label_ids.to(device) 303 | 304 | ######## GET TEST EXAMPLE DECISION ######## 305 | with torch.no_grad(): 306 | logits = model(input_ids, segment_ids, input_mask) 307 | logits = logits.detach().cpu().numpy() 308 | outputs = np.argmax(logits, axis=1) 309 | pred_label_ids = torch.from_numpy(outputs).long().to(device) 310 | if label_ids.item() == pred_label_ids.item(): 311 | test_pred_status = "correct" 312 | else: 313 | test_pred_status = "wrong" 314 | if args.influence_on_decision: 315 | label_ids = torch.from_numpy(outputs).long().to(device) 316 | ################ 317 | 318 | ######## L_TEST GRADIENT ######## 319 | model.zero_grad() 320 | test_loss = model(input_ids, segment_ids, input_mask, label_ids) 321 | test_grads = autograd.grad(test_loss, param_influence) 322 | ################ 323 | 324 | ######## TEST EXAMPLE SALIENCY MAP ######## 325 | if args.if_compute_saliency: 326 | saliency_scores = saliency_map(model, input_ids, segment_ids, input_mask, pred_label_ids) 327 | test_tok_sal_list = [] 328 | for tok, sal in zip(tokenizer.convert_ids_to_tokens(input_ids.view(-1).cpu().numpy()), saliency_scores): 329 | if tok == '[PAD]': 330 | break 331 | test_tok_sal_list.append((tok, sal)) 332 | ################ 333 | 334 | ######## IHVP ######## 335 | model.train() 336 | logger.info("######## START COMPUTING IHVP ########") 337 | inverse_hvp = get_inverse_hvp_lissa(test_grads, model, device, param_influence, train_dataloader_lissa, damping=damping, num_samples=args.lissa_repeat, recursion_depth=int(len(train_examples)*args.lissa_depth)) 338 | logger.info("######## FINISHED COMPUTING IHVP ########") 339 | ################ 340 | 341 | influences = np.zeros(len(train_dataloader.dataset)) 342 | train_tok_sal_lists = [] 343 | for train_idx, (_input_ids, _input_mask, _segment_ids, _label_ids, _) in enumerate(tqdm(train_dataloader, desc="Train set index")): 344 | model.train() 345 | _input_ids = _input_ids.to(device) 346 | _input_mask = _input_mask.to(device) 347 | _segment_ids = _segment_ids.to(device) 348 | _label_ids = _label_ids.to(device) 349 | 350 | ######## L_TRAIN GRADIENT ######## 351 | model.zero_grad() 352 | train_loss = model(_input_ids, _segment_ids, _input_mask, _label_ids) 353 | train_grads = autograd.grad(train_loss, param_influence) 354 | influences[train_idx] = torch.dot(inverse_hvp, gather_flat_grad(train_grads)).item() 355 | ################ 356 | 357 | ######## TRAIN EXAMPLE SALIENCY MAP ######## 358 | # if args.if_compute_saliency: 359 | # with torch.no_grad(): 360 | # logits = model(_input_ids, _segment_ids, _input_mask) 361 | # logits = logits.detach().cpu().numpy() 362 | # outputs = np.argmax(logits, axis=1) 363 | # _pred_label_ids = torch.from_numpy(outputs).long().to(device) 364 | 365 | # saliency_scores = saliency_map(model, _input_ids, _segment_ids, _input_mask, _pred_label_ids) 366 | # train_tok_sal_list = [] 367 | # for tok, sal in zip(tokenizer.convert_ids_to_tokens(_input_ids.view(-1).cpu().numpy()), saliency_scores): 368 | # if tok == '[PAD]': 369 | # break 370 | # train_tok_sal_list.append((tok, sal)) 371 | # train_tok_sal_lists.append(train_tok_sal_list) 372 | ################ 373 | 374 | if args.influence_on_decision: 375 | pickle.dump(influences, open(os.path.join(args.output_dir, "influences_test_" + str(guid) + ".pkl"), "wb")) 376 | else: 377 | pickle.dump(influences, open(os.path.join(args.output_dir, "influences_on_x_test_" + str(guid) + ".pkl"), "wb")) 378 | if args.if_compute_saliency: 379 | pickle.dump((test_tok_sal_list, train_tok_sal_lists, test_pred_status), open(os.path.join(args.output_dir, "saliency_test_" + str(guid) + ".pkl"), "wb")) 380 | 381 | if __name__ == "__main__": 382 | main() 383 | -------------------------------------------------------------------------------- /bert_loo_tagger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import argparse 4 | import csv 5 | import logging 6 | import os 7 | import random 8 | import sys 9 | import pickle 10 | 11 | # os.environ['CUDA_VISIBLE_DEVICES'] = "0" 12 | 13 | import numpy as np 14 | import torch 15 | from torch import nn 16 | from torch.nn import CrossEntropyLoss 17 | from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, 18 | TensorDataset) 19 | from torch.utils.data.distributed import DistributedSampler 20 | from tqdm import tqdm, trange 21 | 22 | from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE 23 | from pytorch_pretrained_bert.modeling import BertPreTrainedModel, BertModel, BertConfig, WEIGHTS_NAME, CONFIG_NAME 24 | from pytorch_pretrained_bert.tokenization import BertTokenizer 25 | from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear 26 | 27 | from bert_util import * 28 | 29 | logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', 30 | datefmt = '%m/%d/%Y %H:%M:%S', 31 | level = logging.INFO) 32 | logger = logging.getLogger(__name__) 33 | 34 | def main(): 35 | parser = argparse.ArgumentParser() 36 | 37 | ## Required parameters 38 | parser.add_argument("--data_dir", 39 | default=None, 40 | type=str, 41 | required=True, 42 | help="The input data dir. Should contain the .tsv files (or other data files) for the task.") 43 | parser.add_argument("--bert_model", default=None, type=str, required=True, 44 | help="Bert pre-trained model selected in the list: bert-base-uncased, " 45 | "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, " 46 | "bert-base-multilingual-cased, bert-base-chinese.") 47 | parser.add_argument("--output_dir", 48 | default=None, 49 | type=str, 50 | required=True, 51 | help="The output directory where the model predictions and checkpoints will be written.") 52 | parser.add_argument("--task", 53 | default=None, 54 | type=str, 55 | required=True, 56 | help="Sentiment analysis or natural language inference? (SA or NLI)") 57 | parser.add_argument('--test_idx', 58 | type=int, 59 | default=None, 60 | required=True) 61 | parser.add_argument('--influence_file_dir', 62 | type=str, 63 | default=None, 64 | required=True) 65 | 66 | ## Other parameters 67 | parser.add_argument("--cache_dir", 68 | default="", 69 | type=str, 70 | help="Where do you want to store the pre-trained models downloaded from s3") 71 | parser.add_argument("--trained_model_dir", 72 | default="", 73 | type=str, 74 | help="Where is the fine-tuned (with the cloze-style LM objective) BERT model?") 75 | parser.add_argument("--max_seq_length", 76 | default=128, 77 | type=int, 78 | help="The maximum total input sequence length after WordPiece tokenization. \n" 79 | "Sequences longer than this will be truncated, and sequences shorter \n" 80 | "than this will be padded.") 81 | parser.add_argument("--do_train", 82 | action='store_true', 83 | help="Whether to run training.") 84 | parser.add_argument("--do_eval", 85 | action='store_true', 86 | help="Whether to run eval on the dev set.") 87 | parser.add_argument("--do_test", 88 | action='store_true', 89 | help="Whether to run eval on the test set.") 90 | parser.add_argument("--do_lower_case", 91 | action='store_true', 92 | help="Set this flag if you are using an uncased model.") 93 | parser.add_argument("--train_batch_size", 94 | default=32, 95 | type=int, 96 | help="Total batch size for training.") 97 | parser.add_argument("--eval_batch_size", 98 | default=8, 99 | type=int, 100 | help="Total batch size for eval.") 101 | parser.add_argument("--learning_rate", 102 | default=5e-5, 103 | type=float, 104 | help="The initial learning rate for Adam.") 105 | parser.add_argument("--num_train_epochs", 106 | default=3.0, 107 | type=float, 108 | help="Total number of training epochs to perform.") 109 | parser.add_argument("--warmup_proportion", 110 | default=0.1, 111 | type=float, 112 | help="Proportion of training to perform linear learning rate warmup for. " 113 | "E.g., 0.1 = 10%% of training.") 114 | parser.add_argument("--no_cuda", 115 | action='store_true', 116 | help="Whether not to use CUDA when available") 117 | parser.add_argument('--seed', 118 | type=int, 119 | default=42, 120 | help="random seed for initialization") 121 | parser.add_argument('--fp16', 122 | action='store_true', 123 | help="Whether to use 16-bit float precision instead of 32-bit") 124 | parser.add_argument('--loss_scale', 125 | type=float, default=0, 126 | help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n" 127 | "0 (default value): dynamic loss scaling.\n" 128 | "Positive power of 2: static loss scaling value.\n") 129 | parser.add_argument('--freeze_bert', 130 | action='store_true', 131 | help="Whether to freeze BERT") 132 | parser.add_argument('--full_bert', 133 | action='store_true', 134 | help="Whether to use full BERT") 135 | parser.add_argument('--num_train_samples', 136 | type=int, 137 | default=-1, 138 | help="-1 for full train set, otherwise please specify") 139 | parser.add_argument('--loo_percentage', 140 | type=float, default=0.01) 141 | args = parser.parse_args() 142 | 143 | device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") 144 | n_gpu = torch.cuda.device_count() 145 | 146 | random.seed(args.seed) 147 | np.random.seed(args.seed) 148 | torch.manual_seed(args.seed) 149 | if n_gpu > 0: 150 | torch.cuda.manual_seed_all(args.seed) 151 | 152 | if not args.do_train and not args.do_eval and not args.do_test: 153 | raise ValueError("At least one of `do_train` or `do_eval` or `do_test` must be True.") 154 | 155 | if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train: 156 | logger.info("WARNING: Output directory ({}) already exists and is not empty.".format(args.output_dir)) 157 | if not os.path.exists(args.output_dir): 158 | os.makedirs(args.output_dir) 159 | 160 | # Prepare data processor 161 | mnli_processor = MnliProcessor() 162 | hans_processor = HansProcessor() 163 | sst_processor = Sst2Processor() 164 | if args.task == "SA": 165 | label_list = sst_processor.get_labels() 166 | elif args.task == "NLI": 167 | label_list = mnli_processor.get_labels() 168 | else: 169 | raise ValueError("") 170 | num_labels = len(label_list) 171 | 172 | tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) 173 | 174 | if args.task == "SA": 175 | full_train_examples = sst_processor.get_train_examples(args.data_dir, args.num_train_samples) 176 | elif args.task == "NLI": 177 | full_train_examples = mnli_processor.get_train_examples(args.data_dir, args.num_train_samples) 178 | if args.task == "SA": 179 | test_examples = sst_processor.get_dev_examples(args.data_dir) 180 | elif args.task == "NLI": 181 | test_examples = hans_processor.get_test_examples(args.data_dir) 182 | 183 | # Loading influence result 184 | influences = pickle.load(open(os.path.join(args.influence_file_dir, "influences_test_" + str(args.test_idx) + ".pkl"), "rb")) 185 | assert args.num_train_samples == len(influences) 186 | train_idx_sorted = list(np.argsort(influences)) 187 | train_idx_sorted.reverse() 188 | train_idx_abs_sorted = list(np.argsort(np.absolute(influences))) 189 | 190 | most_influential_idx = train_idx_sorted[:int(len(influences)*args.loo_percentage)] 191 | least_influential_idx = train_idx_sorted[-int(len(influences)*args.loo_percentage):] 192 | zero_influential_idx = train_idx_abs_sorted[:int(len(influences)*args.loo_percentage)] 193 | random_influential_idx = random.sample(train_idx_sorted, int(len(influences)*args.loo_percentage)) 194 | loo_influential_idx_list = [most_influential_idx, least_influential_idx, zero_influential_idx, random_influential_idx, []] # 0: remove most influential, 1: remove least influential, 2: remove zero influential, 3: remove random, 4: unchanged 195 | 196 | # train and test models with different loo data 197 | loo_logits_list = [] 198 | for loo_i, loo_inf_idx in enumerate(loo_influential_idx_list): 199 | random.seed(args.seed) 200 | np.random.seed(args.seed) 201 | torch.manual_seed(args.seed) 202 | if n_gpu > 0: 203 | torch.cuda.manual_seed_all(args.seed) 204 | 205 | # Prepare training data 206 | num_train_optimization_steps = None 207 | if args.do_train: 208 | train_examples = [te for tei, te in enumerate(full_train_examples) if tei not in loo_inf_idx] 209 | 210 | num_train_optimization_steps = int( 211 | len(train_examples) / args.train_batch_size) * args.num_train_epochs 212 | 213 | # Prepare model 214 | cache_dir = args.cache_dir if args.cache_dir else os.path.join(PYTORCH_PRETRAINED_BERT_CACHE, 'distributed_{}'.format(-1)) 215 | if args.trained_model_dir: # load in fine-tuned (with cloze-style LM objective) model 216 | if os.path.exists(os.path.join(args.output_dir, WEIGHTS_NAME)): 217 | previous_state_dict = torch.load(os.path.join(args.output_dir, WEIGHTS_NAME)) 218 | else: 219 | from collections import OrderedDict 220 | previous_state_dict = OrderedDict() 221 | distant_state_dict = torch.load(os.path.join(args.trained_model_dir, WEIGHTS_NAME)) 222 | previous_state_dict.update(distant_state_dict) # note that the final layers of previous model and distant model must have different attribute names! 223 | model = MyBertForSequenceClassification.from_pretrained(args.trained_model_dir, state_dict=previous_state_dict, num_labels=num_labels) 224 | else: 225 | model = MyBertForSequenceClassification.from_pretrained(args.bert_model, cache_dir=cache_dir, num_labels=num_labels) 226 | if args.fp16: 227 | model.half() 228 | model.to(device) 229 | if n_gpu > 1: 230 | model = torch.nn.DataParallel(model) 231 | 232 | # Prepare optimizer 233 | param_optimizer = list(model.named_parameters()) 234 | if args.freeze_bert: # freeze BERT if needed 235 | frozen = ['bert'] 236 | elif args.full_bert: 237 | frozen = [] 238 | else: 239 | frozen = ['bert.embeddings.', 240 | 'bert.encoder.layer.0.', 241 | 'bert.encoder.layer.1.', 242 | 'bert.encoder.layer.2.', 243 | 'bert.encoder.layer.3.', 244 | 'bert.encoder.layer.4.', 245 | 'bert.encoder.layer.5.', 246 | 'bert.encoder.layer.6.', 247 | 'bert.encoder.layer.7.', 248 | ] # *** change here to filter out params we don't want to track *** 249 | no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] 250 | optimizer_grouped_parameters = [ 251 | {'params': [p for n, p in param_optimizer if (not any(fr in n for fr in frozen)) and (not any(nd in n for nd in no_decay))], 'weight_decay': 0.01}, 252 | {'params': [p for n, p in param_optimizer if (not any(fr in n for fr in frozen)) and (any(nd in n for nd in no_decay))], 'weight_decay': 0.0} 253 | ] 254 | if args.fp16: 255 | raise ValueError("Not sure if FP16 precision works yet.") 256 | else: 257 | optimizer = BertAdam(optimizer_grouped_parameters, 258 | lr=args.learning_rate, 259 | warmup=args.warmup_proportion, 260 | t_total=num_train_optimization_steps) 261 | 262 | if args.do_train: 263 | global_step = 0 264 | train_features = convert_examples_to_features( 265 | train_examples, label_list, args.max_seq_length, tokenizer) 266 | logger.info("***** Running training *****") 267 | logger.info(" Num examples = %d", len(train_examples)) 268 | logger.info(" Batch size = %d", args.train_batch_size) 269 | logger.info(" Num steps = %d", num_train_optimization_steps) 270 | all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) 271 | all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) 272 | all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) 273 | all_label_id = torch.tensor([f.label_id for f in train_features], dtype=torch.long) 274 | train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_id) 275 | train_sampler = RandomSampler(train_data) 276 | train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size) 277 | 278 | model.train() 279 | # model.eval() # train with eval mode to ignore the influence of dropout 280 | for _ in trange(int(args.num_train_epochs), desc="Epoch"): 281 | epoch_loss = [] 282 | for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")): 283 | batch = tuple(t.to(device) for t in batch) 284 | input_ids, input_mask, segment_ids, label_ids = batch 285 | loss = model(input_ids, segment_ids, input_mask, label_ids) 286 | 287 | if n_gpu > 1: 288 | loss = loss.mean() # mean() to average on multi-gpu. 289 | if args.fp16: 290 | optimizer.backward(loss) 291 | else: 292 | loss.backward() 293 | if args.fp16: 294 | # modify learning rate with special warm up BERT uses 295 | # if args.fp16 is False, BertAdam is used that handles this automatically 296 | lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion) 297 | for param_group in optimizer.param_groups: 298 | param_group['lr'] = lr_this_step 299 | 300 | optimizer.step() 301 | optimizer.zero_grad() 302 | global_step += 1 303 | epoch_loss.append(loss.item()) 304 | logger.info(" epoch loss = %f", np.mean(epoch_loss)) 305 | 306 | if args.do_train: 307 | # Save a trained model and the associated configuration 308 | model_to_save = model.module if hasattr(model, 'module') else model # Only save the model it-self 309 | output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME) 310 | torch.save(model_to_save.state_dict(), output_model_file) 311 | output_config_file = os.path.join(args.output_dir, CONFIG_NAME) 312 | with open(output_config_file, 'w') as f: 313 | f.write(model_to_save.config.to_json_string()) 314 | 315 | if args.do_test: 316 | test_features = convert_examples_to_features( 317 | test_examples, label_list, args.max_seq_length, tokenizer) 318 | logger.info("***** Running final test *****") 319 | logger.info(" Num examples = %d", len(test_examples)) 320 | logger.info(" Batch size = %d", args.eval_batch_size) 321 | all_input_ids = torch.tensor([f.input_ids for f in test_features], dtype=torch.long) 322 | all_input_mask = torch.tensor([f.input_mask for f in test_features], dtype=torch.long) 323 | all_segment_ids = torch.tensor([f.segment_ids for f in test_features], dtype=torch.long) 324 | all_label_id = torch.tensor([f.label_id for f in test_features], dtype=torch.long) 325 | all_guid = torch.tensor([f.guid for f in test_features], dtype=torch.long) 326 | test_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_id, all_guid) 327 | # Run prediction for full data 328 | test_sampler = SequentialSampler(test_data) 329 | test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=args.eval_batch_size) 330 | 331 | model.eval() 332 | test_loss, test_accuracy = 0, 0 333 | nb_test_steps, nb_test_examples = 0, 0 334 | wrong_list = [] 335 | 336 | for input_ids, input_mask, segment_ids, label_ids, guids in tqdm(test_dataloader, desc="Testing"): 337 | cur_test_idx = guids[0].item() 338 | if cur_test_idx != args.test_idx: 339 | continue 340 | 341 | input_ids = input_ids.to(device) 342 | input_mask = input_mask.to(device) 343 | segment_ids = segment_ids.to(device) 344 | label_ids = label_ids.to(device) 345 | 346 | with torch.no_grad(): 347 | logits = model(input_ids, segment_ids, input_mask) 348 | logits = torch.nn.functional.softmax(logits[0], dim=0) 349 | 350 | logits = logits.detach().cpu().numpy() 351 | loo_logits_list.append(logits) 352 | logger.info('LOO type ' + str(loo_i) + ': ' + str(logits)) 353 | 354 | break 355 | 356 | pickle.dump(loo_logits_list, open(os.path.join(args.output_dir, "loo_logits_test_" + str(args.test_idx) + ".pkl"), "wb")) 357 | 358 | if __name__ == "__main__": 359 | main() 360 | -------------------------------------------------------------------------------- /bert_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import argparse 4 | import csv 5 | import logging 6 | import os 7 | import random 8 | import sys 9 | import pickle 10 | import time 11 | import math 12 | 13 | import numpy as np 14 | import torch 15 | from torch import nn 16 | from torch.nn import CrossEntropyLoss 17 | from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, 18 | TensorDataset) 19 | from torch.utils.data.distributed import DistributedSampler 20 | from tqdm import tqdm, trange 21 | 22 | from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE 23 | from pytorch_pretrained_bert.modeling import BertPreTrainedModel, BertModel, BertConfig, WEIGHTS_NAME, CONFIG_NAME 24 | from pytorch_pretrained_bert.tokenization import BertTokenizer 25 | from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear 26 | 27 | import torch.autograd as autograd 28 | from scipy import stats 29 | 30 | class MyBertForSequenceClassification(BertPreTrainedModel): 31 | def __init__(self, config, num_labels): 32 | super(MyBertForSequenceClassification, self).__init__(config) 33 | self.num_labels = num_labels 34 | self.bert = BertModel(config) 35 | self.dropout = nn.Dropout(config.hidden_dropout_prob) 36 | self.classifier = nn.Linear(config.hidden_size, num_labels) 37 | self.apply(self.init_bert_weights) 38 | 39 | def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None): 40 | _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False) 41 | pooled_output = self.dropout(pooled_output) 42 | logits = self.classifier(pooled_output) 43 | 44 | if labels is not None: 45 | loss_fct = CrossEntropyLoss() 46 | loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) 47 | return loss 48 | else: 49 | return logits 50 | 51 | class MyLSTMForSequenceClassification(BertPreTrainedModel): 52 | def __init__(self, config, num_labels): 53 | super(MyLSTMForSequenceClassification, self).__init__(config) 54 | self.num_labels = num_labels 55 | self.my_word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0) 56 | self.lstm = nn.LSTM(input_size=config.hidden_size, hidden_size=config.hidden_size, batch_first=True) 57 | self.hidden_size = config.hidden_size 58 | self.classifier = nn.Linear(config.hidden_size, num_labels) 59 | self.apply(self.init_bert_weights) 60 | 61 | def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None): 62 | input_ids_lengths = (input_ids > 0).sum(dim=1) 63 | words_embeddings = self.my_word_embeddings(input_ids) 64 | 65 | packseq = nn.utils.rnn.pack_padded_sequence(words_embeddings, input_ids_lengths, batch_first=True, enforce_sorted=False) 66 | output, (h, c) = self.lstm(packseq) 67 | output, lengths = nn.utils.rnn.pad_packed_sequence(output, batch_first=True, padding_value=0) 68 | 69 | # last_hidden = torch.cat([h[0], h[1]], dim=-1) 70 | 71 | logits = self.classifier(h[0]) 72 | 73 | if labels is not None: 74 | loss_fct = CrossEntropyLoss() 75 | loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) 76 | return loss 77 | else: 78 | return logits 79 | 80 | class InputExample(object): 81 | """A single training/test example for simple sequence classification.""" 82 | 83 | def __init__(self, guid, text_a, text_b=None, label=None, note=""): 84 | """Constructs a InputExample. 85 | """ 86 | self.guid = guid 87 | self.text_a = text_a 88 | self.text_b = text_b 89 | self.label = label 90 | self.note = note 91 | 92 | class InputFeatures(object): 93 | """A single set of features of data.""" 94 | 95 | def __init__(self, input_ids, input_mask, segment_ids, label_id, guid): 96 | self.input_ids = input_ids 97 | self.input_mask = input_mask 98 | self.segment_ids = segment_ids 99 | self.label_id = label_id 100 | self.guid = guid 101 | 102 | class MnliProcessor(object): 103 | """Processor for the MultiNLI data set (GLUE version).""" 104 | 105 | def get_train_examples(self, data_dir, num_train_samples=-1): 106 | """See base class.""" 107 | if num_train_samples != -1: 108 | return self._create_examples(self._read_tsv(os.path.join(data_dir, "mnli_train.tsv")), "mnli_train")[: num_train_samples] 109 | return self._create_examples( 110 | self._read_tsv(os.path.join(data_dir, "mnli_train.tsv")), "mnli_train") 111 | 112 | def get_dev_examples(self, data_dir): 113 | """See base class.""" 114 | return self._create_examples( 115 | self._read_tsv(os.path.join(data_dir, "mnli_dev.tsv")), "mnli_dev_matched") 116 | 117 | def get_labels(self): 118 | """See base class.""" 119 | return ["entailment", "non-entailment"] 120 | 121 | def _create_examples(self, lines, set_type): 122 | """Creates examples for the training and dev sets.""" 123 | examples = [] 124 | for (i, line) in enumerate(lines): 125 | if i == 0: 126 | continue 127 | guid = i 128 | text_a = line[8] 129 | text_b = line[9] 130 | label = line[-1] 131 | if label == "contradiction" or label == "neutral": 132 | label = "non-entailment" # collapse contradiction into non-entailment 133 | examples.append( 134 | InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) 135 | return examples 136 | 137 | def _read_tsv(cls, input_file, quotechar=None): 138 | """Reads a tab separated value file.""" 139 | with open(input_file, "r") as f: 140 | reader = csv.reader(f, delimiter="\t", quotechar=quotechar) 141 | lines = [] 142 | for line in reader: 143 | lines.append(line) 144 | return lines 145 | 146 | class HansProcessor(object): 147 | 148 | def get_test_examples(self, data_dir): 149 | """See base class.""" 150 | return self._create_examples( 151 | self._read_tsv(os.path.join(data_dir, "small_heuristics_evaluation_set.txt")), "HANS small") 152 | 153 | def get_neg_test_examples(self, data_dir): 154 | """See base class.""" 155 | return self._create_examples( 156 | self._read_tsv(os.path.join(data_dir, "negated_small_heuristics_evaluation_set.txt")), "HANS small negated") 157 | 158 | def get_labels(self): 159 | """See base class.""" 160 | return ["entailment", "non-entailment"] 161 | 162 | def _create_examples(self, lines, set_type): 163 | """Creates examples for the training and dev sets.""" 164 | examples = [] 165 | for (i, line) in enumerate(lines): 166 | if i == 0: 167 | continue 168 | guid = i 169 | text_a = line[5] 170 | text_b = line[6] 171 | label = line[0] 172 | note = line[8] 173 | examples.append( 174 | InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label, note=note)) 175 | return examples 176 | 177 | def _read_tsv(cls, input_file, quotechar=None): 178 | """Reads a tab separated value file.""" 179 | with open(input_file, "r") as f: 180 | reader = csv.reader(f, delimiter="\t", quotechar=quotechar) 181 | lines = [] 182 | for line in reader: 183 | lines.append(line) 184 | return lines 185 | 186 | class Sst2Processor(object): 187 | """Processor for the SST-2 data set (GLUE version).""" 188 | 189 | def get_train_examples(self, data_dir, num_train_samples=-1): 190 | """See base class.""" 191 | if num_train_samples != -1: 192 | return self._create_examples(self._read_tsv(os.path.join(data_dir, "sst2_train.tsv")), "train")[: num_train_samples] 193 | return self._create_examples( 194 | self._read_tsv(os.path.join(data_dir, "sst2_train.tsv")), "train") 195 | 196 | def get_dev_examples(self, data_dir): 197 | """See base class.""" 198 | return self._create_examples( 199 | self._read_tsv(os.path.join(data_dir, "sst2_dev.tsv")), "dev") 200 | 201 | def get_labels(self): 202 | """See base class.""" 203 | return ["0", "1"] 204 | 205 | def _create_examples(self, lines, set_type): 206 | """Creates examples for the training and dev sets.""" 207 | examples = [] 208 | for (i, line) in enumerate(lines): 209 | if i == 0: 210 | continue 211 | guid = i 212 | text_a = line[0] 213 | label = line[1] 214 | examples.append( 215 | InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) 216 | return examples 217 | 218 | def _read_tsv(cls, input_file, quotechar=None): 219 | """Reads a tab separated value file.""" 220 | with open(input_file, "r") as f: 221 | reader = csv.reader(f, delimiter="\t", quotechar=quotechar) 222 | lines = [] 223 | for line in reader: 224 | lines.append(line) 225 | return lines 226 | 227 | def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer): 228 | """Loads a data file into a list of `InputBatch`s.""" 229 | 230 | label_map = {label : i for i, label in enumerate(label_list)} 231 | 232 | features = [] 233 | for (ex_index, example) in enumerate(examples): 234 | tokens_a = tokenizer.tokenize(example.text_a) 235 | 236 | tokens_b = None 237 | if example.text_b: 238 | tokens_b = tokenizer.tokenize(example.text_b) 239 | # Modifies `tokens_a` and `tokens_b` in place so that the total 240 | # length is less than the specified length. 241 | # Account for [CLS], [SEP], [SEP] with "- 3" 242 | _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3) 243 | else: 244 | # Account for [CLS] and [SEP] with "- 2" 245 | if len(tokens_a) > max_seq_length - 2: 246 | tokens_a = tokens_a[:(max_seq_length - 2)] 247 | 248 | tokens = ["[CLS]"] + tokens_a + ["[SEP]"] 249 | segment_ids = [0] * len(tokens) 250 | 251 | if tokens_b: 252 | tokens += tokens_b + ["[SEP]"] 253 | segment_ids += [1] * (len(tokens_b) + 1) 254 | 255 | input_ids = tokenizer.convert_tokens_to_ids(tokens) 256 | 257 | # The mask has 1 for real tokens and 0 for padding tokens. Only real 258 | # tokens are attended to. 259 | input_mask = [1] * len(input_ids) 260 | 261 | # Zero-pad up to the sequence length. 262 | padding = [0] * (max_seq_length - len(input_ids)) 263 | input_ids += padding 264 | input_mask += padding 265 | segment_ids += padding 266 | 267 | assert len(input_ids) == max_seq_length 268 | assert len(input_mask) == max_seq_length 269 | assert len(segment_ids) == max_seq_length 270 | 271 | label_id = label_map[example.label] 272 | 273 | features.append( 274 | InputFeatures(input_ids=input_ids, 275 | input_mask=input_mask, 276 | segment_ids=segment_ids, 277 | label_id=label_id, 278 | guid=example.guid)) 279 | return features 280 | 281 | def _truncate_seq_pair(tokens_a, tokens_b, max_length): 282 | """Truncates a sequence pair in place to the maximum length.""" 283 | 284 | while True: 285 | total_length = len(tokens_a) + len(tokens_b) 286 | if total_length <= max_length: 287 | break 288 | if len(tokens_a) > len(tokens_b): 289 | tokens_a.pop() 290 | else: 291 | tokens_b.pop() 292 | 293 | def accuracy(out, label_ids): 294 | # axis-0: seqs in batch; axis-1: potential labels of seq 295 | outputs = np.argmax(out, axis=1) 296 | matched = outputs == label_ids 297 | num_correct = np.sum(matched) 298 | num_total = len(label_ids) 299 | return num_correct, num_total 300 | 301 | 302 | 303 | ################ functions for influence function ################ 304 | 305 | def gather_flat_grad(grads): 306 | views = [] 307 | for p in grads: 308 | if p.data.is_sparse: 309 | view = p.data.to_dense().view(-1) 310 | else: 311 | view = p.data.view(-1) 312 | views.append(view) 313 | return torch.cat(views, 0) 314 | 315 | def unflatten_to_param_dim(x, param_shape_tensor): 316 | tar_p = [] 317 | ptr = 0 318 | for p in param_shape_tensor: 319 | len_p = torch.numel(p) 320 | tmp = x[ptr : ptr + len_p].view(p.shape) 321 | tar_p.append(tmp) 322 | ptr += len_p 323 | return tar_p 324 | 325 | def hv(loss, model_params, v): # according to pytorch issue #24004 326 | # s = time.time() 327 | grad = autograd.grad(loss, model_params, create_graph=True, retain_graph=True) 328 | # e1 = time.time() 329 | Hv = autograd.grad(grad, model_params, grad_outputs=v) 330 | # e2 = time.time() 331 | # print('1st back prop: {} sec. 2nd back prop: {} sec'.format(e1-s, e2-e1)) 332 | return Hv 333 | 334 | ######## LiSSA ######## 335 | 336 | def get_inverse_hvp_lissa(v, model, device, param_influence, train_loader, damping, num_samples, recursion_depth, scale=1e4): 337 | ihvp = None 338 | for i in range(num_samples): 339 | cur_estimate = v 340 | lissa_data_iterator = iter(train_loader) 341 | for j in range(recursion_depth): 342 | try: 343 | input_ids, input_mask, segment_ids, label_ids, guids = next(lissa_data_iterator) 344 | except StopIteration: 345 | lissa_data_iterator = iter(train_loader) 346 | input_ids, input_mask, segment_ids, label_ids, guids = next(lissa_data_iterator) 347 | input_ids = input_ids.to(device) 348 | input_mask = input_mask.to(device) 349 | segment_ids = segment_ids.to(device) 350 | label_ids = label_ids.to(device) 351 | model.zero_grad() 352 | train_loss = model(input_ids, segment_ids, input_mask, label_ids) 353 | hvp = hv(train_loss, param_influence, cur_estimate) 354 | cur_estimate = [_a + (1 - damping) * _b - _c / scale for _a, _b, _c in zip(v, cur_estimate, hvp)] 355 | if (j % 200 == 0) or (j == recursion_depth - 1): 356 | print("Recursion at depth %s: norm is %f" % (j, np.linalg.norm(gather_flat_grad(cur_estimate).cpu().numpy()))) 357 | if ihvp == None: 358 | ihvp = [_a / scale for _a in cur_estimate] 359 | else: 360 | ihvp = [_a + _b / scale for _a, _b in zip(ihvp, cur_estimate)] 361 | return_ihvp = gather_flat_grad(ihvp) 362 | return_ihvp /= num_samples 363 | return return_ihvp 364 | 365 | ################ 366 | 367 | # adapted from AllenNLP Interpret 368 | def _register_embedding_list_hook(model, embeddings_list, model_type): 369 | def forward_hook(module, inputs, output): 370 | embeddings_list.append(output.squeeze(0).clone().cpu().detach().numpy()) 371 | if model_type == 'BERT': 372 | embedding_layer = model.bert.embeddings.word_embeddings 373 | elif model_type == 'LSTM': 374 | embedding_layer = model.my_word_embeddings 375 | else: 376 | raise ValueError("Current model type not supported.") 377 | handle = embedding_layer.register_forward_hook(forward_hook) 378 | return handle 379 | 380 | def _register_embedding_gradient_hooks(model, embeddings_gradients, model_type): 381 | def hook_layers(module, grad_in, grad_out): 382 | embeddings_gradients.append(grad_out[0]) 383 | if model_type == 'BERT': 384 | embedding_layer = model.bert.embeddings.word_embeddings 385 | elif model_type == 'LSTM': 386 | embedding_layer = model.my_word_embeddings 387 | else: 388 | raise ValueError("Current model type not supported.") 389 | hook = embedding_layer.register_backward_hook(hook_layers) 390 | return hook 391 | 392 | def saliency_map(model, input_ids, segment_ids, input_mask, pred_label_ids, model_type='BERT'): 393 | embeddings_list = [] 394 | handle = _register_embedding_list_hook(model, embeddings_list, model_type) 395 | embeddings_gradients = [] 396 | hook = _register_embedding_gradient_hooks(model, embeddings_gradients, model_type) 397 | 398 | model.zero_grad() 399 | _loss = model(input_ids, segment_ids, input_mask, pred_label_ids) 400 | _loss.backward() 401 | handle.remove() 402 | hook.remove() 403 | 404 | saliency_grad = embeddings_gradients[0].detach().cpu().numpy() 405 | saliency_grad = np.sum(saliency_grad[0] * embeddings_list[0], axis=1) 406 | norm = np.linalg.norm(saliency_grad, ord=1) 407 | # saliency_grad = [math.fabs(e) / norm for e in saliency_grad] 408 | saliency_grad = [(- e) / norm for e in saliency_grad] # negative gradient for loss means positive influence on decision 409 | return saliency_grad 410 | 411 | ################ 412 | 413 | def get_diff_input_masks(input_mask, test_tok_sal_list): 414 | sal_scores = np.array([sal for tok, sal in test_tok_sal_list]) 415 | sal_ordered_ix = np.argsort(sal_scores) 416 | invalid_ix = [] 417 | for i, (tok, sal) in enumerate(test_tok_sal_list): 418 | if tok == '[CLS]' or tok == '[SEP]' or '##' in tok: # would not mask [CLS] or [SEP] 419 | invalid_ix.append(i) 420 | cleaned_sal_ordered_ix = [] 421 | for sal_ix in sal_ordered_ix: 422 | if sal_ix in invalid_ix: 423 | continue 424 | else: 425 | cleaned_sal_ordered_ix.append(sal_ix) 426 | 427 | # add zero and random 428 | abs_sal_ordered_ix = np.argsort(np.absolute(sal_scores)) 429 | cleaned_abs_sal_ordered_ix = [] 430 | for sal_ix in abs_sal_ordered_ix: 431 | if sal_ix in invalid_ix: 432 | continue 433 | else: 434 | cleaned_abs_sal_ordered_ix.append(sal_ix) 435 | 436 | # mask_ix = (cleaned_sal_ordered_ix[0], cleaned_sal_ordered_ix[int(len(cleaned_sal_ordered_ix)/2)], cleaned_sal_ordered_ix[-1]) 437 | mask_ix = (cleaned_sal_ordered_ix[0], cleaned_sal_ordered_ix[int(len(cleaned_sal_ordered_ix)/2)], cleaned_sal_ordered_ix[-1], cleaned_abs_sal_ordered_ix[0], random.choice(cleaned_sal_ordered_ix)) # lowest, median, highest, zero, random 438 | diff_input_masks = [] 439 | for mi in mask_ix: 440 | diff_input_mask = input_mask.clone() 441 | diff_input_mask[0][mi] = 0 442 | diff_input_masks.append(diff_input_mask) 443 | return diff_input_masks, mask_ix 444 | 445 | def influence_distance(orig_influences, alt_influences, top_percentage=0.01): 446 | orig_influences = stats.zscore(orig_influences) 447 | alt_influences = stats.zscore(alt_influences) 448 | orig_sorted_ix = list(np.argsort(orig_influences)) 449 | orig_sorted_ix.reverse() 450 | alt_sorted_ix = list(np.argsort(alt_influences)) 451 | alt_sorted_ix.reverse() 452 | num_top = int(len(orig_influences) * top_percentage) 453 | 454 | orig_top_ix = orig_sorted_ix[:num_top] 455 | alt_top_ix = alt_sorted_ix[:num_top] 456 | orig_top_ix_set = set(orig_top_ix) 457 | alt_top_ix_set = set(alt_top_ix) 458 | ix_intersection = list(orig_top_ix_set.intersection(alt_top_ix_set)) 459 | 460 | return len(ix_intersection) / num_top 461 | -------------------------------------------------------------------------------- /NLI_mask_token_analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import argparse\n", 11 | "import csv\n", 12 | "import logging\n", 13 | "import os\n", 14 | "import random\n", 15 | "import sys\n", 16 | "import pickle\n", 17 | "import time\n", 18 | "import numpy as np\n", 19 | "from termcolor import colored\n", 20 | "import matplotlib\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "import seaborn as sns\n", 23 | "from bert_util import *" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "mnli_processor = MnliProcessor()\n", 33 | "hans_processor = HansProcessor()\n", 34 | "train_examples = mnli_processor.get_train_examples('data/', 10000)\n", 35 | "test_examples = hans_processor.get_test_examples('data/')" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "7\n", 55 | "\u001b[34mThe athlete by the doctors encouraged the senator .\u001b[0m \u001b[31mThe athlete encouraged the senator .\u001b[0m entailment\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "interested_test_idx = 7\n", 61 | "\n", 62 | "for te in test_examples:\n", 63 | " if te.guid == interested_test_idx:\n", 64 | " print(te.guid)\n", 65 | " print(colored(te.text_a, \"blue\"), colored(te.text_b, \"red\"), te.label)\n", 66 | " break" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "test_tok_sal_list, train_tok_sal_lists, test_pred_status = pickle.load(open(\"from_aws/NLI_influence_output_bert_e3/saliency_test_\" + str(interested_test_idx) + \".pkl\", \"rb\"))" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 5, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "name": "stdout", 85 | "output_type": "stream", 86 | "text": [ 87 | "[CLS]\t0.004317282\n", 88 | "the\t-0.007485319\n", 89 | "athlete\t-0.14760923\n", 90 | "by\t-0.049165454\n", 91 | "the\t-0.050391756\n", 92 | "doctors\t-0.14886299\n", 93 | "encouraged\t-0.088029\n", 94 | "the\t-0.032777905\n", 95 | "senator\t-0.055137962\n", 96 | ".\t-0.01177289\n", 97 | "[SEP]\t0.003715336\n", 98 | "\u001b[32mthe\t0.01097851\u001b[0m\n", 99 | "athlete\t-0.04906088\n", 100 | "\u001b[32mencouraged\t0.08856703\u001b[0m\n", 101 | "the\t-0.009075683\n", 102 | "senator\t-0.18473382\n", 103 | ".\t-0.022565214\n", 104 | "\u001b[32m[SEP]\t0.035753675\u001b[0m\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "top_sal_ix = [_ix for _ix, _ in sorted(enumerate(test_tok_sal_list), key=lambda x: -x[1][1])][:3]\n", 110 | "for ix, (tok, sal) in enumerate(test_tok_sal_list):\n", 111 | " if ix in top_sal_ix:\n", 112 | " print(colored(tok + '\\t' + str(sal), \"green\"))\n", 113 | " else:\n", 114 | " print(tok + '\\t' + str(sal))" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 6, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "orig_influences = pickle.load(open(\"from_aws/NLI_influence_output_bert_e3/influences_test_\" + str(interested_test_idx) + \".pkl\", \"rb\"))" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 7, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "\u001b[34mOne of the drawings on view at MoMA is a diagram of the races, with the Jews identified as circumscised [ sic ] cut off from Earth.\u001b[0m \u001b[31mJews think of drawings on the MoMA as a diagram of the races teaching circumcision. \u001b[0m entailment\n", 143 | "5.741442680358887\n", 144 | "\n", 145 | "\u001b[34mTwo pillars surviving from the Greek Temple of Apollo stand like a gateway, but the Spanish era has given it a charming 17th-century ambience of Baroque houses with iron balconies supported by floral carvings and an occasional stone nymph.\u001b[0m \u001b[31mIt's comprised of parts of the Greek Temple of Apollo and Spanish Baroque houses.\u001b[0m entailment\n", 146 | "5.344240665435791\n", 147 | "\n", 148 | "\u001b[34mbecause you're having fun\u001b[0m \u001b[31mBecause you're having fun\u001b[0m entailment\n", 149 | "4.964197158813477\n", 150 | "\n", 151 | "\u001b[34m9. INVERSE PRICE CAPS\u001b[0m \u001b[31m9 Inward caps on price. \u001b[0m entailment\n", 152 | "4.923686981201172\n", 153 | "\n", 154 | "\u001b[34mI don't know if I was in heaven or hell, said Lillian Carter, the president's mother, after a visit.\u001b[0m \u001b[31mThe president's mother visited.\u001b[0m entailment\n", 155 | "4.879209995269775\n", 156 | "\n", 157 | "\u001b[34mDo it now, think 'bout it later.\u001b[0m \u001b[31mDon't think about it now, just do it.\u001b[0m entailment\n", 158 | "4.244877815246582\n", 159 | "\n", 160 | "\u001b[34mThe participating lawyers say Ginsburg, a diehard Red Sox fan known for his trademark bow ties, is tough to turn down.\u001b[0m \u001b[31mGinsburg is exceptionally tough to refuse according to the lawyers.\u001b[0m entailment\n", 161 | "4.093533515930176\n", 162 | "\n", 163 | "\u001b[34mHowever, they noted that more detailed guidelines often provided answers to employees' questions and served as a tool for educating subordinate security managers and others who wanted a more thorough understanding of good security practices.\u001b[0m \u001b[31mThe remarked that thorough guidelines are useful for educating security managers in lower ranks. \u001b[0m entailment\n", 164 | "3.957554340362549\n", 165 | "\n", 166 | "\u001b[34mIn 1511 Diego de Velazquez sailed from neighboring Hispaniola with some 300 conquistadores (conquerors).\u001b[0m \u001b[31mDiego de Velazquez sailed from Hispaniola in 1511.\u001b[0m entailment\n", 167 | "3.744591474533081\n", 168 | "\n", 169 | "\u001b[34mThese would be special cases of the more general case analyzed here.\u001b[0m \u001b[31mThis analysis is more general than these special cases.\u001b[0m entailment\n", 170 | "3.6332457065582275\n", 171 | "\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "order_of_interest = \"max\"\n", 177 | "label_of_interest = \"both\"\n", 178 | "num_of_interest = 10\n", 179 | "\n", 180 | "train_idx_sorted = list(np.argsort(orig_influences))\n", 181 | "if order_of_interest == \"max\":\n", 182 | " train_idx_sorted.reverse()\n", 183 | "if label_of_interest == \"both\":\n", 184 | " valid_labels = [\"entailment\", \"non-entailment\"]\n", 185 | "else:\n", 186 | " valid_labels = [label_of_interest]\n", 187 | "\n", 188 | "cnt = 0\n", 189 | "for idx in train_idx_sorted:\n", 190 | " te = train_examples[idx]\n", 191 | " if te.label in valid_labels:\n", 192 | " print(colored(te.text_a, \"blue\"), colored(te.text_b, \"red\"), te.label)\n", 193 | " print(orig_influences[idx])\n", 194 | "# top_sal_ix = [_ix for _ix, _ in sorted(enumerate(train_tok_sal_lists[idx]), key=lambda x: -x[1][1])][:3]\n", 195 | "# for ix, (tok, sal) in enumerate(train_tok_sal_lists[idx]):\n", 196 | "# if ix in top_sal_ix:\n", 197 | "# print(colored(tok + '\\t' + str(sal), \"green\"))\n", 198 | "# else:\n", 199 | "# print(tok + '\\t' + str(sal))\n", 200 | " print()\n", 201 | " cnt += 1\n", 202 | " if cnt >= num_of_interest:\n", 203 | " break" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 8, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "diff_influences_list, mask_ix = pickle.load(open(\"from_aws/NLI_influence_output_bert_e3/diff_mask_influences_test_\" + str(interested_test_idx) + \".pkl\", \"rb\"))" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 9, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "name": "stdout", 229 | "output_type": "stream", 230 | "text": [ 231 | "w.r.t. token: encouraged\n", 232 | "\n", 233 | "\u001b[34mOne of the drawings on view at MoMA is a diagram of the races, with the Jews identified as circumscised [ sic ] cut off from Earth.\u001b[0m \u001b[31mJews think of drawings on the MoMA as a diagram of the races teaching circumcision. \u001b[0m entailment\n", 234 | "18.779272079467773\n", 235 | "\n", 236 | "\u001b[34mTwo pillars surviving from the Greek Temple of Apollo stand like a gateway, but the Spanish era has given it a charming 17th-century ambience of Baroque houses with iron balconies supported by floral carvings and an occasional stone nymph.\u001b[0m \u001b[31mIt's comprised of parts of the Greek Temple of Apollo and Spanish Baroque houses.\u001b[0m entailment\n", 237 | "18.099933624267578\n", 238 | "\n", 239 | "\u001b[34mI don't know if I was in heaven or hell, said Lillian Carter, the president's mother, after a visit.\u001b[0m \u001b[31mThe president's mother visited.\u001b[0m entailment\n", 240 | "16.18392562866211\n", 241 | "\n", 242 | "\u001b[34m9. INVERSE PRICE CAPS\u001b[0m \u001b[31m9 Inward caps on price. \u001b[0m entailment\n", 243 | "14.431646347045898\n", 244 | "\n", 245 | "\u001b[34mDo it now, think 'bout it later.\u001b[0m \u001b[31mDon't think about it now, just do it.\u001b[0m entailment\n", 246 | "12.819534301757812\n", 247 | "\n", 248 | "\u001b[34mbecause you're having fun\u001b[0m \u001b[31mBecause you're having fun\u001b[0m entailment\n", 249 | "12.689254760742188\n", 250 | "\n", 251 | "\u001b[34mthat's Major Dad at eight i think it is\u001b[0m \u001b[31mI think Major Dad is on at eight o'clock.\u001b[0m entailment\n", 252 | "12.103283882141113\n", 253 | "\n", 254 | "\u001b[34mIn 1511 Diego de Velazquez sailed from neighboring Hispaniola with some 300 conquistadores (conquerors).\u001b[0m \u001b[31mDiego de Velazquez sailed from Hispaniola in 1511.\u001b[0m entailment\n", 255 | "12.007287979125977\n", 256 | "\n", 257 | "\u001b[34mHowever, they noted that more detailed guidelines often provided answers to employees' questions and served as a tool for educating subordinate security managers and others who wanted a more thorough understanding of good security practices.\u001b[0m \u001b[31mThe remarked that thorough guidelines are useful for educating security managers in lower ranks. \u001b[0m entailment\n", 258 | "11.697555541992188\n", 259 | "\n", 260 | "\u001b[34mFiction has its glories, but concealment is merely squalid.\u001b[0m \u001b[31mHiding things is just dirty, whereas there is glory in fiction\u001b[0m entailment\n", 261 | "11.545598983764648\n", 262 | "\n" 263 | ] 264 | } 265 | ], 266 | "source": [ 267 | "remove_token_type = 2\n", 268 | "\n", 269 | "influences = diff_influences_list[remove_token_type]\n", 270 | "print(\"w.r.t. token:\", test_tok_sal_list[mask_ix[remove_token_type]][0])\n", 271 | "print()\n", 272 | "\n", 273 | "order_of_interest = \"max\"\n", 274 | "label_of_interest = \"both\"\n", 275 | "num_of_interest = 10\n", 276 | "\n", 277 | "train_idx_sorted = list(np.argsort(influences))\n", 278 | "if order_of_interest == \"max\":\n", 279 | " train_idx_sorted.reverse()\n", 280 | "if label_of_interest == \"both\":\n", 281 | " valid_labels = [\"entailment\", \"non-entailment\"]\n", 282 | "else:\n", 283 | " valid_labels = [label_of_interest]\n", 284 | "\n", 285 | "cnt = 0\n", 286 | "for idx in train_idx_sorted:\n", 287 | " te = train_examples[idx]\n", 288 | " if te.label in valid_labels:\n", 289 | " print(colored(te.text_a, \"blue\"), colored(te.text_b, \"red\"), te.label)\n", 290 | " print(influences[idx])\n", 291 | " print()\n", 292 | " cnt += 1\n", 293 | " if cnt >= num_of_interest:\n", 294 | " break" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 59, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "tp = 0.001\n", 325 | "\n", 326 | "idx_range = range(1, 31)\n", 327 | "influence_distance_lists = ([], [], [], [], [])\n", 328 | "for iti in idx_range:\n", 329 | " orig_inf = pickle.load(open(\"from_aws/NLI_influence_output_bert_e3/influences_test_\" + str(iti) + \".pkl\", \"rb\"))\n", 330 | " diff_inf_list, m_ix = pickle.load(open(\"from_aws/NLI_diff_influence_output_bert_e3/diff_mask_influences_test_\" + str(iti) + \".pkl\", \"rb\"))\n", 331 | " for i, influences in enumerate(diff_inf_list):\n", 332 | " influence_distance_lists[i].append(influence_distance(orig_inf, influences, top_percentage=tp))" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 60, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "data": { 342 | "text/plain": [ 343 | "0.32999999999999996" 344 | ] 345 | }, 346 | "execution_count": 60, 347 | "metadata": {}, 348 | "output_type": "execute_result" 349 | } 350 | ], 351 | "source": [ 352 | "np.mean(influence_distance_lists[0])" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 61, 358 | "metadata": {}, 359 | "outputs": [ 360 | { 361 | "data": { 362 | "text/plain": [ 363 | "0.7933333333333332" 364 | ] 365 | }, 366 | "execution_count": 61, 367 | "metadata": {}, 368 | "output_type": "execute_result" 369 | } 370 | ], 371 | "source": [ 372 | "np.mean(influence_distance_lists[1])" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 62, 378 | "metadata": {}, 379 | "outputs": [ 380 | { 381 | "data": { 382 | "text/plain": [ 383 | "0.46" 384 | ] 385 | }, 386 | "execution_count": 62, 387 | "metadata": {}, 388 | "output_type": "execute_result" 389 | } 390 | ], 391 | "source": [ 392 | "np.mean(influence_distance_lists[2])" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 63, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/plain": [ 403 | "" 404 | ] 405 | }, 406 | "execution_count": 63, 407 | "metadata": {}, 408 | "output_type": "execute_result" 409 | }, 410 | { 411 | "data": { 412 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VPW9//HXJ3tYwhqQJayyhS1gWKy71StqheLC5lIUsfaWumDppWq5Ymv9VavorXSRxQVFoFYrVm5RW6oVJRIkEcIaNgnIEggEsi/f3x8z5kYIZIBJTjJ5Px8PHubMnMy8H53Mu2fOnO/3a845REQktIR5HUBERIJP5S4iEoJU7iIiIUjlLiISglTuIiIhSOUuIhKCVO4iIiFI5S4iEoJU7iIiISjCqydu3bq169Kli1dPLyJSL61ZsybbORdf3X6elXuXLl1ITU316ulFROolM9sVyH46LSMiEoJU7iIiIUjlLiISglTuIiIhSOUuIhKCVO4iIiFI5S4iEoI8u85dpC66fV4KWTkFdGwRy4JJw7yOI3LWVO4ilWTlFLAjO8/rGCLnTKdlRERCkMpdRCQEqdxFREKQyl1EJASp3EVEQpDKXUQkBKncRURCkMpdRCQEqdxFREKQyl1EJAQFVO5mNsLMNptZpplNr+L+Tma2wszWmtmXZnZd8KOKiEigqi13MwsHZgPXAonAeDNLPGG3R4ElzrlBwDjg98EOKiIigQvkyH0okOmc2+6cKwYWAaNO2McBcf6fmwF7gxdRpOblFZXy7Pub+epwPgBfHc7nf/6xlcKSMo+TiZydQGaF7ADsrrSdBZw4F+pjwPtm9hOgMXBVUNKJ1IKC4jImzE0hffeRitvKyh3PfrCFT7dl88pdQ4mOCPcwociZC+TI3aq4zZ2wPR542TnXEbgOWGBmJz22md1jZqlmlnrw4MEzTytSA175bOe3ir2yVdsPsyQ1q3YDiQRBIOWeBSRU2u7IyaddJgFLAJxznwExQOsTH8g596JzLtk5lxwfH392iUWC7O0v9lRzv8pd6p9Ayn010MPMuppZFL4vTJeesM9XwHcBzKwPvnLXobnUC4fyis/pfpG6qNpyd86VAlOA5cBGfFfFZJjZ42Y20r/bQ8BkM0sH3gAmOudOPHUjUid1a934nO4XqYsCWmbPObcMWHbCbTMq/bwBuCi40URqx4RhCXy+8/Ap728bF1OLaUSCQyNUpcErLT/9h8xFq3ezYNWuWkojEhwqd2nQdh/O57GlGwDfZWExkb63ROPocO6+uGvFfr/463oWr/7Ki4giZ0XlLg1WWbnjwcVpHC8qBeDey7vTrlksAG2axvDo9xJ5fFTfiv2nv7WOv6zRlTNSP6jcpcH640fbSN2VA0Df9nE8eFXPk/a548IuPHp9HwCcg2lvpvNO2ukvnRSpC1Tu0iB9mXWEWR9sASA6IoznxyURFVH12+HuS7ox/dreAJQ7mLoknWXrvq61rCJnQ+UuDU5BcRkPLE6r+CL1kev7cH6bpqf9nXsv685DV/uO7MvKHfe9sZb3M/bVeFaRs6Vylwbn18s2sv1gHgCX9Yzn9uGdA/q9n3y3B/ddeT7gu8Lmxwu/4J+b9tdYTpFzoXKXBmXFpgMVlzW2bBzF07cMwKyq6ZOq9uDVPbn3su4AlJQ57l3wBR9t0WBsqXtU7tJgZB8vYtqb6RXbT97YnzZNz2yAkpnxXyN6Mcl/mWRxWTn3vJrKp5nZQc0qcq5U7tIgOOeY/pd1ZB/3zRMzNjmBa/qed1aPZWY8en0f7rjQdzqnqLScSa+kkrL9UNDyipwrlbs0CItW7+bDjb7z451bNWLGDScuJnZmzIzHbujL+KGdACgoKePOl1ezZteppzEQqU0qdwl5O7LzePxd3yjU8DBj1tgkGkcHNK3SaYWFGU98vx83X9ARgPziMibOX03aKeaGF6lNKncJaSVl5TywOI0C/3J5U644n8GdWgTt8cPCjN/cNIDvJ7UH4FhRKXfMS2H9nqNBew6Rs6Fyl5D2u39mVqyylJTQnCn+SxmDKTzM+O0tA7l+QDsAcgtLuW1eChu/zg36c4kESuUuIWvNrhxmr8gEoFFUOLPGJhEZXjN/8hHhYTw3Nolr+rYF4Eh+CbfOTWHL/mM18nwi1VG5S0g6XlTK1CVplPlHoc74XiJda3jRjcjwMH43fjDf7d0GgMN5xUyYk8K2g8dr9HlFqqJyl5D0y3c3sOtQPgBXJ7Zl7JCEan7Dp2OLWLq2bkzHFrFn9bxREWH8/rbBXNbTt0Zw9vEiJsxZxc7svLN6PJGzZV6thpecnOxSU1M9eW4JbX9fv497X1sDQOsm0Sx/4BJaNYmu1QyFJWVMemU1KzN91763bxbD4h9eSELLRrWaQ0KPma1xziVXt5+O3CWkHMgt5OdvfVmx/fTNA2q92AFiIsOZe8cQhnVtCcDeo4WMn7OKPUcKaj2LNEwqdwkZzjl++uaX5OSXAHD78M5c4T//7YXYqHDmTxxCcmffpZdZOQVMmLOKfUcLPcskDYfKXULGq5/t4mP/JF7d4hvz8HV9PE4EjaMjeOnOISQlNAdg16F8JsxZxYFjKnipWSp3CQlb9x/j18s2AhARZjw/dhCxUeEep/JpGhPJK3cNpV+HOAC2Z+dx65wUso8XeZxMQpnKXeq94lLfKNSi0nLANy1v/47NPE71bc1iI3lt0jD6tPMV/NYDx7ltbgo5ecUeJ5NQpXKXem/Wh1vI2OsbDTqkS4uK+dbrmuaNonht0lB6tm0CwKZ9x7htXgpH/d8RiASTyl3qtZTth/jjR9sAaBIdwbNjkggPC3zxjdrWqkk0r989nO7xvgFVGXtzuWN+CrmFKngJLpW71Fu5hSVMXZLON0M1Hh/Vt15cRx7fNJqFk4fTpZUva3rWUSbO/5zjRaUeJ5NQElC5m9kIM9tsZplmNr2K+2eZWZr/3xYz05ynUuP++52MiuvGr+/fjtGDOnicKHBt42JYOHk4CS19I2G/+OoId720mvxiFbwER7XlbmbhwGzgWiARGG9m31rpwDn3oHMuyTmXBPwOeKsmwop8Y2n6Xt5euweAtnHRPDG63xmthVoXtG8ey8K7h9Ohua/gP995mEkvp1JQXOZxMgkFgRy5DwUynXPbnXPFwCJg1Gn2Hw+8EYxwIlXZe6SAR99eV7H9zC1JNG8U5WGis5fQshELJw/jvDjfWq6fbT/EPQtSKSxRwcu5CaTcOwC7K21n+W87iZl1BroC/zzF/feYWaqZpR48qBXj5cyVlzseWpJObqHv9MWki7tycY/WHqc6N51bNWbh5GHEN/VNk/Dvrdn86LU1FJWq4OXsBVLuVX3WPdVsY+OAN51zVf5VOudedM4lO+eS4+PjA80oUmH+yh185l+Iulfbpky7ppfHiYKjW3wT3pg8jFaNfZ9AVmw+yJSFaykpK/c4mdRXgZR7FlB5vtSOwN5T7DsOnZKRGrLx61ye+vtmAKLCw3huXBIxkXVjFGownN+mKa9PHkaLRpEAfLBhP/cvWkupCl7OQiDlvhroYWZdzSwKX4EvPXEnM+sFtAA+C25EEd8Uug8sSqPYX3Q/G9GrYrRnKOl9Xhyv3T2MZrG+gl+2bh9Tl6RXLDoiEqhqy905VwpMAZYDG4ElzrkMM3vczEZW2nU8sMh5NUG8hLSnl29ms3/Juu90b8VdF3X1OFHN6du+GQsmDaVpdATguzJo2p9V8HJmtFiH1HmfbM3mtnkpAMTFRLD8wUtp1+zsVkqqT774Kofb56aQ5780cmxyAk/e2J+wOjwCV2qeFuuQkHAkv5iH/pxWsf3E6P4NotgBBndqwct3DaWRf3bLxam7+cU769GHYwmEyl3qLOccD7+9jv25vqlxRw/qwA0D23ucqnYN6dKSeT8YQkyk7636espXzHx3gwpeqqVylzrrrS/2sGzdPgA6NI9l5qi+HifyxoXdWzH3jiFERfjeri9/upNfL9uogpfTUrlLnbT7cD7/vTQDADN4dsxA4mIiPU7lnYt7tObF2y8gKtz3lp3z7x08vXyzCl5OSeUudU5ZuWPqkrSKWRLvvaw7w7q18jiV9y7v1Ybf3zqYCP8Xqr//1zae/8dWj1NJXaVylzrnjx9tY/XOHAD6to/jwat6epyo7rgqsS0vTBhUMWf9cx9uZfaKTI9TSV2kcpc6ZV3WUWZ9sAWA6Igwnh+XVHGuWXxG9GvHc2OT+OaKyKeXb+bFj7d5G0rqHL1rpM4oKC7j/sVrKfUP1nnk+j6c36apx6nqphsGtueZMQP5ZpbjXy/bxEsrd3gbSuoUlbvUGb9etpHtB/MAuKxnPLcP7+xxorpt9KCO/OamARXbM9/dwIJVuzxMJHWJyl3qhBWbDlQUU4tGkTx984B6t/iGF8YkJ/Dr0f0rtn/x1/UsXv2Vh4mkrlC5i+eyjxcx7c30iu0nbxxAG//iFVK9CcM6MXPk/40BmP7WOv6yJsvDRFIXqNzFU845pv9lHdnHiwHf/Ckj+p3ncar65wff6cKj1/cBwDmY9mY676Tt8TiVeEnlLp5avHo3H27cD0DnVo2YcUNiNb8hp3L3Jd34rxG9ASh3MHVJOsvWfe1xKvGKyl08syM7j5nvbgAgPMyYNTaJxv5pbuXs/Ojy7ky92jcuoKzccd8ba3k/Y5/HqcQLKnfxRElZOQ8sTqPAvxD0lCvOZ3CnFh6nCg33fbcHP7nyfABKyx0/XvgFKzYd8DiV1DaVu3jihX9mkr77CABJCc2Z4i8jCY6pV/fkh5d1A6CkzPHD19bw8RYtSt+QqNyl1q3ZlcML/iHzsZHhzBqbRGS4/hSDycyYPqJ3xYpVxaXlTH41lU8zsz1OJrVF7yipVceLSpm6JK1iybgZNyTStXVjj1OFJjPjF9/rUzEYrKi0nEmvpPL5jsMeJ5PaoHKXWvXLdzew61A+AFf1acu4IQkeJwptZsbMkX0ZP9T3v3NBSRl3vvQ5a3bleJxMaprKXWrN8ox9LE7dDUDrJtH85qb+GoVaC8LCjCe+35+bL+gIQF5xGRPnf17xnYeEJpW71IoDuYVM/8uXFdtP3zyAVk2iPUzUsISFGb+5aQCjknzLFB4rKuX2eSms33PU42RSU1TuUuOcc0x780ty8ksAuH14Z67o3cbjVA1PeJjxzC0Dub5/OwByC0u5bV4KG7/O9TiZ1ASVu9S4Bat28ZH/Mrxu8Y15+Lo+HidquCLCw3huXBL/kdgWgCP5Jdw2N4Wt+495nEyCTeUuNSrzwDGeeG8jABFhxvNjBxEbFe5xqoYtMjyMFyYM5kr/p6dDecWMn5PCtoPHPU4mwaRylxpTXFrO/YvSKCotB+DBq3vSv2Mzj1MJQFREGL+/dTCX9owHfDNzTpizip3ZeR4nk2AJqNzNbISZbTazTDObfop9xpjZBjPLMLOFwY0p9dGsD7eQsdd3PndIlxbce1l3jxNJZTGR4bx4+wV8p7tv8fH9ub6C33043+NkEgzVlruZhQOzgWuBRGC8mSWesE8P4OfARc65vsADNZBV6pGU7Yf440e+dT2bREfw7JikikWdpe6IiQxn7g+SGdq1JQB7jxYyfs4q9hwp8DiZnKtAjtyHApnOue3OuWJgETDqhH0mA7OdczkAzjnNUtSA5RaWMHVJOs43CJWZI/uS0LKRt6HklBpFRTB/4hAu6OybuC0rp4AJc1ax72ihx8nkXARS7h2A3ZW2s/y3VdYT6GlmK81slZmNCFZAqX8eeyej4sjv+v7tuHHwiX8uUtc0iY7gpTuHMDChOQC7DuUzYc4qDhxTwddXgZR7VZ+l3QnbEUAP4HJgPDDXzJqf9EBm95hZqpmlHjyoGepC0bvpe3lrrW8FoLZx0Twxup9GodYTcTGRvHrnUPp1iANge3Yet85JIft4kcfJ5GwEUu5ZQOUJQDoCe6vY5x3nXIlzbgewGV/Zf4tz7kXnXLJzLjk+Pv5sM0sd9fXRAh55e13F9jO3JNG8UZSHieRMNWsUyYK7htH7vKYAbD1wnNvmppCTV+xxMjlTgZT7aqCHmXU1syhgHLD0hH3+ClwBYGat8Z2m2R7MoFK3lZc7HlqSTm5hKQCTLu7KxT1ae5xKzkaLxlG8fvcwerRpAsCmfce4bV4KR/0jjKV+qLbcnXOlwBRgObARWOKcyzCzx81spH+35cAhM9sArACmOecO1VRoqXvmr9zBp9t8L3mvtk2Zdk0vjxPJuWjVJJrXJw+jW7xvOuaMvbncMT+F3EIVfH1hzp14+rx2JCcnu9TUVE+eW4Jr49e5jHphJcVl5USFh/HOlIvo0y7O61gSBPtzCxn7p8/Y6Z+meXCn5rw6aRhNtNatZ8xsjXMuubr9NEJVzklhSRkPLEqjuMw3CnXaNb1U7CGkbVwMCycPJ6FlLABffHWEu15aTX5xqcfJpDoqdzknTy/fzGb/pFMXdmvFpIu7epxIgq1981gW3j2cDs19Bf/5zsNMejmVguIyj5PJ6ajc5ax9sjWbeZ/sACAuJoJnxgwkTKNQQ1JCy0YsnDyM8+JiAPhs+yHuWZBKYYkKvq5SuctZOZJfzE//nF6x/cTo/rT3H9lJaOrcqjELJw8jvqlvkZV/b83mR6+toahUBV8XqdzljDnneOTt9ezL9Y1eHD2oAzcMbO9xKqkN3eKb8MbkYbRq7Bu/sGLzQaYsXEuJ/zsXqTtU7nLG3l67h/fWfQ1Ah+axzBzV1+NEUpvOb9OU1ycPo0WjSAA+2LCf+xetpVQFX6eo3OWM7D6cz4x3MgAwg2fHDCQuJtLjVFLbep8Xx2t3D6NZrO+1X7ZuH1OXpFNW7s2l1XIylbsErKzcMXVJGseLfJfB3XtZd4Z1a+VxKvFK3/bNWDBpKE3917wvTd/LtDfTKVfB1wkqdwnYHz/axuqdOQD0bR/Hg1f19DiReG1Ax+a8Mmkojf1LJ771xR4efnudCr4OULlLQNZlHWXWB1sAiI4I47mxSURF6M9HYHCnFrx811BiI30Fv2j1bmYsXY9Xo9/FR+9OqVZBcRn3L15Lqf9o7OHr+tCjbVOPU0ldMqRLS+ZPHEJMpK9SXlv1FY//bYMK3kMqd6nWk/+7ke0HfQsnX9Yznjsu7OxxIqmLLuzeijl3JFd8ontp5U6e/N9NKniPqNzltFZsOsCrn+0CoEWjSJ6+eYAW35BTuqRHPH+6/QKiwn3V8uLH2/nt+5tV8B5QucspHTpexLQ3v6zYfvLGAbTxDz8XOZUrerVh9q2DifBPRTF7xTb+5x+ZHqdqeFTuUiXnHNPfWlexxNrY5ARG9DvP41RSX1yd2JbfjR9EuL/gZ324hdkrVPC1SeUuVVq8ejcfbNgPQOdWjZhxQ6LHiaS+ubZ/O2aNTeKbueSeXr6ZOR9rgbbaonKXk+zIzmPmuxsACDN4dkwSjbU4g5yFkQPb89tbBvLN1zRPLNvISyt3eBuqgVC5y7eUlJXzwOI0CvxTuU65sgcXdG7hcSqpz24c3JHf3DigYnvmuxtYsGqXh4kaBpW7fMsL/8wkffcRAAYmNOcnV57vcSIJBWOGJPDE6H4V27/463oWr/7Kw0ShT+UuFb74KocX/F96xUaG89zYJCLD9SciwXHrsM48Vum7m+lvreMva7I8TBTadCJVAMgrKuXBxWkVs/rNuCGRrq0be5xKQs3Ei7pSWu741XsbcQ6mvZlORLgxKqlDUB7/9nkpZOUU0LFFLAsmDQvKY9ZXKncB4Jd/28Au/wr3V/Vpy7ghCR4nklB19yXdKC4r56m/b6bcwdQl6USGh3Fd/3bn/NhZOQXsyM4LQsr6T5+5heUZ+1i0ejcArZtE85ub+msUqtSo/7z8/IpZRcvKHfe9sZb3M/Z5nCq0qNwbuAO5hUz/y/+NQn365gG0ahLtYSJpKO777vlMucL3hX1puePHC79gxaYDHqcKHSr3Bsw5x7Q3vyQnvwSA24d35orebTxOJQ2FmfHQf/Tkh5d2A6CkzPHD19bw8ZaDHicLDQGVu5mNMLPNZpZpZtOruH+imR00szT/v7uDH1WCbcGqXXzkfyN1i2/Mw9f18TiRNDRmxvRre3PnRV0AKC4tZ/KrqXyame1tsBBQbbmbWTgwG7gWSATGm1lVY9EXO+eS/P/mBjmnBFnmgWM88d5GACLCjOfGJhHrX01HpDaZGTO+l8htwzsBUFRazqRXUvl8x2GPk9VvgRy5DwUynXPbnXPFwCJgVM3GkppUXOobhVpU6lut/sGrezKgY3OPU0lDZmY8PrJfxVVaBSVl3PnS56zZleNxsvorkHLvAOyutJ3lv+1EN5nZl2b2ppnpOro67LkPt7B+Ty4AyZ1bcO9l3T1OJAJhYcavR/fnpsEdAcgrLmPi/M8rRkzLmQmk3Ku6Ju7EmfffBbo45wYAHwKvVPlAZveYWaqZpR48qC9NvPD5jsP84aNtADSJjmDW2KSKaVlFvBYWZjx18wBGJbUH4FhRKbfPS2H9nqMeJ6t/Ain3LKDykXhHYG/lHZxzh5xzRf7NOcAFVT2Qc+5F51yycy45Pj7+bPLKOcgtLOHBxWl8syjOzJF9SWjZyNtQIicIDzOeuWUg1/X3rR+QW1jKbfNS2Ph1rsfJ6pdAyn010MPMuppZFDAOWFp5BzOrPLRsJLAxeBElWB57J4M9RwoAuL5/O24cHJwh3yLBFhEexvPjBnF1YlsAjuSXcNvcFLbuP+Zxsvqj2nJ3zpUCU4Dl+Ep7iXMuw8weN7OR/t3uM7MMM0sH7gMm1lRgOTvvpu/lrbV7AGgbF80To/tpFKrUaZHhYbwwYRBX+sdeHMorZvycFLYdPO5xsvohoOvcnXPLnHM9nXPdnXNP+G+b4Zxb6v/55865vs65gc65K5xzm2oytJyZr48W8Mjb6yq2n7klieaNojxMJBKY6Ihwfn/rYC7p0RqA7ONFTJizip2aP6ZaGqEa4srLHQ8tSSe3sBSAuy7qysX+N4pIfRATGc6cO5L5TvdWAOzP9RX87sP5Hier21TuIW7+yh18uu0QAD3bNuFnI3p5nEjkzMVEhjP3B8kM7doSgL1HCxk/ZxV7/d8hyclU7iFs075cnvr7ZgCiwsN4buwgYiI1ClXqp0ZREcyfOKRi2cesnALGz1nFvqOFHierm1TuIaqwpIwHFqVRXOYbhTrtml4kto/zOJXIuWkSHcFLdw5hYIJvRPWuQ/lMmLuKA8dU8CdSuYeo3y7fzKZ9vsvGLuzWikkXd/U4kUhwxMVE8uqdQ+nXwXewsv1gHuNfXMUf/pVZcRR/JL+Y7ONFp3uYkKdyD0ErM7OZ+8kOAOJiInhmzEDCNApVQkizRpEsuGsYvc9rCsC2g3n85u+bKSgpAyAnv4Srn/2IdVkNd2Sryj3EHMkv5qEl6RXbT4zuT/vmsR4mEqkZLRpH8frdw4g9xfdIOfkl/Oj1NZT6T002NCr3EOKc45G317Mv1/fRdPSgDtwwsL3HqURqzpGCkoqj9apk5RTwr80Ncx4rlXsIeXvtHt5b9zUAHZrHMnNUX48TidSsbQeqH63aUEe0qtxDxO7D+cx4JwMAM3hmzEDiYiI9TiVSs1o1qX6kdYvGDXM0tso9BJT5R6EeL/KNQv3hpd0Z3q2Vx6lEat6ghBZ0Os3MprGR4VzT97xaTFR3qNxDwJ8+3sbnO31LkiW2i2Pq1T09TiRSO8LCjF99vx8Rp7ga7NHv9aFZbMP8BKtyr+fW7znKs+9vASA6IoznxyURFaGXVRqOS3vGs+TeCytmjwTfe2HeD5K5dVhnD5N5Sy1QjxUUl3H/orWUlvtW33j4uj70aNvU41QitW9wpxbMnziELq18p2jaN4/lu33aepzKWyr3euzJ/93ItoO+qU8v6xnPHRc23KMUEUBrFFSicq+nVmw6wKuf7QKgRaNInr55gP6wRaSCyr0eOnS8iGlvflmx/eSNA2gTF+NhIhGpa1Tu9YxzjulvrauYFGlMckdG9GuYl3qJyKmp3OuZJam7+WDDfgA6tWzEjBs0ClVETqZyr0d2Zucx890NAIQZzBqbRJPoCI9TiUhdpHKvJ0rLynlgcRr5xb5JkqZc2aNiRRoRkROp3OuJF1Zkkrb7CAADE5rzkyvP9ziRiNRlKvd64IuvcvjdPzMB31wZz41NIjJcL52InJoaoo7LKyrlwcVplPlHoc64IZGurRt7nEpE6jqVex33y79tYNehfACu6tOWcUMSPE4kIvWByr0OW56xj0WrdwPQukkU/++m/hqFKiIBCajczWyEmW02s0wzm36a/W42M2dmycGL2DAdOFbIz99aV7H91M0DaN0k2sNEIlKfVFvuZhYOzAauBRKB8WaWWMV+TYH7gJRgh2xonHP87M0vOZxXDMBtwztxZe+GPcOdiJyZQI7chwKZzrntzrliYBEwqor9fgk8BRQGMV+D9NqqXRWL+naLb8wj1530/6UiIqcVSLl3AHZX2s7y31bBzAYBCc65v53ugczsHjNLNbPUgwcb5ork1ck8cIxfvbcRgIgw47mxScRGhXucSkTqm0DGrlf1DZ6ruNMsDJgFTKzugZxzLwIvAiQnJ7tqdq+3bp+XQlZOAR1bxLJg0rCAf6+41DcKtai0HIAHr+7JgI7NayqmSMjp2CL2W/9tyAIp9yyg8vV3HYG9lbabAv2Af/mv5DgPWGpmI51zqcEKWp9k5RSwIzvvjH/vuQ+3sH5PLgDJnVtw72Xdgx1NJKSdycFUqAvktMxqoIeZdTWzKGAcsPSbO51zR51zrZ1zXZxzXYBVQIMt9rP1+Y7D/OGjbQA0iY5g1tgkwk+x6K+ISHWqLXfnXCkwBVgObASWOOcyzOxxMxtZ0wEbgtzCEh5cnIbzn6iaObIvCS0beRtKROq1gOaLdc4tA5adcNuMU+x7+bnHalgeeyeDPUcKALiu/3ncOLhDNb8hInJ6GqHqsb99uZe31u4BoG1cNE98X6NQReTcqdw99PXRAh55e33F9m9vGUiLxlEeJhKRUKFy90h5ueOnf07naEEJAHdd1JVLesR7nEpEQoXK3SPzV+5gZeYhAHq2bcLPRvTyOJGIhBKVuwc27cvlqb9vBiAqPIznxg5nWVHIAAAGi0lEQVQiJlKjUEUkeFTutaywpIwHFqVRXOYbhTrtml4kto/zOJWIhBqVey377fLNbNp3DIALu7Vi0sVdPU4kIqFI5V6LVmZmM/eTHQDExUTwzJiBhGkUqojUAJV7LTmaX8JDS9Irtn81uj/tm2tyIxGpGSr3WuCc4+G/rmNfrm+q++8ntWfkwPYepxKRUKZyrwV/TdvDe19+DUCH5rHMHNXP40QiEupU7jVs9+F8Zvw1AwAzeGbMQJrFRnqcSkRCncq9BjnneGhJOseKSgH44aXdGd6tlcepRKQhCGhWSAlccWk5xf6VlI4WlLDzUD4Aie3imHp1Ty+jiUgDonIPkvJyxx8+2sb8T3ZwKK8YgJx837wx0RFhPD8uiagIfVASkdqhcg+Sme9m8Mpnu6q8b+TA9vRo27SWE4lIQ6ZDySDYmZ13ymIH+HRbNmXlIbseuIjUQSr3IFiese+09+85Usj6PUdrKY2IiMo9KPKKywLYp7QWkoiI+Kjcg2Bgx2anvT8y3OhznmZ+FJHao3IPgst7taFbfONT3n/T4I5aPk9EapXKPQjCw4x5PxhCp5aNTrrvsp7xzLgh0YNUItKQqdyDpGvrxnww9VKeH5dE0xjfFabtmsXw8p1DaBSlK05FpHap3IMoOiKcUUkdaN0kGoCYyHDMNF+7iNQ+lbuISAgKqNzNbISZbTazTDObXsX995rZOjNLM7NPzEwnmUVEPFRtuZtZODAbuBZIBMZXUd4LnXP9nXNJwFPAs0FPKiIiAQvkyH0okOmc2+6cKwYWAaMq7+Ccy6202RjQWHsREQ8FchlHB2B3pe0sYNiJO5nZj4GpQBRwZVUPZGb3APcAdOrU6UyziohIgAI5cq/qco+Tjsydc7Odc92B/wIereqBnHMvOueSnXPJ8fHxZ5ZUREQCFki5ZwEJlbY7AntPs/8i4PvnEkpERM5NIOW+GuhhZl3NLAoYByytvIOZ9ai0eT2wNXgRRUTkTFV7zt05V2pmU4DlQDgw3zmXYWaPA6nOuaXAFDO7CigBcoAf1GRoERE5vYDGxTvnlgHLTrhtRqWf7w9yLhEROQcaoSoiEoJU7iIiIUjlLiISglTuIiIhSOUuIhKCVO4iIiFI5S4iEoK0/lsN6Ngi9lv/FRGpbSr3GrBg0kmTZoqI1CqdlhERCUEqdxGREKRyFxEJQSp3EZEQpHIXEQlBKncRkRCkchcRCUHm3ElrXdfOE5sdBHZ58uS1ozWQ7XUIOSt67eq3UH/9Ojvn4qvbybNyD3VmluqcS/Y6h5w5vXb1m14/H52WEREJQSp3EZEQpHKvOS96HUDOml67+k2vHzrnLiISknTkLiISglTuZ8nMjnudQWqOmY00s+mnuE+vvQfMrIuZra/i9sfN7KpqfvcxM/tpzaWrezSfu0gVnHNLgaVe55DqOedmeJ2hLtKR+zkyn6fNbL2ZrTOzsf7bf29mI/0/v21m8/0/TzKzX3mZuaHzHwFuMrO5/tftdTO7ysxWmtlWMxtqZhPN7AX//l3N7DMzW21mv/Q6fwMXbmZzzCzDzN43s1gze9nMbgYws+v8r+0nZvY/Zva3Sr+baGb/MrPtZnafR/lrjcr93N0IJAEDgauAp82sHfAxcIl/nw5Aov/ni4F/13ZIOcn5wPPAAKA3MAHfa/NT4OET9n0e+INzbgiwrzZDykl6ALOdc32BI8BN39xhZjHAn4BrnXMXAyeO4uwNXAMMBf7bzCJrJ7I3VO7n7mLgDedcmXNuP/ARMARfgV9iZonABmC/v/QvBD71LK18Y4dzbp1zrhzIAP7hfJeOrQO6nLDvRcAb/p8X1F5EqcIO51ya/+c1fPu16g1sd87t8G+/wbe955wrcs5lAweAtjWa1GM6537urKobnXN7zKwFMALfUXxLYAxw3Dl3rBbzSdWKKv1cXmm7nKrfF7pmuG6o/LqVAZVXoa/yvXia3w3p/tOR+7n7GBhrZuFmFg9cCnzuv+8z4AH/Pv/G95Ffp2Tqn5XAOP/Pt3oZRE5rE9DNzLr4t8d6F8V7Kvdz9zbwJZAO/BP4mXPum/Oy/wYinHOZwBf4jt5V7vXP/cCPzWw10MzrMFI151wB8J/A383sE2A/cNTbVN7RCFURCRlm1sQ5d9zMDJgNbHXOzfI6lxd05C4ioWSymaXh+5K8Gb6rZxokHbmLiIQgHbmLiIQglbuISAhSuYuIhCCVu4hICFK5i4iEIJW7iEgI+v99xKkI/oUjUgAAAABJRU5ErkJggg==\n", 413 | "text/plain": [ 414 | "
" 415 | ] 416 | }, 417 | "metadata": { 418 | "needs_background": "light" 419 | }, 420 | "output_type": "display_data" 421 | } 422 | ], 423 | "source": [ 424 | "sns.pointplot(x=[\"low\"]*len(idx_range)+[\"mid\"]*len(idx_range)+[\"high\"]*len(idx_range),\n", 425 | " y=influence_distance_lists[0]+influence_distance_lists[1]+influence_distance_lists[2], ci=68)" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": null, 431 | "metadata": {}, 432 | "outputs": [], 433 | "source": [] 434 | } 435 | ], 436 | "metadata": { 437 | "kernelspec": { 438 | "display_name": "Python 3", 439 | "language": "python", 440 | "name": "python3" 441 | }, 442 | "language_info": { 443 | "codemirror_mode": { 444 | "name": "ipython", 445 | "version": 3 446 | }, 447 | "file_extension": ".py", 448 | "mimetype": "text/x-python", 449 | "name": "python", 450 | "nbconvert_exporter": "python", 451 | "pygments_lexer": "ipython3", 452 | "version": "3.7.6" 453 | } 454 | }, 455 | "nbformat": 4, 456 | "nbformat_minor": 4 457 | } 458 | -------------------------------------------------------------------------------- /SA_mask_token_analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import argparse\n", 11 | "import csv\n", 12 | "import logging\n", 13 | "import os\n", 14 | "import random\n", 15 | "import sys\n", 16 | "import pickle\n", 17 | "import time\n", 18 | "import numpy as np\n", 19 | "from termcolor import colored\n", 20 | "import matplotlib\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "import seaborn as sns\n", 23 | "from bert_util import *" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "sst_processor = Sst2Processor()\n", 33 | "train_examples = sst_processor.get_train_examples('data/', 10000)\n", 34 | "test_examples = sst_processor.get_dev_examples('data/')" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "7\n", 54 | "\u001b[34ma sometimes tedious film . \u001b[0m 0\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "interested_test_idx = 7\n", 60 | "\n", 61 | "for te in test_examples:\n", 62 | " if te.guid == interested_test_idx:\n", 63 | " print(te.guid)\n", 64 | " print(colored(te.text_a, \"blue\"), te.label)\n", 65 | " break" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "test_tok_sal_list, train_tok_sal_lists, test_pred_status = pickle.load(open(\"from_aws/SA_influence_output_bert_e3/saliency_test_\" + str(interested_test_idx) + \".pkl\", \"rb\"))" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | "[CLS]\t0.015014335\n", 87 | "\u001b[32ma\t0.07373888\u001b[0m\n", 88 | "\u001b[32msometimes\t0.19994108\u001b[0m\n", 89 | "ted\t-0.45029685\n", 90 | "##ious\t0.0042031133\n", 91 | "film\t-0.031940274\n", 92 | ".\t-0.015620316\n", 93 | "\u001b[32m[SEP]\t0.20924518\u001b[0m\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "top_sal_ix = [_ix for _ix, _ in sorted(enumerate(test_tok_sal_list), key=lambda x: -x[1][1])][:3]\n", 99 | "for ix, (tok, sal) in enumerate(test_tok_sal_list):\n", 100 | " if ix in top_sal_ix:\n", 101 | " print(colored(tok + '\\t' + str(sal), \"green\"))\n", 102 | " else:\n", 103 | " print(tok + '\\t' + str(sal))" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 6, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "orig_influences = pickle.load(open(\"from_aws/SA_influence_output_bert_e3/influences_test_\" + str(interested_test_idx) + \".pkl\", \"rb\"))" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 7, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "\u001b[34mthat is the recording industry in the current climate of mergers and downsizing \u001b[0m 1\n", 132 | "31.986400604248047\n", 133 | "\n", 134 | "\u001b[34mcredulous \u001b[0m 1\n", 135 | "31.018287658691406\n", 136 | "\n", 137 | "\u001b[34man admittedly middling film \u001b[0m 1\n", 138 | "30.333276748657227\n", 139 | "\n", 140 | "\u001b[34ma simplistic narrative and \u001b[0m 1\n", 141 | "28.808013916015625\n", 142 | "\n", 143 | "\u001b[34mof its lurid fiction \u001b[0m 1\n", 144 | "28.745344161987305\n", 145 | "\n", 146 | "\u001b[34memphasising her plight and isolation \u001b[0m 1\n", 147 | "28.046144485473633\n", 148 | "\n", 149 | "\u001b[34mfresh-faced \u001b[0m 1\n", 150 | "26.88740348815918\n", 151 | "\n", 152 | "\u001b[34mthe moral shrapnel and mental shellshock will linger long after this film has ended . \u001b[0m 1\n", 153 | "26.08786392211914\n", 154 | "\n", 155 | "\u001b[34ma wacky , screwball comedy \u001b[0m 1\n", 156 | "24.073925018310547\n", 157 | "\n", 158 | "\u001b[34m, uninhibited \u001b[0m 1\n", 159 | "23.764463424682617\n", 160 | "\n" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | "order_of_interest = \"max\"\n", 166 | "label_of_interest = \"both\"\n", 167 | "num_of_interest = 10\n", 168 | "\n", 169 | "train_idx_sorted = list(np.argsort(orig_influences))\n", 170 | "if order_of_interest == \"max\":\n", 171 | " train_idx_sorted.reverse()\n", 172 | "if label_of_interest == \"both\":\n", 173 | " valid_labels = [\"0\", \"1\"]\n", 174 | "else:\n", 175 | " valid_labels = [label_of_interest]\n", 176 | "\n", 177 | "cnt = 0\n", 178 | "for idx in train_idx_sorted:\n", 179 | " te = train_examples[idx]\n", 180 | " if te.label in valid_labels:\n", 181 | " print(colored(te.text_a, \"blue\"), te.label)\n", 182 | " print(orig_influences[idx])\n", 183 | "# top_sal_ix = [_ix for _ix, _ in sorted(enumerate(train_tok_sal_lists[idx]), key=lambda x: -x[1][1])][:3]\n", 184 | "# for ix, (tok, sal) in enumerate(train_tok_sal_lists[idx]):\n", 185 | "# if ix in top_sal_ix:\n", 186 | "# print(colored(tok + '\\t' + str(sal), \"green\"))\n", 187 | "# else:\n", 188 | "# print(tok + '\\t' + str(sal))\n", 189 | " print()\n", 190 | " cnt += 1\n", 191 | " if cnt >= num_of_interest:\n", 192 | " break" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 8, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "diff_influences_list, mask_ix = pickle.load(open(\"from_aws/SA_influence_output_bert_e3/diff_mask_influences_test_\" + str(interested_test_idx) + \".pkl\", \"rb\"))" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 47, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "masked token: sometimes\n", 221 | "\n", 222 | "\u001b[34mvisually flashy but narratively opaque and \u001b[0m 0\n", 223 | "0.034249115735292435\n", 224 | "\n", 225 | "\u001b[34mfar too clever by half , howard 's film is really a series of strung-together moments , with all the spaces in between filled with fantasies , daydreams , memories and one fantastic visual trope after another . \u001b[0m 0\n", 226 | "0.033796798437833786\n", 227 | "\n", 228 | "\u001b[34mtedious norwegian offering which somehow snagged an oscar nomination . \u001b[0m 0\n", 229 | "0.03196989372372627\n", 230 | "\n", 231 | "\u001b[34mlaw enforcement , and a visceral , nasty journey \u001b[0m 0\n", 232 | "0.0297705065459013\n", 233 | "\n", 234 | "\u001b[34mfreaking out \u001b[0m 0\n", 235 | "0.02831873670220375\n", 236 | "\n", 237 | "\u001b[34mhuge gap \u001b[0m 0\n", 238 | "0.027937954291701317\n", 239 | "\n", 240 | "\u001b[34mthe film 's desire to be liked sometimes undermines the possibility for an exploration of the thornier aspects of the nature/nurture argument in regards to homosexuality . \u001b[0m 0\n", 241 | "0.026909133419394493\n", 242 | "\n", 243 | "\u001b[34mthe whole affair , true story or not , feels incredibly hokey ... \u001b[0m 0\n", 244 | "0.025996092706918716\n", 245 | "\n", 246 | "\u001b[34mamerican and european cinema has amassed a vast holocaust literature , but it is impossible to think of any film more challenging or depressing than the grey zone . \u001b[0m 0\n", 247 | "0.02592332288622856\n", 248 | "\n", 249 | "\u001b[34ma maddeningly insistent and repetitive piano score that made me want to scream \u001b[0m 0\n", 250 | "0.025843653827905655\n", 251 | "\n" 252 | ] 253 | } 254 | ], 255 | "source": [ 256 | "remove_token_type = 2\n", 257 | "\n", 258 | "influences = diff_influences_list[remove_token_type]\n", 259 | "print(\"masked token:\", test_tok_sal_list[mask_ix[remove_token_type]][0])\n", 260 | "print()\n", 261 | "\n", 262 | "order_of_interest = \"max\"\n", 263 | "label_of_interest = \"both\"\n", 264 | "num_of_interest = 10\n", 265 | "\n", 266 | "train_idx_sorted = list(np.argsort(influences))\n", 267 | "if order_of_interest == \"max\":\n", 268 | " train_idx_sorted.reverse()\n", 269 | "if label_of_interest == \"both\":\n", 270 | " valid_labels = [\"0\", \"1\"]\n", 271 | "else:\n", 272 | " valid_labels = [label_of_interest]\n", 273 | "\n", 274 | "cnt = 0\n", 275 | "for idx in train_idx_sorted:\n", 276 | " te = train_examples[idx]\n", 277 | " if te.label in valid_labels:\n", 278 | " print(colored(te.text_a, \"blue\"), te.label)\n", 279 | " print(influences[idx])\n", 280 | " print()\n", 281 | " cnt += 1\n", 282 | " if cnt >= num_of_interest:\n", 283 | " break" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 162, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "tp = 0.001\n", 314 | "\n", 315 | "idx_range = range(1, 51)\n", 316 | "influence_distance_lists = ([], [], [], [], [])\n", 317 | "for iti in idx_range:\n", 318 | " orig_inf = pickle.load(open(\"from_aws/SA_influence_output_bert_e3/influences_test_\" + str(iti) + \".pkl\", \"rb\"))\n", 319 | " diff_inf_list, m_ix = pickle.load(open(\"from_aws/SA_diff_influence_output_bert_e3/diff_mask_influences_test_\" + str(iti) + \".pkl\", \"rb\"))\n", 320 | " for i, influences in enumerate(diff_inf_list):\n", 321 | " influence_distance_lists[i].append(influence_distance(orig_inf, influences, top_percentage=tp))" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 163, 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "data": { 331 | "text/plain": [ 332 | "0.7559999999999999" 333 | ] 334 | }, 335 | "execution_count": 163, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "np.mean(influence_distance_lists[0])" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 164, 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "data": { 351 | "text/plain": [ 352 | "0.8420000000000002" 353 | ] 354 | }, 355 | "execution_count": 164, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [ 361 | "np.mean(influence_distance_lists[1])" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 165, 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "data": { 371 | "text/plain": [ 372 | "0.652" 373 | ] 374 | }, 375 | "execution_count": 165, 376 | "metadata": {}, 377 | "output_type": "execute_result" 378 | } 379 | ], 380 | "source": [ 381 | "np.mean(influence_distance_lists[2])" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 166, 387 | "metadata": {}, 388 | "outputs": [ 389 | { 390 | "data": { 391 | "text/plain": [ 392 | "0.8440000000000001" 393 | ] 394 | }, 395 | "execution_count": 166, 396 | "metadata": {}, 397 | "output_type": "execute_result" 398 | } 399 | ], 400 | "source": [ 401 | "np.mean(influence_distance_lists[3])" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 167, 407 | "metadata": {}, 408 | "outputs": [ 409 | { 410 | "data": { 411 | "text/plain": [ 412 | "0.79" 413 | ] 414 | }, 415 | "execution_count": 167, 416 | "metadata": {}, 417 | "output_type": "execute_result" 418 | } 419 | ], 420 | "source": [ 421 | "np.mean(influence_distance_lists[4])" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 168, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "data": { 431 | "text/plain": [ 432 | "" 433 | ] 434 | }, 435 | "execution_count": 168, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | }, 439 | { 440 | "data": { 441 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VPXV+PHPyUYIayBhDSFB9k2QsMlqRR/AR2y1sihUFAG1trZqrV0etbZP2+dXrW3dWARRXBC1VqpY3NgJS9h3WcIS9jUQsiff3x/3MhlCIBMymTuTe96vV15M7pKcDJOT79z7/Z4jxhiUUkq5Q5jTASillAocTfpKKeUimvSVUspFNOkrpZSLaNJXSikX0aSvlFIuoklfKaVcRJO+Ukq5iCZ9pZRykQinAygtLi7OJCUlOR2GUkqFlLVr1540xsSXd1zQJf2kpCTS0tKcDkMppUKKiOz35Ti9vKOUUi6iSV8ppVxEk75SSrmIJn2llHIRTfpKKeUimvSVUspFNOkrpZSLBN08faVU8Bs3YxUZZ3JIiK3J7Am9nQ5HVYAmfaVUhWWcySH95AWnw1DXQC/vKKWUi+hIX12Vvo1XqnrRpK+uSt/GK1W96OUdpZRyEU36SinlIpr0lVLKRTTpK6WUi2jSV0opF9Gkr5RSLqJJXymlXESTvlJKuYgmfaWUchGfkr6IDBWRnSKyW0SeLmN/oogsFJH1IrJJRIbb25NEJEdENtgfU/z9AyillPJduWUYRCQceBW4BcgA1ojIPGPMNq/DfgvMNca8LiIdgflAkr1vjzGmm3/DVkopdS18Gen3AnYbY/YaY/KBOcAdpY4xQF37cT3gsP9CVEop5S++JP3mwEGvzzPsbd6eA8aKSAbWKP8nXvuS7cs+i0VkQGWCVUo5L7egiPzCYgCMMQ5HoyrKl6QvZWwr/T89BphljEkAhgOzRSQMOAIkGmO6A48D74lI3VLnIiKTRCRNRNJOnDhRsZ9AKRUQBUXFvPjlTnr/8RsOnc0B4PDZXJbu0t/ZUOJL0s8AWnh9nsDll28mAHMBjDGpQDQQZ4zJM8acsrevBfYAbUt/A2PMNGNMijEmJT4+vuI/hfK7zJwCZi5L5/i5XACy8gopKCp2OCrlpKc+2sTL3+4mM6fAsy2/qJjxM9ew5DtN/KHCl6S/BmgjIskiEgWMBuaVOuYAcDOAiHTASvonRCTevhGMiLQC2gB7/RW8qhrbDp/j5hcX8fxn27iQXwTAifN53PnaCs5m5zscnXLC5oxMPll/qMx9Rcbwx/nb9VJPiCg36RtjCoFHgQXAdqxZOltF5HkRGWEf9gQwUUQ2Au8D4431ChgIbLK3fwQ8ZIw5XRU/iPKPgqJiJs1O42TW5cl986FMfv3JZgeiUk77YsuRq+7fcfQ8+09lBygaVRk+dc4yxszHukHrve0Zr8fbgH5lnPcx8HElY1QB9M3242Scybni/v9sOcrRzFya1IsOYFTKadn2O76ruZBfGIBIVGXpilx1iZ1Hz191f7GBXcevfoyqfrom1Cv3mH+uy+BCnib+YKdJX12ibs3y3/zVqxkZgEhUMBnepSlN6l793d2MZfsY8tfFzN98RK/vBzFN+uoSvZIbXHV/UsMYOjcrf9SnqpfoyHCeGtquzH0dm9albrQ1WDiSmcsj767jRzNXs/dEViBDVD7SpK88Dp3N4cfvrrvifgGeHdGJsLCylm6o6m7B1qOexzUjrdTRtF40n/+0PwufHMzdPRI8+5fuOsnQvy3lhQU7yfHhfoAKHE36CoADp7IZOSWVffYMjKb1oklqGHPJMQZoVq+mA9Epp+0+nsWX244B0LZxbRrbl3qiI8MRERrWrsFf7r6ejx7qS4em1vrL/KJiXlm4myF/XcyXW4/qJZ8goUlfsft4FndPXeFZZXl9i/r857GBLHxyMIkNLk38UxbvcSJE5bBpS/ZwMWdPHngdImW/20tJasC/H+3Hs7d3pE4N65LPobM5TJq9lgdmrWH/qQuBClldgSZ9l9t+5Byjp6Vy7FweAD2TYnlnQi/qxUQiIoTbl3Ii7H/nbTzMwdM6H9tNjmbmehZmNasXzYhuza56fER4GPf3S+abJwfxg+4lZboW7jzBLS8t4aWvviO3QC/5OEWTvottyjjLmOkrPQux+reO460HelEn+vLZORdn7BQVG6Yv1UXVbjJzeToFRdYwf8KAVkSG+5Y2GtWJ5qVR3ZgzqQ9tG9cGIL+wmL9/s4tbX1rCwh3HqyxmdWWa9F1q7f7T3Dt9FWezrToq32vfiDfuSyEmquwpm7WjI4irXQOAD9Yc5MT5vIDFqpyTmV3Auyv3A1A/JpLRPVuUc8bl+rRqyOc/HcBvhnegVlQ4AAdOZ3P/rDVMfDtN3zkGmCZ9F1qx5yTjZqzmvL2QZljnJkwZ24PoyPArnhMmwgP9kwDIKyxm1or0QISqHPbOqv2e+ks/6ptErRo+LeK/TGR4GBMHtuKbJwbz312berZ/te0Yt7y0mFe+3UVeYWhe8hk3YxU3vbCIcTNWOR2KTzTpu8yince5/801nmX13+/WjJfHdCcqovyXwtg+LT03595O3c/53IJyzlChLLegiDeXW3/coyPDGH9jUqW/ZpN60bxyzw28M6E3reJr2d+nmBe+/I6hf1saktU6M87kkH7ywlXLlwQTTfou8p8tR5n4dhp5dgOM0T1b8OLIbkT4eI22bnQkY/u2BOB8biHvrjpQZbEq5320NsNzv2dUSgsa1Iry29fu3yaO/zw2kKeGtqOm/Q4z/eQFfjRzNY+8u5YjmaGRQEORJn2XmLfxMD9+b53nhtz4G5P44w+6eGbn+OqBfsmedwUzlqXrLIxqqrComGlLrBv24WHCgwNa+f17REWE8cjg1nz9xCCGdmri2T5/81FufnExUxbv8XToUv6jSd8F5qYd5LE56ykqthL+Q4Ou49nbO17Tytr4OjUYmWKtvDxxPo+P12X4NVYVHL7YcpQD9g3W27s2pUWp9Rr+1Lx+TaaM68Gs+3vS0l4QmJ1fxJ+/2MHwfyxlxZ6TVfa93UiTfjU3O3UfT320ybOw5udD2vLLoe2uuLjGF5MHXud5hzB18V4KtaNWtWKMuWQR3uRB1wXk+w5u14gFPxvI47e0pYb9bnL38Szumb6Kn76/nmN2FzdVOZr0q7HpS/byP59u9Xz+q2HteWxIm0olfIAWDWI8MzAOnM5m/paj5ZyhQsmy3SfZevgcAIPbxXvKKgRCdGQ4P725DV8/PoghHRp5ts/beJibX1zMG0v3atvOStKkXw0ZY/jHN7v43/nbPdt+N6KTX0dsDw8u+VqvL9qjdVWqEe9R/sMBGuWX1qJBDG/c15M3fpRCQqxV7ykrr5A/fL6d219exup0bcB3rTTpVzPGGP6yYCd//eo7AETg/93Vlfv8MN3OW/smdflee2sktv3IORaF4FQ7dblNGWdZvvsUAN0T65dbaruqDenYmK8fH8RPv9eaKHuW2Y6j5xk5NZXH527QRYLXQJN+NWKM4fnPtvHaImukFh4m/G1UN0ZewypKXzziPdpfqIXYqgPvUf5Dg65cWC2QoiPDefzWdnz584EMahvv2f7PdYf43ouLeGvFPr2vVAGa9KuJ4mLDrz/ZwpvL9wEQGS68ek937ujW/OonVkJKUgN6JsUCsHrfadL26VvuUJZ+8gJf2PdnrouvxS0dGjsc0aWS4mox6/6eTBnbg2Z2j+bzuYU8O28rI15Zztr9ZxyOMDRo0q8GCouKefLDjby/2losFRURxrRxKQzt3LScM8uXEFuT5LhanuuqpT0yuLXn8euLdLQfyqYt2XtJ+eRgbJYjIgzt3ISvnxjEI4OvIzLcinHbkXPc9foKnvpoI6ey9JLP1VxbIQ0VNPILi/nZB+uZv9kaodWMDOeN+1Lo1zrOL19/9oTeV90/uF087ZvUYcfR83yz4zg7jp6jfZPAzfZQ/nH8XC4fr7XWXDSpG80d3a9ePtlpMVERPDW0PXfekMCz87Z47kPMTctgwdZjPDW0HaN7JlZ48aEb6Eg/hOUWFPHIu2s9Cb92jQjentDLbwnfFyJyyUyeKTraD0kzl+8j374uPqF/MjUirlx8L5i0blSbdyb05uUx3Wlc16oCm5lTwG8+2cIPXlvOxoNnHY4w+GjSD1E5+UVMfDuNr7dbNcnr1Yzk3Qd70zMp8LMtbuvS1NNh69+bjmip3BBzLrekfHLd6AjG9E4s95zyLvsFkohw+/XN+OaJwUwckOwZ3W/KyOT7ry3n159s5mx2vsNRBg9N+iEoK6+Q+95czdJd1vL0hrWieH9iH65vUd+ReCLCw5g00KrNUlRsPDVbVGh4b9UBT5ntcX1bUtuH8smzJ/Rm4ZODy738F0i1a0Twm9s6Mv+nAzxTTY2xfr6bXljEB2sOUFys60k06YeYzOwCxr6xyrM4pVGdGnwwuQ8dmzl7Hf2HPRI8TVbmpmmTlVCRW1DEjGVW+eQaEWGMvzHZ4Ygqr12TOnwwqQ8vjbre85o8k13ALz/ezA+nrGDLoUyHI3SWJv0QciorjzHTV7LBvk7ZvH5N5k7uS+tGdRyOzJpLPaG/lTDyCos9ddhVcPtk/SHPH+i7UxKIr1PD4Yj8Q0T4QfcEvn1yEONvTOLi/dx1B84y4pVlPPvpFjJz3NkPQpN+iDh+LpfR01ay7YhVE6Vlwxg+mNyHpLhaDkdW4t4+iZ4mK7NT93NOm6wENe9LcWECkwY4U3KhKtWNjuS5EZ347CcD6NHSWlNSbOCt1P3c/OIiPl6b4boSIj4lfREZKiI7RWS3iDxdxv5EEVkoIutFZJOIDPfa9yv7vJ0i8l/+DN4tDp/NYdS0lew6ngVYMxbmTu5LQmzVlbu9FnWjIxl3sclKXiHvrtQmK8Hsy61HST95AYDhXZqS2DC4Xk/+1LFZXT6c3Je//LArDe1mMCez8nniw42MnJrKjqPnHI4wcMpN+iISDrwKDAM6AmNEpGOpw34LzDXGdAdGA6/Z53a0P+8EDAVes7+e8tGBU9ncPSXV88vZoWld5kzqQ+O60Q5HVrb7+yV7yuJqk5XgVbp88kMOFVYLpLAw4e6UFnz7xGDG9knkYoWJNfvOcNs/lvH7z7a5ogWoLyP9XsBuY8xeY0w+MAe4o9QxBrh4J7EecNh+fAcwxxiTZ4xJB3bbX0/5YPfxLO6euoJDZ63Wcdcn1OP9ib09N6eCkdVkxar1czIrj4/WapOVYJS65xQbM6wbmgPaxNG5eT2HIwqcejGR/OH7XZj34/6eGW9FxYYZy9K5+cXFfLrhULW+5ONL0m8OHPT6PMPe5u05YKyIZADzgZ9U4FxVhh1HzzF6WirHzlk32XomxfLOg72pH+O/PqVVZdLAVp650tOWaJOVYPR6EJRPdlqXhHp88vCN/OnOLtSPiQTg+Pk8HpuzgXumr2LXsfMOR1g1fEn6Za1jLv1ncAwwyxiTAAwHZotImI/nIiKTRCRNRNJOnNASvZszMhk9baWnKXW/1g1564Fe1ImOdDgy37RoEMPtXk1WPt98xOGIlLcthzI9azy6JtSj73UNHY7IOWFhwpheiXz7xGDG9CqpRpu69xTD/r6UP32xnQv2GobqwpeknwF41+ZNoOTyzUUTgLkAxphUIBqI8/FcjDHTjDEpxpiU+Pj40rtdZe3+09wzfSVns61ri99r34gZ9/UkJiq0yiQ9pE1WgtZUr8VzDwdJ+WSnNagVxZ/u7Monj9xI5+bWlerCYsPUxXsZ8tfFzN98pNq8hn1J+muANiKSLCJRWDdm55U65gBwM4CIdMBK+ifs40aLSA0RSQbaAKv9FXx1s2LPScbNWO1ZHTmscxOmjO1BdGTo3ftu36QuN9tNVnYcPc+infoOLhjsP3WBzzdZ467kuFrc2qmJwxEFl+6JsXz64/78/o5O1I22BlpHMnN55N11/GjmavaeyHI4wsorN+kbYwqBR4EFwHasWTpbReR5ERlhH/YEMFFENgLvA+ONZSvWO4BtwH+AHxtjdDpHGRbtPM79b64hO996eu7o1oyXx3QnKiJ0l1KUbqmonDd96V4uViLwvveiSoSHCeP6JvHtk4P5YY8Ez/alu04y9G9LeWHBTnLyizhwKps/zd/OkUxrokVWbiH5hcF//0qC7S1LSkqKSUtLczqMgFqw9SiPvreOgiLr/2JUSgv+eGeXavELOXJKKqvt5iofPdSXFAcKwinLyaw8+v35W/IKi4mvU4OlT90Uku8iA23NvtP8z7+2sONoyY3dhrWiyMwpoLBULZ+UlrHMeqCXT/WL/E1E1hpjUso7LnSHkdXEvzce5pF3SxL++BuT+FM1Sfigo/1gMmv5PvLskegD/ZI14fuoZ1IDPvtJf575746eZH7qQv5lCR8gbf8Z/u+LHYEOsUI06Tvow7SDPDZnPUX2i2fyoFY8e3vHoOxYdK0uNlkBPE1WVOBl5RXyduo+AOrUiODePuWXT1YlIsLDeKB/Mt8+MYhO5RQ3/HhdBtn5wTvjR5O+Q2av3M8vPtrkub76syFteHpo+2o3k0KbrASH91cd4FyulYju7dOSuiEy/TfYNKobXW7Sz84v4khmboAiqjhN+mUYN2MVN72wiHEzVlXJ139j6V7+519bPJ8/Paw9PxvSttol/Iu0yYqz8guLPeWTo8LDeKBfkrMBhbgGta6+Il4E6tcM3j+qmvTLkHEmh/STF8g4k+P3r/3yN7v4w+fbPZ//bkSnal/3RJusOOtfGw5x9Jw18ryrR3MaBWndplDx/XL6Bw9qG0/DIC6Vokk/QIwx/GXBDl786jvAGg38311duO/GJGcDCxBtsuKM4uKSwmoiMGlg9R5gBEL7JnV5sH/ZzWbq1YzkN8M7BDiiitGkHwDGGH7/2XZeXWj98oWHCX8b1Y1RPd1zM02brDjjq+3H2HvCqtA6rHMTkoOo/0Io+81tHfh/d3X1TFIAqFUjnH/9uB9tGjvf1OhqNOlXseJiw2/+tYWZdpKLDBdeGdOdO7q5r+7c2D6J1InWJiuB4sbyyYEiIozs2YL//GwgSXYfgkZ1okPij6om/SpUWFTMkx9u5L1VVjORqIgwpo7rwbAuTR2OzBl1oiMZ10ebrATK6vTTrD9gtda88bqGdE2o73BE1VOoTcDQpF9FCoqKeWzOBv65/hAANSPDeXN8T77XvrHDkTlLm6wEjvco33varHI3TfpVILegiIffWespKVy7RgRvT+hFv9ZxDkfmPG2yEhjbj5xjoV3krlOzuvTX156yadL3s5z8Iia+ncbX248D1t38dx/sTU+tOeOhTVaq3tRS1/JD7RKEqjqa9P0oK6+Q+95c7WlQ0aBWFO9P7ONpyaYs2mSlah08nc2/N1nPaWKDGIZ11vLJqoQmfT/JzClg7BurWJ1uVZRsVKcGH0zqQ8dylmy7lTZZqTozlqV76jlNGtiKiHD9NVcl9NXgB6cv5HPP9JVsOGjNlGhevyZzJ/cN+vm6TtImK1XjVFYec9ZYs6LiakddUg9eKdCkX2nHz+cyeloqWw9b1SNbNozhg8l9SAqB+bpOe+QmLbvsb2+l7ie3wLpHcr+WT1Zl0KRfCYfP5jBq6kq+O2a1ULsuvhZzJ/clITbG4chCQ4+WDehl3+Beve80aXazFXVtsvNLyifXigpnbO+WjsajgpMm/Wt04FQ2I6emkn7SWuLevkkdPpjcl8ZazKpCHtbRvt/MWX2Qs9nWKud7eidSLyZ4Kz0q52jSvwZ7TmQxcmqqpwpn14R6zJnUx1NQTPlucNt4OjS1bnZrk5VrV1BUzBtLreqlkeHChP6tHI5IBStN+hW04+g5Rk1N9ZSqTWkZyzsP9qZ+TJTDkYUmbbLiH/M2HOaw3bjjB92b06SevuNUZdOkXwFbDmUyetpKTmblA1Y9k7cn9NIuRJU0vHMTbbJSCcXFhqlLtHyy8o0mfR+t3X+GMdNXeq6Z3tQunpnjexITFfiu99VNRHgYkwdpk5VrtXDncc9kgls6NKZ1o9oOR6SCmSZ9H6TuOcW4Gas4b/cYHdqpCVPHpeh0OD+664YE4utok5Vr4X0D/CEtrKbKoUm/lMycAnLsTvbFxrD4uxOMf3M12flWNcg7ujXjlXu6ExWhT50/aZOVa5O27zRp+88A0Du5ATckxjockQp2mrlsBUXF/O/n2+j9x685es4aZR44lc0Ds9aQV2gtdhmZksBfR3bTZe1V5N7e2mSloi5pkqKjfOUDzV62Zz7dwvSl6Z7VjAAGPDVM7uvbkj/f2dVTHVL5X53oSH7UV5us+Oq7Y+c91VzbN6nD4LbxDkekQoEmfawR/ZzVB6+4PzJceGpoO8I04Vc5bbLiu6mLS254PzxYyycr32jSBxZ9d5yr1XgsKDKs2XcmYPG4WVztGozqqU1WynPobA6fbrC6siXE1uQ2l7bgVBWnSR/ILyy/iUdhkZb+DZSJA0qarExdskebrJRhxtJ0Cu1LjxMHaPlk5TufXikiMlREdorIbhF5uoz9L4nIBvvjOxE567WvyGvfPH8G7y+9kq/e1SoyXOiWqI1QAqVFgxhGXN8MgIOnc7TJSilns/M95ZMb1IrytJ9UyhflJn0RCQdeBYYBHYExItLR+xhjzM+NMd2MMd2Al4F/eu3OubjPGDPCj7H7TdeE+vRr3fCK++9OaaF1dQLsoUHaZOVK3k7d75lCPP7GJGpG6XoR5TtfRvq9gN3GmL3GmHxgDnDHVY4fA7zvj+AC6ZUxN9Cn1eUj/hHXN+OZ/+5YxhmqKrVrUochHbTJSmk5+UXMWrEPgJiocM9sJ6V85UvSbw54T23JsLddRkRaAsnAt16bo0UkTURWisj3rznSKhZr97P95yM3EmuXpG1evyb/GNNdV946xLsQ22uLdjsYSfCYm3aQ0xes2k+jeyZqob8gkBBbk+S4WiTE1nQ6FJ/4UjimrHlgV3qvPRr4yBjjPc8u0RhzWERaAd+KyGZjzCWlFEVkEjAJIDEx0YeQqoaIcENiLPVjojiTXaCrbh3Wo2UDeiU3YHX6adbsO8OafafpmXT1+y/VWWFRMdPt8skRYcKDA5IdjkgBzJ7Q2+kQKsSXrJYBeN8pSgAOX+HY0ZS6tGOMOWz/uxdYBHQvfZIxZpoxJsUYkxIfrwtMVImHB2uTlYs+33zE08NhRLdmNKsfGiNLFVx8SfprgDYikiwiUViJ/bJZOCLSDogFUr22xYpIDftxHNAP2OaPwJU7eDdZ+XbHcbYfcWeTFWPMpYXVBmnJBXVtyk36xphC4FFgAbAdmGuM2Soiz4uI92ycMcAcc+k0iw5AmohsBBYCfzbGaNJXPrusycpid472F313gh1HzwMwpEMj2jau43BEKlT5VAzeGDMfmF9q2zOlPn+ujPNWAF0qEZ9SDO/chBcaxHDgdDb/3niYJ25pR2JDdzWfn6KjfOUneqdSBT3vJivFBqYtdddof92BM6xKPw1Y7TlTXHwzW1WeJn0VEi5tspLhqiYr3qP8h7V8sqokTfoqJHg3WckvLGamS5qs7D6exVfbjwHQtnFtbmrXyOGIVKjTpK9ChneTlXdc0mRl2pI9XJwaMXngdVreW1WaJn0VMko3WXln5X6HI6paRzNz+WS9VT65Wb1oRnRr5nBEqjrQpK9CineTlZnL9lXrJiszl6dTYJf0fnBAKyK1fLLyA30VqZBSusnKh9W0yUpmdgHv2u9k6sdEMrqXlk9W/qFJX4Uc7yYr06ppk5V3Vu3ngl0++Ud9k4iJ8mlJjVLl0qSvQk51b7KSW1DEm/bspOjIMMbfmORsQKpa0aSvQlJ1brLy0doMTmaVlE9uUEvLJyv/0aSvQlLpJisLdx53OCL/KCwqZtoSq3xyeJh41iYo5S+a9FXIqo5ll7/YcpQDp7MBuL1rU1o0cFeNIVX1NOmrkHWxyQrgabISyowxl1QRnayF1VQV0KSvQlp1Gu0v232SrYetfgGD25X0EVDKnzTpq5BWnZqseP/RelhH+aqKaNJXIa26NFnZlHGWFXtOAdA9sb7nspVS/qZJX4W84Z2b0NJuqvLvjYc5cCrb4YgqzvuP1UODrkNEC6upqqFJvwwJsTVJjqtFQqw2ng4FEeFhTBoYuk1W0k9e4IstRwG4Lr4Wt3Ro7HBEqjrTtd1lmD2ht9MhqAq664YE/vb1Lk6cz2NuWgaP3dzW03Ql2F1SPnmQlk9WVUtH+qpaCNUmK8fP5fLxWqt8cpO60Xy/W3OHI1LVnSZ9VW2EYpOVmcv3kW8XjJvQP5moCP2VVFVLX2Gq2gi1JivnckvKJ9eNjmBM70SHI1JuoElfVSuh1GTlvVUHOJ9XCFjlk2vX0Ftsqupp0lfVSqg0WcktKGLGMuu+Q42IMMb3S3I2IOUamvRVtRMKTVY+WX+IE+fzALg7JYG42qEx00iFPk36qtoJ9iYrRcXGUz45TGDSAC25oAJHk76qloK5ycqXW4+SfvICALd1bUZiQy2frAJHk76qloK1yYoxhte9yyfbK4mVChSfkr6IDBWRnSKyW0SeLmP/SyKywf74TkTOeu27T0R22R/3+TN4pa7m4cGtPY+Dpexy6p5TbMrIBGBAmzg6N6/ncETKbcpN+iISDrwKDAM6AmNEpKP3McaYnxtjuhljugEvA/+0z20APAv0BnoBz4pIrH9/BKXK1qNlbNA1WfEe5Wv5ZOUEX0b6vYDdxpi9xph8YA5wx1WOHwO8bz/+L+ArY8xpY8wZ4CtgaGUCVqoiHgmiJitbDmWydNdJALom1KPvdQ0djUe5ky9Jvzlw0OvzDHvbZUSkJZAMfFvRc5WqCoPaxtMxSJqsTCk1ytfyycoJviT9sl6ZV5oKMRr4yBhzcRmkT+eKyCQRSRORtBMnTvgQklK+CZYmK/tPXWC+PXU0Oa4Wt3Zq4kgcSvmS9DOAFl6fJwCHr3DsaEou7fh8rjFmmjEmxRiTEh8f70NISvluWBA0WZm+dC/F9nBn0sCSxWNKBZovSX8N0EZEkkUkCiuxzyt9kIi0A2KBVK/NC4BbRSTWvoF7q71NqYCJCA9j8kBrtF+ZJivjZqziphcWMW7Gqgqdd+J8Hh/nzLo0AAANCklEQVSmWeUg4uvU4Afd9Qqnck65Sd8YUwg8ipWstwNzjTFbReR5ERnhdegYYI7xWgVjjDkN/B7rD8ca4Hl7m1IBdecNzT1NVeamZXD8fG6Fv0bGmRzST14g40xOhc6btSKdvMKS8snRkeEV/t5K+YtP8/SNMfONMW2NMdcZY/7X3vaMMWae1zHPGWMum8NvjJlpjGltf7zpv9CV8l10ZDgPejVZeXP5voB836y8QmanWuWT69SI4B4tn6wcpitylWvc0zuRugFusvL+qgOcy7XKJ9/bpyV1oyOr/HsqdTWa9JVrWE1WkoDANFnJKyzijWVWYbWo8DAe0PLJKgho0leuMr5fkleTlfQqbbLy6frDHDtnlU++q0cCjepGV9n3UspXmvSVq8TVrsFoT5OV/CprslJcbJiyxJolJGJN01QqGGjSV67zYACarHy1/Rh7T1jlk4d1bkJyXC2/fw+lroUmfeU6LRrEcEcVNlkxxlyy8vchLaymgogmfeVKDw2uuiYrq9NPs/6AVV28X+uGdE2o77evrVRladJXrtS2cR2GdGgM+L/Jyus6yldBTJO+ci3vQmyvLfRPIbbtR86xaKdVNLBTs7r0bx3nl6+rlL9o0leu1aNlLL3tJitp+/3TZGVqqVG+lk9WwUaTvnK1S0f7uyv1tQ6ezubfm6ybwi0bxjCss5ZPVsFHk75yNe8mKwt3nqhUk5U3lu6lyK6fPHFAKyLC9ddLBR99VSpXK91k5VpbKp7KyuODNKtJXFztKH7YI8Ev8Snlb5r0lesN79LU02Tls03X1mTlrdT95BZYi7zu76flk1Xw0qSvXC88TC5psjJ1ScVG+xfyCnk7dR8AtaLCGdu7pZ8jVMp/NOkrBdzVozmN7CYrH66tWJOVOWsOcjbbKtN8b5+W1IvR8skqeGnSVwqoERHOBK8mKzOX7fPpvIKiYmYstconR4YLD/RLrqoQlfILTfpK2bybrLy70rcmK/M2HOZwpvWu4Afdm9OknpZPVsFNk75SttJNVi62ObyS4mLjuf5vlU/Wkgsq+GnSV8rL/f2SiI60fi3eXH71Jivf7jjOd8eyALi1Y2NaN6odkBiVqgxN+kp5aVi7BqNSfGuyouWTVSjSpK9UKRMHlt9kJW3fadL2nwGgd3IDuifGBjRGpa6VJn2lSkmILb/JyiWj/ME6ylehQ5O+UmW4WpOV/MJivt5u1d9v36QOg9vGBzw+pa6VJn2lynC1JiuZOfmexw8P1vLJKrRo0lfqCh65qewmK1l51oyehNia3NalacDjUqoyNOkrdQU3JF7aZOXUhbxL9mv5ZBWK9BWr1FWM61tSPO1cTqHncXRkGHdr+WQVgjTpK3UFxhjeXJZe5r7cgmJmpe4LaDxK+YNPSV9EhorIThHZLSJPX+GYkSKyTUS2ish7XtuLRGSD/THPX4ErVdVS955i7YGzV9w/bfHeq67YVSoYRZR3gIiEA68CtwAZwBoRmWeM2eZ1TBvgV0A/Y8wZEWnk9SVyjDHd/By3UlVuxe5TV91/NqeA7UfO6cIsFVJ8Gen3AnYbY/YaY/KBOcAdpY6ZCLxqjDkDYIw5jlIhLsyHmZhhOl1ThRhfkn5z4KDX5xn2Nm9tgbYislxEVorIUK990SKSZm//flnfQEQm2ceknThxokI/gFJVZWA5i67iakfRwW6qrlSo8CXplzWUMaU+jwDaAIOBMcAbIlLf3pdojEkB7gH+JiKXrVk3xkwzxqQYY1Li43V1owoOPVrG0r913BX3//im1kRF6FwIFVp8ecVmAC28Pk8ADpdxzKfGmAJjTDqwE+uPAMaYw/a/e4FFQPdKxqxUQIgIr4+9geFdmlwy8hHgl0PbM/7GJIciU+ra+ZL01wBtRCRZRKKA0UDpWTj/Am4CEJE4rMs9e0UkVkRqeG3vB2xDqRBRJzqS1+7tweJf3ES83UM3sUGMll9QIavcpG+MKQQeBRYA24G5xpitIvK8iIywD1sAnBKRbcBC4BfGmFNAByBNRDba2//sPetHqVCR2DCG2jWsyW5hvtzhVSpIlTtlE8AYMx+YX2rbM16PDfC4/eF9zAqgS+XDVEop5Q96F0oppVxEk75SSrmIJn2llHIRTfpKKeUimvSVUspFNOkrpZSLaNJXSikX0aSvlFIuoklfKaVcRJO+Ukq5iCZ9pZRyEU36SinlIpr0lVLKRTTpK6WUi2jSV0opF9Gkr5RSLqJJXymlXESTvlJKuYgmfaWUchFN+kop5SKa9JVSykU06SullIto0ldKKRfRpK+UUi6iSV8ppVxEk75SSrmIJn2llHKRCKcDUCpUJMTWvORfpUKRT0lfRIYCfwfCgTeMMX8u45iRwHOAATYaY+6xt98H/NY+7A/GmLf8ELdSATd7Qm+nQ1Cq0spN+iISDrwK3AJkAGtEZJ4xZpvXMW2AXwH9jDFnRKSRvb0B8CyQgvXHYK197hn//yhKKaXK48s1/V7AbmPMXmNMPjAHuKPUMROBVy8mc2PMcXv7fwFfGWNO2/u+Aob6J3SllFIV5UvSbw4c9Po8w97mrS3QVkSWi8hK+3KQr+cqpZQKEF+u6UsZ20wZX6cNMBhIAJaKSGcfz0VEJgGTABITE30ISSml1LXwZaSfAbTw+jwBOFzGMZ8aYwqMMenATqw/Ar6cizFmmjEmxRiTEh8fX5H4lVJKVYAvSX8N0EZEkkUkChgNzCt1zL+AmwBEJA7rcs9eYAFwq4jEikgscKu9TSmllAPKvbxjjCkUkUexknU4MNMYs1VEngfSjDHzKEnu24Ai4BfGmFMAIvJ7rD8cAM8bY05XxQ+ilFKqfGLMZZfYHSUiJ4D9TscBxAEnnQ4iSOhzUUKfixL6XJQIhueipTGm3OvjQZf0g4WIpBljUpyOIxjoc1FCn4sS+lyUCKXnQmvvKKWUi2jSV0opF9Gkf2XTnA4giOhzUUKfixL6XJQImedCr+krpZSL6EhfKaVcxLVJX0SynI4hVIjICBF5+gr7qsXzKCJJIrKljO3Pi8iQcs59TkSerLroVHUgIvvsxauO0iYqqlz2ArzSq7BdwRjzjNMxhAIRCTfGFDkdR1UREcG6HF7sdCyV5dqR/kVi+YuIbBGRzSIyyt7+moiMsB9/IiIz7ccTROQPTsbsT/YId4eIvGE/B++KyBC7YuouEeklIuNF5BX7+GQRSRWRNfZq6+okXESmi8hWEflSRGqKyCwR+SGAiAy3n6tlIvIPEfnM69yOIrJIRPaKyE8dir/SROQhEdlgf6SLyEIRudX+P18nIh+KSG372H0i8oyILAPuFpFudpXdTfbvTKzDP06l2L8b20XkNWAdMENE0uzXx++8jtsnIr+zn5/NItLe3t7Qfh2tF5GplF2AMuBcn/SBO4FuwPXAEOAvItIUWAIMsI9pDnS0H/cHlgY6yCrWGqszWlegPXAP1s/5JPDrUsf+HXjdGNMTOBrIIAOgDVZfiE7AWeCuiztEJBqYCgwzxvQHSq98bI/VP6IX8KyIRAYmZP8yxkwxxnQDemIVTJyJ1fluiDHmBiANeNzrlFxjTH9jzBzgbeCXxpiuwGasBkqhrh3wtjGmO/CEvQCrKzBIRLp6HXfSfn5ex/q9AevnX2afOw8IihLCmvSt5Pa+MabIGHMMWIz1gl8KDBCRjsA24Jj9x6AvsMKxaKtGujFms/3WdSvwjbGmdW0Gkkod2w943348O3AhBkS6MWaD/Xgtl/7s7YG9dhVZKHkOLvrcGJNnjDkJHAcaV2mkVe/vwLfAGawBz3IR2QDcB7T0Ou4DABGpB9Q3xiy2t78FDAxcuFVmvzFmpf14pIisA9YDnSgZCAL80/7X+3UzEHgHwBjzOdZz6Ti9pn+Ft1zGmEP229OhWKP+BsBIIMsYcz6A8QVCntfjYq/Piyn7NVJd5/l6Pw9FgHcH9PLempc+N2R/t0RkPFZifxS4Dav73ZgrHH4hUHE55AJYlzWxRvA97Zaws4Bor+Mu/v+X/r8Put8VHelbCX2UiISLSDzWX+fV9r5U4Gf2MUux/tOr26WdilqOVV4b4F4nAwmwHUArEUmyPx/lXChVR0R6YL3Ox9rv/FYC/USktb0/RkTalj7PGJMJnBGRi5dEx2G9a64u6mL9AcgUkcbAMB/OWYL9OyIiw4CguMcRsqMRP/oE65LNRqy/yk8ZYy5eq14K3GqM2S0i+7FG+25P+o8B74nIY8DHTgcTKMaYHBF5BPiPiJykZGBQ3TyK9TpfaE1YIQ0YD7wvIjXsY34LfFfGufcBU0QkBqufxv1VHm2AGGM2ish6rMufe7EGP+X5Hdbztg7rD+CBKgzRZ7oiVykfiUhtY0yWPX3vVWCXMeYlp+NSqiL08o5Svpto38zcCtTDms2jVEjRkb5SSrmIjvSVUspFNOkrpZSLaNJXSikX0aSvlFIuoklfKaVcRJO+Ukq5yP8HfLit5vcDPj8AAAAASUVORK5CYII=\n", 442 | "text/plain": [ 443 | "
" 444 | ] 445 | }, 446 | "metadata": { 447 | "needs_background": "light" 448 | }, 449 | "output_type": "display_data" 450 | } 451 | ], 452 | "source": [ 453 | "sns.pointplot(x=[\"low\"]*len(idx_range)+[\"mid\"]*len(idx_range)+[\"high\"]*len(idx_range)+[\"zero\"]*len(idx_range)+[\"rand\"]*len(idx_range),\n", 454 | " y=influence_distance_lists[0]+influence_distance_lists[1]+influence_distance_lists[2]+influence_distance_lists[3]+influence_distance_lists[4], ci=68)" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [] 463 | } 464 | ], 465 | "metadata": { 466 | "kernelspec": { 467 | "display_name": "Python 3", 468 | "language": "python", 469 | "name": "python3" 470 | }, 471 | "language_info": { 472 | "codemirror_mode": { 473 | "name": "ipython", 474 | "version": 3 475 | }, 476 | "file_extension": ".py", 477 | "mimetype": "text/x-python", 478 | "name": "python", 479 | "nbconvert_exporter": "python", 480 | "pygments_lexer": "ipython3", 481 | "version": "3.7.6" 482 | } 483 | }, 484 | "nbformat": 4, 485 | "nbformat_minor": 4 486 | } 487 | -------------------------------------------------------------------------------- /NLI_analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import argparse\n", 11 | "import csv\n", 12 | "import logging\n", 13 | "import os\n", 14 | "import random\n", 15 | "import sys\n", 16 | "import pickle\n", 17 | "import time\n", 18 | "import numpy as np\n", 19 | "from termcolor import colored\n", 20 | "import matplotlib\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "import seaborn as sns\n", 23 | "from bert_util import *" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "mnli_processor = MnliProcessor()\n", 33 | "hans_processor = HansProcessor()\n", 34 | "train_examples = mnli_processor.get_train_examples('data/', 10000)\n", 35 | "test_examples = hans_processor.get_test_examples('data/')" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 171, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "7\n", 55 | "\u001b[34mThe athlete by the doctors encouraged the senator .\u001b[0m \u001b[31mThe athlete encouraged the senator .\u001b[0m entailment\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "interested_test_idx = 7\n", 61 | "\n", 62 | "for te in test_examples:\n", 63 | " if te.guid == interested_test_idx:\n", 64 | " print(te.guid)\n", 65 | " print(colored(te.text_a, \"blue\"), colored(te.text_b, \"red\"), te.label)\n", 66 | " break" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 172, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "test_tok_sal_list, train_tok_sal_lists, test_pred_status = pickle.load(open(\"from_aws/NLI_influence_output_bert_e3/saliency_test_\" + str(interested_test_idx) + \".pkl\", \"rb\"))" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 173, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "'correct'" 87 | ] 88 | }, 89 | "execution_count": 173, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "test_pred_status" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 174, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "[CLS]\t0.004317282\n", 108 | "the\t-0.007485319\n", 109 | "athlete\t-0.14760923\n", 110 | "by\t-0.049165454\n", 111 | "the\t-0.050391756\n", 112 | "doctors\t-0.14886299\n", 113 | "encouraged\t-0.088029\n", 114 | "the\t-0.032777905\n", 115 | "senator\t-0.055137962\n", 116 | ".\t-0.01177289\n", 117 | "[SEP]\t0.003715336\n", 118 | "\u001b[32mthe\t0.01097851\u001b[0m\n", 119 | "athlete\t-0.04906088\n", 120 | "\u001b[32mencouraged\t0.08856703\u001b[0m\n", 121 | "the\t-0.009075683\n", 122 | "senator\t-0.18473382\n", 123 | ".\t-0.022565214\n", 124 | "\u001b[32m[SEP]\t0.035753675\u001b[0m\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "top_sal_ix = [_ix for _ix, _ in sorted(enumerate(test_tok_sal_list), key=lambda x: -x[1][1])][:3]\n", 130 | "for ix, (tok, sal) in enumerate(test_tok_sal_list):\n", 131 | " if ix in top_sal_ix:\n", 132 | " print(colored(tok + '\\t' + str(sal), \"green\"))\n", 133 | " else:\n", 134 | " print(tok + '\\t' + str(sal))" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 175, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "influences = pickle.load(open(\"from_aws/NLI_influence_output_bert_e3/influences_test_\" + str(interested_test_idx) + \".pkl\", \"rb\"))\n", 151 | "influences = stats.zscore(influences)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 176, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/plain": [ 162 | "" 163 | ] 164 | }, 165 | "execution_count": 176, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | }, 169 | { 170 | "data": { 171 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAD8CAYAAABXe05zAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHXBJREFUeJzt3Xl0XGeZ5/HvU6V9sWRLsuPEi2Rnw9BmcyeBAIEAIRAOND3ABGiWHpg0pwcOPc0My2mWGfp0Q8MBZuA0zZgdGgNpIBBCCAnZTEJwLDte412S5UW2dsuqklRS3Xf+qEWL5ah0cd2rUv0+5/jUolL5vZb986PnXWTOOUREpHBEwh6AiIjMj4JbRKTAKLhFRAqMgltEpMAouEVECoyCW0SkwCi4RUQKjIJbRKTAKLhFRApMST7etLGx0TU3N+fjrUVEFqUdO3b0OueacnltXoK7ubmZ1tbWfLy1iMiiZGbHc32tWiUiIgVGwS0iUmAU3CIiBUbBLSJSYBTcIiIFRsEtIlJgFNwiIgVGwS0iUmAU3CI+/eFoLy/5l4cYSSTDHooUGQW3iE/HeoY5OTDCuZHxsIciRUbBLeJT0nOpW+dCHokUm5zOKjGzDuA8kAQmnHOb8jkokUKQTOe15ym4JVjzOWTqFc653ryNRKTAZAI7qeCWgKlVIuJTpkWiVokELdfgdsD9ZrbDzO7I54BECkWm0larRIKWa6vkRufcaTNbDjxgZgedc1unviAd6HcArFmz5hIPU2Th8TQ5KSHJqeJ2zp1O33YDdwHXzfKazc65Tc65TU1NOf0QB5GClm2VqOKWgM0Z3GZWbWa1mfvALcC+fA9MZKHzsq2SkAciRSeXVskK4C4zy7x+i3PuvryOSqQAaHJSwjJncDvn2oDnBjAWkYKS9DK3Cm4JlpYDivjkpSttTxW3BEzBLeJTUhtwJCQKbhGftI5bwqLgFvHJ0+SkhETBLeKTWiUSFgW3iE+anJSwKLhFfJqsuEMeiBQdBbeIT1rHLWFRcIv4pFaJhEXBLeKTJiclLApuEZ+SqrglJApuEZ+ypwMquCVgCm4Rn7IbcLSqRAKm4BbxKRPY2vIuQVNwi/ikLe8SFgW3iE9aVSJhUXCL+KR13BIWBbeIT6q4JSwKbhGfFNwSFgW3iE9qlUhYFNwiPul0QAmLglvEp2S60FbFLUFTcIv45KnHLSFRcIv4pMlJCYuCW8QnTU5KWBTcIj6p4pawKLhFfErqrBIJiYJbxKfsedyquCVgCm4Rn5I6j1tCouAW8cnLnMetVokETMEt4pMmJyUsOQe3mUXN7CkzuyefAxIpFJqclLDMp+L+EHAgXwMRKTSanJSw5BTcZrYKuA34Zn6HI1I4JicnFdwSrFwr7v8DfATQ/LlIWiawldsStDmD28xeD3Q753bM8bo7zKzVzFp7enou2QBFFqpsq0Q9bglYLhX3jcAbzKwD+DFws5n9+8wXOec2O+c2Oec2NTU1XeJhiiw8apVIWOYMbufcx51zq5xzzcDtwEPOub/K+8hEFrjMOm6tKpGgaR23iE+ZwNaqEglayXxe7Jx7BHgkLyMRKTCeWiUSElXcIj4458h0SDQ5KUFTcIv4MLXKVsUtQVNwi/gwdUIyqdyWgCm4RXzwvKn3ldwSLAW3iA/TKm4FtwRMwS3iw7QetyYnJWAKbhEfprZH1CqRoCm4RXyYPjmp4JZgKbhFfFDFLWFScIv4oIpbwqTgFvFh+gacEAciRUnBLeKD1nFLmBTcIj6oVSJhUnCL+JDU5KSESMEt4kPmRMDSqKnilsApuEV8yFTcpdGItrxL4BTcIj5MDW61SiRoCm4RHyZbJRG1SiRwCm4RHzIVd1nUUMEtQVNwi/iQrbhL1CqR4Cm4RXzI7JZUq0TCoOAW8UGrSiRMCm4RHzKtkrKoqVUigVNwi/iQqbJL1CqRECi4RXxITtk56el0QAmYglvEB8/TOm4Jj4JbxIdMW1uTkxIGBbeID5OrSgzQCYESLAW3iA9Tt7yDzuSWYCm4RXyY3PIemfZYJAgKbhEfZlbcnipuCdCcwW1mFWb2pJntNrP9Zva/gxiYyEKW7XGX2LTHIkEoyeE1Y8DNzrlhMysFHjOz3zjn/pjnsYksWFO3vANayy2BmjO4nXMOGE4/LE3/UnkhRW1yy7smJyV4OfW4zSxqZruAbuAB59y2/A5LZGGbejpg6rGCW4KTU3A755LOuecBq4DrzOw5M19jZneYWauZtfb09FzqcYosKElNTkqI5rWqxDk3CDwC3DrLxzY75zY55zY1NTVdouGJLEyeJiclRLmsKmkys/r0/UrgVcDBfA9MZCHTOm4JUy6rSlYC3zOzKKmgv9M5d09+hyWysGkdt4Qpl1Ule4DnBzAWkYIxczmgKm4JknZOivgw9TxuUMUtwVJwi/iQmZwsK8lU3GGORoqNglvEB63jljApuEV8yLRKSiJqlUjwFNwiPnieI2IQVXBLCBTcIj4knSMaMSIRbcCR4Cm4RXxIVdxG1FRxS/AU3CI+JL1UxR3NVtwhD0iKioJbxIekc0TNiJhaJRI8BbeID57niESMdMGtVokESsEt4kNmcjKqyUkJgYJbxIekBxGbsqpEFbcESMEt4oPnOaIRJleVqOKWACm4RXzw0pOTapVIGBTcIj4kXWZyUuu4JXgKbhEfPK3jlhApuEV8SLrU5GT6cEBNTkqgFNwiPmQOmYpoclJCoOAW8eHCLe8KbgmOglvEh6RLHTKV3fKuVokESMEt4sPMyUm1SiRICm4RHy7Y8q6KWwKk4BbxIelNb5Wo4pYgKbhFfPB0yJSESMEt4kPSS295z05OhjwgKSoKbhEfPA8ikdSv1GMltwRHwS3igyYnJUwKbhEfLpicVHBLgBTcIj7MnJxUq0SCpOAW8eGCyUmdDigBUnCL+JDM/LBg9bglBApuER8yPwEHIBoxtUokUHMGt5mtNrOHzeyAme03sw8FMTCRhSxzOiCkfu6kKm4JUkkOr5kAPuyc22lmtcAOM3vAOfd0nscmsmB5jmybxEyTkxKsOStu51yXc25n+v554ABwRb4HJrKQpSYnU/ejEdOWdwnUvHrcZtYMPB/YNsvH7jCzVjNr7enpuTSjE1mgMpOToFaJBC/n4DazGuBnwN8554Zmftw5t9k5t8k5t6mpqelSjlFkwZk6ORnR5KQELKfgNrNSUqH9Q+fcz/M7JJGFb9rkZEQVtwQrl1UlBnwLOOCc+1L+hySy8HluslUSMdMGHAlULhX3jcA7gZvNbFf61+vyPC6RBS2zcxIgGtGqEgnWnMsBnXOPARbAWEQKhufQOm4JjXZOivjgpU8HBE1OSvAU3CI+pM7jTt3X5KQETcEt4sMF67hVcUuAFNwiPlywjlsVtwRIwS3iQ3JKj1sVtwRNwS0yT865aYdMRSJaxy3BUnCLzFOmuJ62jlutEgmQgltknjJtkeyqErVKJGAKbpF5ylTXe0+eY8u2TvpjCU4NjrBlW2fII5NioeAWmadMdW02eVaJWiUSJAW3yDxlNtuk5yYxM5TbEiQFt8g8eRdU3KmVJiJBUXCLzFOmVZKpuFOtkhAHJEVHwS0yT5lWSabiNlXcEjAFt8g8eenNNpFpk5MhDkiKjoJbZJ4unJxUxS3BUnCLzNPMyUlTxS0BU3CLzNOFk5Pa8i7BUnCLzNOFk5Naxy3BUnCLzJOniltCpuAWmaeZFXfEDMW2BEnBLTJPM3vchipuCZaCW2SeZlvHrdyWICm4ReZpslVC9lYVtwRJwS0yT5OtEu2clHAouEXmyZul4tbOSQmSgltknrxZK24FtwRHwS0yTzN73KnzuEMckBQdBbfIPGVXlTD1rBIltwRHwS0yTzNPB1TFLUFTcIvM0+ynAyq5JThzBreZfdvMus1sXxADElnoLlwOqIpbgpVLxf1d4NY8j0OkYFy4ASd1VomWBEpQ5gxu59xWoD+AsYgUhMlWSepxptetTTgSFPW4ReZpcnLSpt2q4pagXLLgNrM7zKzVzFp7enou1duKLDhJ78JWCaCjXSUwlyy4nXObnXObnHObmpqaLtXbiiw43gUV9/TnRfJNrRKReUrOONY1W3ErtyUguSwH/BHwBHCNmZ00s/fmf1giC9fFJyeV3BKMkrle4Jx7WxADESkUMycnMxW3VpVIUNQqEcmB5znubD3B2ETygsnJTMXtnOPJ9n62bOsMaZRSLOasuEUE9pw6x0d+uoe6ytILJieNyYr7h9uOs/VwD2+/fk1oY5XFTxW3SA76Y2Pp28Tklvf0x6ZW3IPxcc6NjGf74CL5oOAWycFgfDx7m5zlkClIVdyD8QSeg/NjE+EMVIqCglskB9ngHklMaZUw7dY5x+BI6nXn0q8XyQcFt0gOpgZyZh337BX3ZMCL5IuCWyQHg/FUEA/EL15xJ51jaHSypSKSLwpukRzk0uOOJyayuycH4qq4JX8U3CI5yLZKRsan/CAFpt3GxpLZ158bUcUt+aPgFsnBzFaJMVlpZ9Zzx6asJFGrRPJJwS2Sg5mtksyuSZjcQanglqAouEVykKm4xyY84olktsqGKRV3IhXcZlpVIvml4BaZw0TSY2h0ghVLygHoiyUuUnGnetyX11Wq4pa8UnCLzGFoNFVJr22oBlLb32etuNOtktXLKrMVukg+KLhF5pAJ4eaGKgD6hmevuIfHJqitKKGhpjy7CkUkHxTcInPIhHBzY6biTkyvuMn0uJPUV5VSX1mqLe+SVwpukTlkKu6WhsngtmmtktTtSGKC+soy6qtKGRwZ1099l7xRcIvMYiSRzB7NmplozPS4JzyXDWuYflZJfVUpS6vKSHpOJwRK3ii4RWZIeo6XfeFhvvuHDmAyuC+vr6AsmvonM9vkJEB9VRl1laXA5AmBn/3NAb77eHsQQ5cioeAWmeHUwAg958fYdWIQSLVKzGBJRSn1ValQnm1yEqC+spT6qrL056WC+87tJ/jFrtPBDF6Kgn50mcgMbb3DAHT0xYDU5GRdZSmRiFFfVUr3+dmXA0KqVZIJ98GRBAOxBAPxcZLeMM65ab1xEb9UcYvM0N6bCuz2nlj2x5HVp9sf9ZWpanpq/E7N4rrK0uxrB+PjtKXfa2h0gv6Y1nbLpaGKW2SGTHCfH5ugdzjBQDxBXbr9UZeupi9WcS+tKpvSKkkwOj55YmBbb4yGmvK8j18WP1XcIjO098ayVXR7b4xzI+MsTQf20ll63FNXmNRXlWYnJwfj49n/BCBVwYtcCgpuKXrff6KDn+44mX3c3hvjBWuWpu8PT2+VpKvpqVW2zehxl5VEqC6LMjgyTltPjOaGKsqiEY6le+cAYxNJrfMW3xTcUtQ8z/GlBw7z1YeOADA6nuTU4Ag3rm+gLBqhrTfGYDyRDexMNX2xVSV16R54fVVZtuK+cnkNaxuqaEtX3PHEBC/67EP8cFtnAFcoi5GCW4ra4e7zDMbHOd4X58y5UTr74zgH69Nhe/TsMEOjE9mVIktnqbhnriqBVMD3x8Zo74uxrqmGdU3V2bbJ9o4B+mMJfrv/TFCXKYuMgluKykgiyZlzo9nHT7b3Z+9va+/LVsXrGmtoaaxm98nUWu7JVsnc67gBllaX8nTXEIkJj5bGaloaazjeF2Mi6fGHo70AbO/oZ2xicvLyRH/8El6pLGYKbikqn757H7f+362MJFKBua2tn5V1FdSUl7CtvT9bFTc3VtHSWE3vcGoJX6ZVkgnm2Sru8pIIJemdlfWVZZwdGgNgXWM165qqGU86Tg2O8PixXipKI4yOezzVmfqP4fGjvbz08w/z0MGz+f4jkEVAwS2LVjwxwe+ePpudBDw/Os7du08zGB/nt/vP4JxjW3sfN6xrYFPzUp5s76e9d5im2nJqK0ppSZ8GCJPLAOtmW1WSvq0qi17weiDVKkm/187OAfafHuKdN6wlYmSr7y1PpvrdW7adyH5ez/kxfr7zpCYx5QIKblkUkp7j3r1d037u42fvPcj7vt/KPXu6APj1ni5Gxz1qykv4jx0naOuN0Tuc4PqWZVzf0sDR7mFajw9kA3tqcGd627P1uDOrSirLomzZ1smWbZ2cGhgBUlX4/fvPsK6pBoAt2zpxDl7z7MvYuKqex4/1MRBL8MD+s1SXRXn4UDfd51OtnE/fvY+/v3M3jxzuyf5eh8+e5/dHJh9LccopuM3sVjM7ZGZHzexj+R6UyFSD8dTW8QznHD/Z3snBM0PZ5/7f1mP87Q938olf7APgeF+MH6Wr2C/ef4jxpMedrSe4cnkN73tpC48f7ePnO1NLAK9rWcZ1LcsAaOuJZY9vbWmaDO5n6nFn1nFXlU7uZ8tU30215ZgZS9Pru7d3DFBdFuW5q+u58coGdp8YZMuTnSSSHv/y5o0kPcddO0+x9+Q57t17BjP4wn2H8DzHQCzBO765jfd8Zzv7Tp3L/ln84IkOHj7YPe3PbCSh5YaL2ZzBbWZR4F+B1wIbgLeZ2YZ8D0wWF89z7Dt1LttbBug6N8IPnujg/GjqMKZMIH/sZ3uy28MPnTnPq760lVd/eSuHzpwH4Mu/O8JHf7aX2zf/kSNnz3Oga4gvP3CYptpy7nrqFPft6+KL9x+mJGr885v+jI6+OJ+99yA7Owd566ZVvPmFqzCDzVvbaKotp6Wxmo2r6qgsTYVtJrCbasopL0n9E/ndgbNs2dbJXTtPEY3YRSvujExwN6Z3SpoZ69Lve13LMkqjEW5c38iE5/jKg0d4zhVLeP3Gy3nh2qXc2XqCz//2IEurSvnMG57N011D/HpvF5++ez8DsQRLq0r5+zt3MTaR5Bu/b+OTv9zPf/1+a7YSv29fF8/7zP383U92kZjwAHjiWB/v+c6TbO+YnIzt7Ivzy12nmEh62edGx5PEEzqOdqHLZcv7dcBR51wbgJn9GHgj8PSlHsyjh3toqC5jbUMVFaVRTg2M0HVulMaaMlYvS/3YqM7+OH3DCa6or+Ty+gpiiSSdfXFGxpOsXlbJitoK+mIJjvfFiESM5oZq6ipLOTM0yon+OLUVJaxtqKY0apzoj3NqcJQVS8ppbqjGc47O/jj9sQSr6qtS7z+WpKMvxtiEx9qGKppqyjl7fpT2nhjRiLF+eQ31laWcHBjheH+c+spSWpqqKY1EONYzzMmBOFfUV3Hl8hrGPY9j3cMMxBOsbahmzbIq+mMJDp45T2LC4+oVNVxeX8nxvhj7Tw9RURrl2ZcvobGmnKe7hjjYdZ4VS8rZuKqespIIe0+eo713mPVNNTxnVR1DI+Ns7+jnzLkxnru6jueuqqetJ8ZjR3tJTHi8+MoGNqxcwv7TQ2zv6Ke2ooQb1jWwvLacRw/3sK29n3WN1dx87XIccM/uLnadGOBF6xt43Z+t5OzQGL/afZqTAyO8esNybrp6OQ8f6ub7TxxnPOnxV9ev4RXXLuc/Wk/yw23HWbGkgr+5aR1X1FfxT/ceYPeJQVbWVfDRW69leGyCz/3mIMNjE3ztkWN88vUb+M2+M/xq9+ns34UPvfIqPnffwWx4/ufNT/CfXrCKbz3Wzm0bV7K9vZ93futJ6ipLqass49cffAnv/V4rH/npHoZGJ/jbl6/nbdet5uc7T/Ltx9uJRoy/eP4VLK+t4Mb1jTx2tJfrW5ZhZpRGjReuXcpjR3uzLRIzo7GmnNODI1SkQ93MqCqLTtstma24pwR3Zbr6bqwpyz7X0ljNU52D3HhlIwAvWLuUspIIYxMeb920GoC3blrFR3+2l2M9Mf7hdc/i7dev5d//2MknfrGPcyPj/PdXXc3G1XX89Xe2855vb+eJtj5e+5zLaO+N8f4f7ODdL27m648eY9XSKn656zT9sVQr6EsPHAbg90d6+dit15JIenzlwSOMTXhs3trGp16/gZ2dg2zeeozRcY93vXgtb/vzNTx4sJuf7jjJsupS3nH9WjauquPXe7p48EA3Gy5fwl++4Aqqy0uyf1duWNfAbRtXMhgf5759Zzg1OMLLr2niZVc1cbRnmEcP9TDuedx0dRMbV9Wz9+Q5Wjv6qa8q5YZ1DVxWV8GuE4M8fXqINcuqeGHzUkoiEXafGOR4X4yrL6tl46p6YmMT7Dt1jsH4OM9auYQrl9dwdmiUg+n/3K+9rJaVdRUc749ztHuY2ooSrllRS01FCR29cU70x1m+pJz16RZWW0+MnuFR1iyrYs2yakYSSdr7YsTHJljbWM3KJRX0xsY43hcnYqmz2ZdWlXFmaJTOvkyuVFFbMTm3kU8217dTZvZm4Fbn3PvSj98JXO+c+8DFPmfTpk2utbV1XgPxPMezPnUfY+kKIWKpg+mfyWyvyfU5M5h66TMfX+r3n81sr8nl8/zI5X2ryqLEp1TEAGuWVdE5ZZlaWUmEpppyTg2OZJ/bsHIJZSWR7DGoZvDKa1dwvC/Gke7UbsHlteW89yUt3LOni73pb/NfvL6Bd72omS89cIjDZ4eJGHz4lmt46VWNfPBHT3G8L86qpZVsed8NALz9m3/k5MAIt2xYwdfe8QKO9gzz1q8/wdDoBN941yZevWEFR86e57avPkbUjP9xyzVUlkXp6I2x+fdtXHtZLe96UTMAu08M8pPWE7zhuZdzw7oGAB46eJbfHejmQ6+8ihVLKgD48fZOjnYP84nbJr/J/MqDR6ivKs2+l+ccn/zFPl5+TROv3nAZQPb3vP3PV7NxVT0Ajxzq5v6nz/LBm69kZV0lAN98rI3jfXE+/tprqSorYWw8yT//5gCVpVE+fMs1lEYjHOga4gd/PM4V9ZW8/6b1RCPGz3eezPbj//rFzcQTSb7+6DEGR8a5ZkUtb7tuDXtPDXLXU6fwHLx+40o+cdsGPvXLfdz/dGrlymufcxk3X7ucL/z2EN3nUytgbrq6ibrKUn6153T278vzVtfTc35s2tf82stqaeuNZSt6M1i7rIqOvsm/K9GIpdezJ6Y9FzEYT+a/jeP339efkgWrl1Wy9X++wtcpkGa2wzm3KafX5hDcbwFeMyO4r3POfXDG6+4A7kg/vAY4lL7fCPTmPvxFoxivuxivGYrzuovxmiG/173WOdeUywtzaZWcBFZPebwKuOBUeOfcZmDzzOfNrDXX/0UWk2K87mK8ZijO6y7Ga4aFc925rCrZDlxlZi1mVgbcDtyd32GJiMjFzFlxO+cmzOwDwG+BKPBt59z+vI9MRERmldMPUnDO3Qvc6/P3uKB9UiSK8bqL8ZqhOK+7GK8ZFsh1zzk5KSIiC4u2vIuIFJi8BbeZvcXM9puZZ2abpjzfbGYjZrYr/evr+RpD0C52zemPfTx9ZMAhM3tNWGPMNzP7X2Z2asrX93VhjylfivUoCDPrMLO96a/v/DZsFAgz+7aZdZvZvinPLTOzB8zsSPp2aVjjy2fFvQ/4S2DrLB875px7XvrX+/M4hqDNes3pIwJuB54N3Ap8LX2UwGL15SlfX79zIwuajoLgFemvb+hL4/Lku6T+rU71MeBB59xVwIPpx6HIW3A75w445w7N/crF4xmu+Y3Aj51zY865duAoqaMEpHBlj4JwziWAzFEQsgg457YC/TOefiPwvfT97wF/Eeigpgirx91iZk+Z2aNm9tKQxhCkK4ATUx6fTD+3WH3AzPakv90M7dvJPCu2r+lUDrjfzHakd0wXixXOuS6A9O3ysAaS03LAizGz3wGXzfKhf3DO/fIin9YFrHHO9ZnZC4FfmNmznXNDF3n9guLzmmc7uKBgl/M8058B8G/AP5K6vn8Evgj8l+BGF5hF9TWdpxudc6fNbDnwgJkdTFeoEpA/Kbidc6/y8TljwFj6/g4zOwZcDRTEJIefaybHYwMKRa5/Bmb2DeCePA8nLIvqazofzrnT6dtuM7uLVNuoGIL7rJmtdM51mdlKoHvOz8iTwFslZtaUmZgzs3XAVUBb0OMI2N3A7WZWbmYtpK75yZDHlBfpv9AZbyI1YbsYFeVREGZWbWa1mfvALSzer/FMdwPvTt9/N3Cx77Dz7k+quJ+Jmb0J+CrQBPzazHY5514DvAz4jJlNAEng/c65mZMABeli1+yc229md5I6w3wC+G/OueQzvVcB+7yZPY9U26AD+Jtwh5MfRXwUxArgrvSxpSXAFufcfeEO6dIzsx8BLwcazewk8Gngc8CdZvZeoBN4S2jj085JEZHCop2TIiIFRsEtIlJgFNwiIgVGwS0iUmAU3CIiBUbBLSJSYBTcIiIFRsEtIlJg/j98wq5gDDnQmQAAAABJRU5ErkJggg==\n", 172 | "text/plain": [ 173 | "
" 174 | ] 175 | }, 176 | "metadata": { 177 | "needs_background": "light" 178 | }, 179 | "output_type": "display_data" 180 | } 181 | ], 182 | "source": [ 183 | "sns.distplot(influences)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 177, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "\u001b[34mOne of the drawings on view at MoMA is a diagram of the races, with the Jews identified as circumscised [ sic ] cut off from Earth.\u001b[0m \u001b[31mJews think of drawings on the MoMA as a diagram of the races teaching circumcision. \u001b[0m entailment\n", 203 | "10.675267619891791\n", 204 | "\n", 205 | "\u001b[34mTwo pillars surviving from the Greek Temple of Apollo stand like a gateway, but the Spanish era has given it a charming 17th-century ambience of Baroque houses with iron balconies supported by floral carvings and an occasional stone nymph.\u001b[0m \u001b[31mIt's comprised of parts of the Greek Temple of Apollo and Spanish Baroque houses.\u001b[0m entailment\n", 206 | "9.935398536642214\n", 207 | "\n", 208 | "\u001b[34mbecause you're having fun\u001b[0m \u001b[31mBecause you're having fun\u001b[0m entailment\n", 209 | "9.227490645253376\n", 210 | "\n", 211 | "\u001b[34m9. INVERSE PRICE CAPS\u001b[0m \u001b[31m9 Inward caps on price. \u001b[0m entailment\n", 212 | "9.15203224663952\n", 213 | "\n", 214 | "\u001b[34mI don't know if I was in heaven or hell, said Lillian Carter, the president's mother, after a visit.\u001b[0m \u001b[31mThe president's mother visited.\u001b[0m entailment\n", 215 | "9.069184865280848\n", 216 | "\n", 217 | "\u001b[34mDo it now, think 'bout it later.\u001b[0m \u001b[31mDon't think about it now, just do it.\u001b[0m entailment\n", 218 | "7.887612892136819\n", 219 | "\n", 220 | "\u001b[34mThe participating lawyers say Ginsburg, a diehard Red Sox fan known for his trademark bow ties, is tough to turn down.\u001b[0m \u001b[31mGinsburg is exceptionally tough to refuse according to the lawyers.\u001b[0m entailment\n", 221 | "7.605703526667061\n", 222 | "\n", 223 | "\u001b[34mHowever, they noted that more detailed guidelines often provided answers to employees' questions and served as a tool for educating subordinate security managers and others who wanted a more thorough understanding of good security practices.\u001b[0m \u001b[31mThe remarked that thorough guidelines are useful for educating security managers in lower ranks. \u001b[0m entailment\n", 224 | "7.352414811634403\n", 225 | "\n", 226 | "\u001b[34mIn 1511 Diego de Velazquez sailed from neighboring Hispaniola with some 300 conquistadores (conquerors).\u001b[0m \u001b[31mDiego de Velazquez sailed from Hispaniola in 1511.\u001b[0m entailment\n", 227 | "6.955728404245563\n", 228 | "\n", 229 | "\u001b[34mThese would be special cases of the more general case analyzed here.\u001b[0m \u001b[31mThis analysis is more general than these special cases.\u001b[0m entailment\n", 230 | "6.748324392734631\n", 231 | "\n", 232 | "\u001b[34mShenzhen's main tourist attractions are its enormous theme parks.\u001b[0m \u001b[31mMost tourists visit Shenzhen for its theme parks.\u001b[0m entailment\n", 233 | "6.551952339413625\n", 234 | "\n", 235 | "\u001b[34mAuditors should report the scope of their testing of compliance with laws and regulations and of internal control over financial reporting, including whether or not the tests they performed provided sufficient evidence to support an opinion on compliance with laws and regulations or internal control over financial reporting and whether the auditors are providing such opinions.\u001b[0m \u001b[31mThere might not be sufficient evidence for an opinion on compliance.\u001b[0m entailment\n", 236 | "6.473572188336845\n", 237 | "\n", 238 | "\u001b[34mTherma on the north coast is still an active spa offering treatments for ailments from rheumatism to infertility.\u001b[0m \u001b[31mTherma is an active spa that offers treatments for a wide range of conditions from rheumatism to infertility.\u001b[0m entailment\n", 239 | "6.295039745357184\n", 240 | "\n", 241 | "\u001b[34mBut Rubin's cautiousness has got the better of him.\u001b[0m \u001b[31mRubin had been too cautious.\u001b[0m entailment\n", 242 | "6.212309163040578\n", 243 | "\n", 244 | "\u001b[34mno i've uh you you can rent people's kiln they they uh you take your pieces to their house or their ceramic shop and uh pay them there's one woman that charge ten dollars a month for fire all that i wanted\u001b[0m \u001b[31mThere are people that let you rent their kilns to finish pieces at their shop.\u001b[0m entailment\n", 245 | "6.181591459080156\n", 246 | "\n", 247 | "\u001b[34mFiction has its glories, but concealment is merely squalid.\u001b[0m \u001b[31mHiding things is just dirty, whereas there is glory in fiction\u001b[0m entailment\n", 248 | "6.093391750516259\n", 249 | "\n", 250 | "\u001b[34mAn active government cultural policy in recent years has ?­preserved the architectural monuments of the national patrimony from the ravages of time, weather, war, revolution, and urban development.\u001b[0m \u001b[31mAn active government cultural policy was developed not too long ago.\u001b[0m entailment\n", 251 | "5.966764713009971\n", 252 | "\n", 253 | "\u001b[34mthat's Major Dad at eight i think it is\u001b[0m \u001b[31mI think Major Dad is on at eight o'clock.\u001b[0m entailment\n", 254 | "5.956997115552974\n", 255 | "\n", 256 | "\u001b[34mTotals may not sum due to rounding.\u001b[0m \u001b[31mRounding may cause inaccurate totals.\u001b[0m entailment\n", 257 | "5.925547974245473\n", 258 | "\n", 259 | "\u001b[34mIt's possible he's guilty.\u001b[0m \u001b[31mIt cannot be said that innocence is an absolute certainty.\u001b[0m entailment\n", 260 | "5.895998260705707\n", 261 | "\n" 262 | ] 263 | } 264 | ], 265 | "source": [ 266 | "order_of_interest = \"max\"\n", 267 | "label_of_interest = \"both\"\n", 268 | "num_of_interest = 20\n", 269 | "\n", 270 | "train_idx_sorted = list(np.argsort(influences))\n", 271 | "if order_of_interest == \"max\":\n", 272 | " train_idx_sorted.reverse()\n", 273 | "if label_of_interest == \"both\":\n", 274 | " valid_labels = [\"entailment\", \"non-entailment\"]\n", 275 | "else:\n", 276 | " valid_labels = [label_of_interest]\n", 277 | "\n", 278 | "cnt = 0\n", 279 | "for idx in train_idx_sorted:\n", 280 | " te = train_examples[idx]\n", 281 | " if te.label in valid_labels:\n", 282 | " print(colored(te.text_a, \"blue\"), colored(te.text_b, \"red\"), te.label)\n", 283 | " print(influences[idx])\n", 284 | "# top_sal_ix = [_ix for _ix, _ in sorted(enumerate(train_tok_sal_lists[idx]), key=lambda x: -x[1][1])][:3]\n", 285 | "# for ix, (tok, sal) in enumerate(train_tok_sal_lists[idx]):\n", 286 | "# if ix in top_sal_ix:\n", 287 | "# print(colored(tok + '\\t' + str(sal), \"green\"))\n", 288 | "# else:\n", 289 | "# print(tok + '\\t' + str(sal))\n", 290 | " print()\n", 291 | " cnt += 1\n", 292 | " if cnt >= num_of_interest:\n", 293 | " break" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 168, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "name": "stdout", 310 | "output_type": "stream", 311 | "text": [ 312 | "\u001b[34mH'm, yes, that might be, said John. \u001b[0m \u001b[31mYes, that might be the case, said John.\u001b[0m non-entailment\n", 313 | "-15.835316861365476\n", 314 | "\n", 315 | "\u001b[34mThis coalition of public and private entities undertakes initiatives aimed at raising public awareness about personal finance and retirement planning.\u001b[0m \u001b[31mPersonal finance and retirement planning are initiatives aimed at raising public awareness.\u001b[0m non-entailment\n", 316 | "-14.574376762573735\n", 317 | "\n", 318 | "\u001b[34mIf you've installed Internet Explorer 4.0, the new version of Microsoft's browser, you might like to take a peek at our new Table of Contents, designed especially to take advantage of the advances in Internet Explorer 4.0.\u001b[0m \u001b[31mWith Internet Explorer 4.0, you can take advantage of our new Table of Contents for Internet Explorer 4.0 users.\u001b[0m non-entailment\n", 319 | "-14.305925970899958\n", 320 | "\n", 321 | "\u001b[34mTheir applicability to case study evaluations outside of settings such as GAO is being explored.\u001b[0m \u001b[31mThey are exploring the applicability to case study evaluations inside the settings.\u001b[0m non-entailment\n", 322 | "-14.163630165532917\n", 323 | "\n", 324 | "\u001b[34mEt In an interview on Late Edition , Ken Starr says that if he could do it over again, he would have used the independent counsel's office as a bully pulpit, appearing on television to counter the White House's spin doctors.\u001b[0m \u001b[31mKen Starr used tv appearances to counter the spin doctors.\u001b[0m non-entailment\n", 325 | "-14.047570778703388\n", 326 | "\n", 327 | "\u001b[34mAs you fly down the coast from Caletta, you'll suddenly spot a small forest of tall domes in a town with a lake at its center.\u001b[0m \u001b[31mAs you arrive in Caletta from the coast, you will see a town with a small forest and lake next to it.\u001b[0m non-entailment\n", 328 | "-13.664267712736645\n", 329 | "\n", 330 | "\u001b[34mIt is better than being cooked and eaten, said San'doro.\u001b[0m \u001b[31mSan'doro wants to be cooked and eaten. \u001b[0m non-entailment\n", 331 | "-13.625070354449424\n", 332 | "\n", 333 | "\u001b[34mOne benefit is better meeting the diverse information needs of investors in evaluating funds, which has become more difficult as the number of funds has grown.\u001b[0m \u001b[31mInvestors have diverse needs when it comes to the information they need about stakeholders.\u001b[0m non-entailment\n", 334 | "-13.579489785068866\n", 335 | "\n", 336 | "\u001b[34mThe massive mansion opposite, nearly as impressive, houses the Casa de efrica, with pelts, drums, costumes, carved figures, and furniture from some 26 African countries, as well as a tantalizing collection of objects related to santeraa, the syncretic Afro-Cuban religion (see page 26; Casa de efrica will likely be closed for renovations until the end of 2000).\u001b[0m \u001b[31mFurniture from some 26 African countries can be found in the small mansion.\u001b[0m non-entailment\n", 337 | "-12.871885934365315\n", 338 | "\n", 339 | "\u001b[34mTo many Madeirans who believe the Lady of Monte has carried them through troubled times, the pilgrimage is an obligation.\u001b[0m \u001b[31mBecause the Lady of Monte is believed to have carried them through troubled times, Madeirans believe the pilgrimage is an obligation.\u001b[0m non-entailment\n", 340 | "-12.848709373663336\n", 341 | "\n", 342 | "\u001b[34mThe foundations date back to the 1170s when Strongbow, the earl of Pembroke, had it rebuilt as a stone structure.\u001b[0m \u001b[31mThe foundations of the building are from Strongbow's time.\u001b[0m non-entailment\n", 343 | "-12.614859255737555\n", 344 | "\n", 345 | "\u001b[34mthat seems to be an up and coming business i know out there where we are there's there's uh a person on each side of our county that now have aviaries\u001b[0m \u001b[31mAviaries are an up-and-coming business.\u001b[0m non-entailment\n", 346 | "-12.519039352363905\n", 347 | "\n", 348 | "\u001b[34mThey actually seem to play because they love their game.\u001b[0m \u001b[31mThey seem to play because of their love of the game and of each other.\u001b[0m non-entailment\n", 349 | "-12.043642566979226\n", 350 | "\n", 351 | "\u001b[34mOne of the oldest churches in the world, it was the only church in the Holy Land not destroyed during the Persian invasion of 614 the invaders noticed an icon of the Magi (who were fellow Persians) and spared the structure.\u001b[0m \u001b[31mIt was the only church not destroyed during the Persian invasion.\u001b[0m non-entailment\n", 352 | "-11.866403957682397\n", 353 | "\n", 354 | "\u001b[34mYou get us a working Franklin in two weeks, and we'll pretend everything is happening exactly as it should.\u001b[0m \u001b[31mThere was a deadline imposed to get Franklin working in two weeks, otherwise there would be an issue.\u001b[0m entailment\n", 355 | "-11.839088725426494\n", 356 | "\n", 357 | "\u001b[34mum for the worst i would assume\u001b[0m \u001b[31mFor the best I would assume.\u001b[0m non-entailment\n", 358 | "-11.736669020481518\n", 359 | "\n", 360 | "\u001b[34moh gosh well uh one that comes to mind is a lady i don't know what the heck she was doing in her dishwasher but she was in her dishwasher her head was in her dishwasher and her hair got caught\u001b[0m \u001b[31mA lady was doing something in her dishwasher.\u001b[0m non-entailment\n", 361 | "-11.346783397408258\n", 362 | "\n", 363 | "\u001b[34mSorry, Tuppence.\u001b[0m \u001b[31mSorry, Tuppence.\u001b[0m non-entailment\n", 364 | "-10.665433899485622\n", 365 | "\n", 366 | "\u001b[34mIt's true that coverage of Clinton on the campaign trail has been somewhat softer than the coverage of Dole, as even other Times reporters acknowledge.\u001b[0m \u001b[31mSome of the shoreline homes have hot tubs and covered porches. \u001b[0m entailment\n", 367 | "-10.633858594643545\n", 368 | "\n", 369 | "\u001b[34mThe word had filled his head as though the girl had whispered directly into both ears.\u001b[0m \u001b[31mThe girl spoke very quietly. \u001b[0m entailment\n", 370 | "-10.491693157430452\n", 371 | "\n" 372 | ] 373 | } 374 | ], 375 | "source": [ 376 | "order_of_interest = \"min\"\n", 377 | "label_of_interest = \"both\"\n", 378 | "num_of_interest = 20\n", 379 | "\n", 380 | "train_idx_sorted = list(np.argsort(influences))\n", 381 | "if order_of_interest == \"max\":\n", 382 | " train_idx_sorted.reverse()\n", 383 | "if label_of_interest == \"both\":\n", 384 | " valid_labels = [\"entailment\", \"non-entailment\"]\n", 385 | "else:\n", 386 | " valid_labels = [label_of_interest]\n", 387 | "\n", 388 | "cnt = 0\n", 389 | "for idx in train_idx_sorted:\n", 390 | " te = train_examples[idx]\n", 391 | " if te.label in valid_labels:\n", 392 | " print(colored(te.text_a, \"blue\"), colored(te.text_b, \"red\"), te.label)\n", 393 | " print(influences[idx])\n", 394 | "# top_sal_ix = [_ix for _ix, _ in sorted(enumerate(train_tok_sal_lists[idx]), key=lambda x: -x[1][1])][:3]\n", 395 | "# for ix, (tok, sal) in enumerate(train_tok_sal_lists[idx]):\n", 396 | "# if ix in top_sal_ix:\n", 397 | "# print(colored(tok + '\\t' + str(sal), \"green\"))\n", 398 | "# else:\n", 399 | "# print(tok + '\\t' + str(sal))\n", 400 | " print()\n", 401 | " cnt += 1\n", 402 | " if cnt >= num_of_interest:\n", 403 | " break" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [] 412 | } 413 | ], 414 | "metadata": { 415 | "kernelspec": { 416 | "display_name": "Python 3", 417 | "language": "python", 418 | "name": "python3" 419 | }, 420 | "language_info": { 421 | "codemirror_mode": { 422 | "name": "ipython", 423 | "version": 3 424 | }, 425 | "file_extension": ".py", 426 | "mimetype": "text/x-python", 427 | "name": "python", 428 | "nbconvert_exporter": "python", 429 | "pygments_lexer": "ipython3", 430 | "version": "3.7.3" 431 | } 432 | }, 433 | "nbformat": 4, 434 | "nbformat_minor": 4 435 | } 436 | --------------------------------------------------------------------------------