├── .gitignore ├── README.md ├── bin ├── run-spelling-convnet-exp01-convparams-final.sh ├── run-spelling-convnet-exp01-convparams.sh ├── run-spelling-convnet-exp02-embeddings.sh ├── run-spelling-convnet-exp03-inputs.sh ├── run-spelling-convnet-exp04-real-errors.sh ├── run-spelling-convnet-exp05-multiclass.sh ├── run-spelling-convnet-residual.sh ├── run-spelling-convnet.sh ├── run-spelling-correction-isolated-binary.sh ├── run-spelling-correction-isolated-multiclass.sh └── run-spelling-lstm.sh ├── contrasting_cases.py ├── modeling ├── __init__.py ├── autograd_examples.py ├── builders.py ├── callbacks.py ├── chainer_model.py ├── data.py ├── dataset.py ├── difference.py ├── fbeta.py ├── fbeta_predict.py ├── lasagne_model.py ├── layers.py ├── nonconvnet.py ├── outliers.py ├── parser.py ├── preprocess.py ├── residual.py ├── spelling.py └── utils.py ├── models ├── keras │ ├── attention │ │ ├── model.json │ │ └── model.py │ ├── preposition │ │ ├── convnet │ │ │ ├── 4e0ae5dc683611e5950afcaa149e39ea │ │ │ │ ├── model.py │ │ │ │ └── model_old_keras.py │ │ │ ├── model-word2vec.json │ │ │ ├── model.json │ │ │ ├── model.py │ │ │ ├── run-medium.sh │ │ │ ├── run-small.sh │ │ │ └── small │ │ │ │ └── find-best-filter-size │ │ │ │ ├── find-best.sh │ │ │ │ └── find-best.txt │ │ └── lstm │ │ │ ├── model.json │ │ │ └── model.py │ └── spelling │ │ ├── convnet │ │ ├── exp03-inputs │ │ │ └── op_transpose_n_ops_1_n_errors_per_word_3 │ │ │ │ └── analysis.py │ │ ├── model.json │ │ └── model.py │ │ ├── correction │ │ └── isolated │ │ │ ├── binary │ │ │ ├── model.json │ │ │ └── model.py │ │ │ └── multiclass │ │ │ ├── model.json │ │ │ └── model.py │ │ ├── data │ │ └── nietzsche.txt │ │ └── toksents.py └── lasagne │ └── spelling │ └── convnet │ ├── model.json │ └── model.py ├── notebooks ├── ConvnetAnalysis.ipynb ├── ConvnetAnalysisHumanJudgments.ipynb ├── ConvnetSensitivityAnalysis.ipynb ├── Spelling.ipynb └── notes.txt ├── requirements.txt ├── setup.py ├── tests ├── testdata.py ├── testdifference.py ├── testlasagne.py ├── testlayers.py └── testnonconvnet.py ├── train_chainer.py ├── train_keras.py ├── train_keras_simple.py └── train_lasagne.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # modeling 2 | -------------------------------------------------------------------------------- /bin/run-spelling-convnet-exp01-convparams-final.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | model_dir=models/keras/spelling/convnet 4 | data_dir=data/spelling/experimental/ 5 | distance=1 6 | errors=3 7 | 8 | experiment_name=$(echo $0 | sed -r 's/.*-(exp[0-9][0-9]-..*).sh/\1/') 9 | experiment_dir=$model_dir/$experiment_name 10 | mkdir -p $experiment_dir 11 | 12 | for operation in delete 13 | do 14 | for n_embed_dims in 10 15 | do 16 | for n_filters in 3000 17 | do 18 | for filter_width in 6 19 | do 20 | for n_fully_connected in 1 21 | do 22 | for n_residual_blocks in 0 23 | do 24 | for n_hidden in 1000 25 | do 26 | model_dest=$experiment_dir/op_${operation}_n_embed_dims_${n_embed_dims}_n_filters_${n_filters}_filter_width_${filter_width}_n_fully_connected_${n_fully_connected}_n_residual_blocks_${n_residual_blocks}_n_hidden_${n_hidden} 27 | if [ -d $model_dest ] 28 | then 29 | continue 30 | fi 31 | ./train_keras.py $model_dir \ 32 | 
$data_dir/op-${operation}-distance-${distance}-errors-per-word-${errors}.h5 \ 33 | $data_dir/op-${operation}-distance-${distance}-errors-per-word-${errors}.h5 \ 34 | chars \ 35 | --target-name binary_target \ 36 | --model-dest $model_dest \ 37 | --n-embeddings 61 \ 38 | --model-cfg n_embed_dims=$n_embed_dims n_filters=$n_filters filter_width=$filter_width n_fully_connected=${n_fully_connected} n_residual_blocks=$n_residual_blocks n_hidden=$n_hidden patience=10 batch_size=32 \ 39 | --shuffle \ 40 | --confusion-matrix \ 41 | --classification-report \ 42 | --class-weight-auto \ 43 | --class-weight-exponent 3 \ 44 | --early-stopping-metric f2 \ 45 | --verbose \ 46 | --log 47 | done 48 | done 49 | done 50 | done 51 | done 52 | done 53 | done 54 | -------------------------------------------------------------------------------- /bin/run-spelling-convnet-exp01-convparams.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | model_dir=models/keras/spelling/convnet 4 | data_dir=data/spelling/experimental/old/ 5 | distance=1 6 | errors=3 7 | 8 | experiment_name=$(echo $0 | sed -r 's/.*-(exp[0-9][0-9]-..*).sh/\1/') 9 | experiment_dir=$model_dir/$experiment_name 10 | mkdir -p $experiment_dir 11 | 12 | for operation in delete 13 | do 14 | for n_embed_dims in 10 30 100 15 | do 16 | for n_filters in 100 200 300 17 | do 18 | for filter_width in 2 4 6 8 19 | do 20 | for n_fully_connected in 1 21 | do 22 | for n_residual_blocks in 0 23 | do 24 | for n_hidden in 100 200 300 25 | do 26 | model_dest=$experiment_dir/op_${operation}_n_embed_dims_${n_embed_dims}_n_filters_${n_filters}_filter_width_${filter_width}_n_fully_connected_${n_fully_connected}_n_residual_blocks_${n_residual_blocks}_n_hidden_${n_hidden} 27 | if [ -d $model_dest ] 28 | then 29 | continue 30 | fi 31 | echo ./train_keras.py $model_dir \ 32 | $data_dir/$operation-${errors}errors1word-distance-$distance${nonce}.h5 \ 33 | $data_dir/$operation-${errors}errors1word-distance-$distance${nonce}.h5 \ 34 | word \ 35 | --target-name target \ 36 | --model-dest $model_dest \ 37 | --n-embeddings 61 \ 38 | --model-cfg n_embed_dims=$n_embed_dims n_filters=$n_filters filter_width=$filter_width n_fully_connected=${n_fully_connected} n_residual_blocks=$n_residual_blocks n_hidden=$n_hidden patience=5 \ 39 | --shuffle \ 40 | --confusion-matrix \ 41 | --classification-report \ 42 | --class-weight-auto \ 43 | --class-weight-exponent 3 \ 44 | --early-stopping-metric f2 \ 45 | --verbose \ 46 | --log 47 | done 48 | done 49 | done 50 | done 51 | done 52 | done 53 | done | parallel --gnu -j 2 54 | -------------------------------------------------------------------------------- /bin/run-spelling-convnet-exp02-embeddings.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | model_dir=models/keras/spelling/convnet 4 | data_dir=data/spelling/experimental/ 5 | distance=1 6 | errors=3 7 | 8 | experiment_name=$(echo $0 | sed -r 's/.*-(exp[0-9][0-9]-..*).sh/\1/') 9 | experiment_dir=$model_dir/$experiment_name 10 | mkdir -p $experiment_dir 11 | 12 | operation=delete 13 | n_embed_dims=56 14 | n_filters=10 15 | filter_width=6 16 | n_fully_connected=0 17 | n_hidden=0 18 | 19 | for embedding_init in identity orthogonal uniform normal 20 | do 21 | for train_embeddings in false true 22 | do 23 | 
model_dest=$experiment_dir/op_${operation}_n_embed_dims_${n_embed_dims}_n_filters_${n_filters}_filter_width_${filter_width}_n_fully_connected_${n_fully_connected}_n_hidden_${n_hidden}_embedding_init_${embedding_init}_train_embeddings_${train_embeddings} 24 | #--model-dest $model_dest \ 25 | echo $model_dest 26 | ./train_keras.py $model_dir \ 27 | $data_dir/op-${operation}-distance-${distance}-errors-per-word-${errors}.h5 \ 28 | $data_dir/op-${operation}-distance-${distance}-errors-per-word-${errors}.h5 \ 29 | chars \ 30 | --target-name binary_target \ 31 | --n-embeddings 56 \ 32 | --model-cfg n_embed_dims=$n_embed_dims n_filters=$n_filters filter_width=$filter_width n_fully_connected=${n_fully_connected} n_hidden=$n_hidden embedding_init=$embedding_init train_embeddings=$train_embeddings optimizer=SGD learning_rate=0.001 momentum=0.0 decay=0.0 \ 33 | --shuffle \ 34 | --confusion-matrix \ 35 | --classification-report \ 36 | --class-weight-auto \ 37 | --class-weight-exponent 3 \ 38 | --verbose \ 39 | --n-train 50000 \ 40 | --n-epochs 3 \ 41 | --no-save 42 | #--log \ 43 | done 44 | done 45 | #| parallel --gnu -j 2 46 | -------------------------------------------------------------------------------- /bin/run-spelling-convnet-exp03-inputs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | model_dir=models/keras/spelling/convnet 4 | data_dir=data/spelling/experimental/ 5 | 6 | experiment_name=$(echo $0 | sed -r 's/.*-(exp[0-9][0-9]-..*).sh/\1/') 7 | experiment_dir=$model_dir/$experiment_name 8 | mkdir -p $experiment_dir 9 | 10 | n_embed_dims=10 11 | n_filters=3000 12 | filter_width=6 13 | n_fully_connected=1 14 | n_residual_blocks=0 15 | n_hidden=1000 16 | 17 | for operation in delete insert substitute transpose 18 | do 19 | for n_operations in 1 2 20 | do 21 | for n_errors_per_word in 3 10 22 | do 23 | model_dest=$experiment_dir/op_${operation}_n_ops_${n_operations}_n_errors_per_word_${n_errors_per_word} 24 | if [ -d $model_dest ] 25 | then 26 | continue 27 | fi 28 | echo ./train_keras.py $model_dir \ 29 | $data_dir/op-${operation}-distance-${n_operations}-errors-per-word-${n_errors_per_word}.h5 \ 30 | $data_dir/op-${operation}-distance-${n_operations}-errors-per-word-${n_errors_per_word}.h5 \ 31 | marked_chars \ 32 | --target-name binary_target \ 33 | --model-dest $model_dest \ 34 | --n-embeddings 61 \ 35 | --model-cfg n_embed_dims=$n_embed_dims n_filters=$n_filters filter_width=$filter_width n_fully_connected=$n_fully_connected n_residual_blocks=$n_residual_blocks n_hidden=$n_hidden patience=10 \ 36 | --shuffle \ 37 | --confusion-matrix \ 38 | --classification-report \ 39 | --class-weight-auto \ 40 | --class-weight-exponent 3 \ 41 | --early-stopping-metric val_f2 \ 42 | --checkpoint-metric val_f2 \ 43 | --verbose \ 44 | --log 45 | break 46 | done 47 | done 48 | done | parallel --gnu -j 2 49 | -------------------------------------------------------------------------------- /bin/run-spelling-convnet-exp04-real-errors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | model_dir=models/keras/spelling/convnet 4 | data_dir=data/spelling/experimental/ 5 | 6 | experiment_name=$(echo $0 | sed -r 's/.*-(exp[0-9][0-9]-..*).sh/\1/') 7 | experiment_dir=$model_dir/$experiment_name 8 | mkdir -p $experiment_dir 9 | 10 | n_embed_dims=10 11 | n_filters=3000 12 | filter_width=6 13 | n_fully_connected=1 14 | n_residual_blocks=0 15 | n_hidden=1000 16 | 17 | # Train two models, one with random 
artificial errors, one with artificial 18 | # errors learned from a corpus of real errors. 19 | 20 | corpora="non-word-error-detection-experiment-04-random-negative-examples.h5 non-word-error-detection-experiment-04-generated-negative-examples.h5" 21 | 22 | for corpus in $corpora 23 | do 24 | model_dest=$experiment_dir/$(echo $corpus | sed -e 's,-,_,g' -e 's,.h5,,') 25 | if [ -d $model_dest ] 26 | then 27 | continue 28 | fi 29 | ./train_keras.py $model_dir \ 30 | $data_dir/$corpus \ 31 | $data_dir/$corpus \ 32 | marked_chars \ 33 | --target-name binary_target \ 34 | --model-dest $model_dest \ 35 | --n-embeddings 255 \ 36 | --model-cfg n_embed_dims=$n_embed_dims n_filters=$n_filters filter_width=$filter_width n_fully_connected=$n_fully_connected n_residual_blocks=$n_residual_blocks n_hidden=$n_hidden patience=10 \ 37 | --shuffle \ 38 | --confusion-matrix \ 39 | --classification-report \ 40 | --class-weight-auto \ 41 | --class-weight-exponent 3 \ 42 | --early-stopping-metric val_f2 \ 43 | --checkpoint-metric val_f2 \ 44 | --save-all-checkpoints \ 45 | --verbose \ 46 | --log 47 | done 48 | #| parallel --gnu -j 2 49 | -------------------------------------------------------------------------------- /bin/run-spelling-convnet-exp05-multiclass.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | model_dir=models/keras/spelling/convnet 4 | data_dir=data/spelling/experimental/ 5 | distance=1 6 | errors=3 7 | 8 | experiment_name=$(echo $0 | sed -r 's/.*-(exp[0-9][0-9]-..*).sh/\1/') 9 | experiment_dir=$model_dir/$experiment_name 10 | mkdir -p $experiment_dir 11 | 12 | for operation in delete 13 | do 14 | for n_embed_dims in 100 15 | do 16 | for n_filters in 300 17 | do 18 | for filter_width in 8 19 | do 20 | for n_fully_connected in 2 21 | do 22 | for n_residual_blocks in 1 23 | do 24 | for n_hidden in 300 25 | do 26 | ./train_keras.py $model_dir \ 27 | $data_dir/op-$operation-distance-$distance-errors-per-word-${errors}/000.h5 \ 28 | $data_dir/op-$operation-distance-$distance-errors-per-word-${errors}/000.h5 \ 29 | chars \ 30 | --target-name multiclass_target \ 31 | --model-dest $experiment_dir/op_${operation}_n_embed_dims_${n_embed_dims}_n_filters_${n_filters}_filter_width_${filter_width}_n_fully_connected_${n_fully_connected}_n_residual_blocks_${n_residual_blocks}_n_hidden_${n_hidden}_n_hsm_classes_5000 \ 32 | --n-embeddings 61 \ 33 | --model-cfg n_embed_dims=$n_embed_dims n_filters=$n_filters filter_width=$filter_width n_fully_connected=${n_fully_connected} n_residual_blocks=$n_residual_blocks n_hidden=$n_hidden patience=240 n_hsm_classes=5000 batch_size=8 \ 34 | --shuffle \ 35 | --class-weight-auto \ 36 | --class-weight-exponent 3 \ 37 | --early-stopping-metric f1 \ 38 | --verbose \ 39 | --target-data $data_dir/op-$operation-distance-$distance-errors-per-word-${errors}.json \ 40 | --extra-train-file $(ls $data_dir/op-$operation-distance-$distance-errors-per-word-${errors}/* | egrep -v '000.h5') \ 41 | --n-classes 119773 \ 42 | --log 43 | done 44 | done 45 | done 46 | done 47 | done 48 | done 49 | done 50 | -------------------------------------------------------------------------------- /bin/run-spelling-convnet-residual.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | model_dir=models/keras/spelling/convnet 4 | data_dir=data/spelling/experimental/ 5 | distance=1 6 | errors=3 7 | nonce_interval= 8 | 9 | crossval_dir=$model_dir/crossval 10 | mkdir -p $crossval_dir 11 | 12 | 
#for operation in delete insert substitute transpose 13 | for operation in delete 14 | do 15 | for n_embed_dims in 100 16 | do 17 | for n_filters in 1000 18 | do 19 | for filter_width in 5 20 | do 21 | for n_fully_connected in 1 2 3 4 5 6 7 22 | do 23 | for n_residual_blocks in 0 24 | do 25 | for n_hidden in 100 26 | do 27 | echo ./train_keras.py $model_dir \ 28 | $data_dir/$operation-${errors}errors1word-distance-$distance${nonce}.h5 \ 29 | $data_dir/$operation-${errors}errors1word-distance-$distance${nonce}.h5 \ 30 | word \ 31 | --model-dest $crossval_dir/op_${operation}_n_embed_dims_${n_embed_dims}_n_filters_${n_filters}_filter_width_${filter_width}_n_fully_connected_${n_fully_connected}_n_residual_blocks_${n_residual_blocks}_n_hidden_${n_hidden} \ 32 | --target-name target \ 33 | --n-embeddings 61 \ 34 | --model-cfg n_embed_dims=$n_embed_dims n_filters=$n_filters filter_width=$filter_width n_fully_connected=${n_fully_connected} n_residual_blocks=$n_residual_blocks n_hidden=$n_hidden patience=10 \ 35 | --shuffle \ 36 | --confusion-matrix \ 37 | --classification-report \ 38 | --class-weight-auto \ 39 | --class-weight-exponent 3 \ 40 | --early-stopping-metric f2 \ 41 | --n-validation 100000 \ 42 | --log \ 43 | --verbose 44 | done 45 | done 46 | done 47 | done 48 | done 49 | done 50 | done | parallel --gnu -j 2 51 | -------------------------------------------------------------------------------- /bin/run-spelling-convnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | model_dir=models/keras/spelling/convnet 4 | data_dir=data/spelling/experimental/ 5 | distance=1 6 | errors=3 7 | nonce_interval=-nonce-interval-3 8 | 9 | mkdir -p $model_dir/crossval 10 | 11 | #for operation in delete insert substitute transpose 12 | #for nonce in "" "-nonce-interval-3" 13 | #do 14 | for operation in delete 15 | do 16 | for n_embed_dims in 100 17 | do 18 | for n_filters in 1000 19 | do 20 | for filter_width in 5 21 | do 22 | for n_hidden in 100 23 | do 24 | for n_fully_connected in 1 2 3 4 25 | do 26 | echo ./train_keras.py $model_dir \ 27 | $data_dir/$operation-${errors}errors1word-distance-$distance${nonce}.h5 \ 28 | $data_dir/$operation-${errors}errors1word-distance-$distance${nonce}.h5 \ 29 | word \ 30 | --model-dest $model_dir/crossval/op_${operation}_n_embed_dims_${n_embed_dims}_n_filters_${n_filters}_filter_width_${filter_width}_n_fully_connected_${n_fully_connected}_n_hidden_${n_hidden} \ 31 | --target-name target \ 32 | --n-embeddings 61 \ 33 | --model-cfg n_embed_dims=$n_embed_dims n_filters=$n_filters filter_width=$filter_width n_hidden=$n_hidden n_fully_connected=${n_fully_connected} patience=3 \ 34 | --shuffle \ 35 | --confusion-matrix \ 36 | --classification-report \ 37 | --class-weight-auto \ 38 | --class-weight-exponent 3 \ 39 | --early-stopping-metric f2 \ 40 | --n-validation 100000 \ 41 | --log 42 | done 43 | done 44 | done 45 | done 46 | done 47 | done | parallel --gnu -j 2 48 | -------------------------------------------------------------------------------- /bin/run-spelling-correction-isolated-binary.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | model_dir=models/keras/spelling/correction/isolated/binary/ 4 | data_dir=data/spelling/experimental/ 5 | 6 | experiment_name=$(echo $0 | sed -r 's/.*-(exp[0-9][0-9]-..*).sh/\1/') 7 | experiment_dir=$model_dir/$experiment_name 8 | mkdir -p $experiment_dir 9 | 10 | n_embed_dims=10 11 | n_filters=3000 12 | filter_width=6 13 | 
n_fully_connected=2 14 | n_residual_blocks=2 15 | n_hidden=1000 16 | 17 | #corpora="non-word-error-detection-experiment-04-random-negative-examples.h5 non-word-error-detection-experiment-04-generated-negative-examples.h5" 18 | #corpora="non-word-error-detection-experiment-04-random-negative-examples.h5" 19 | corpora="non-word-error-detection-experiment-04-generated-negative-examples.h5" 20 | 21 | for corpus in $corpora 22 | do 23 | model_dest=$experiment_dir/$(echo $corpus | sed -e 's,-,_,g' -e 's,.h5,,') 24 | if [ -d $model_dest ] 25 | then 26 | continue 27 | fi 28 | ./train_keras_simple.py $model_dir \ 29 | $data_dir/$corpus \ 30 | $data_dir/$corpus \ 31 | non_word_marked_chars real_word_marked_chars \ 32 | --target-name binary_target \ 33 | --model-dest $model_dest \ 34 | --n-embeddings 255 \ 35 | --model-cfg n_embed_dims=$n_embed_dims n_filters=$n_filters filter_width=$filter_width n_fully_connected=$n_fully_connected n_residual_blocks=$n_residual_blocks n_hidden=$n_hidden patience=10 \ 36 | --class-weight-exponent 3 \ 37 | --verbose \ 38 | --no-save 39 | done 40 | #--log 41 | #| parallel --gnu -j 2 42 | -------------------------------------------------------------------------------- /bin/run-spelling-correction-isolated-multiclass.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | model_dir=models/keras/spelling/correction/isolated/multiclass/ 4 | data_dir=data/spelling/experimental/ 5 | 6 | experiment_name=$(echo $0 | sed -r 's/.*-(exp[0-9][0-9]-..*).sh/\1/') 7 | experiment_dir=$model_dir/$experiment_name 8 | mkdir -p $experiment_dir 9 | 10 | n_embed_dims=10 11 | n_filters=3000 12 | filter_width=6 13 | n_fully_connected=2 14 | n_residual_blocks=2 15 | n_hidden=1000 16 | 17 | #corpora="non-word-error-detection-experiment-04-random-negative-examples.h5 non-word-error-detection-experiment-04-generated-negative-examples.h5" 18 | #corpora="non-word-error-detection-experiment-04-random-negative-examples.h5" 19 | corpora="non-word-error-detection-experiment-04-generated-negative-examples.h5" 20 | 21 | for corpus in $corpora 22 | do 23 | model_dest=$experiment_dir/$(echo $corpus | sed -e 's,-,_,g' -e 's,.h5,,') 24 | if [ -d $model_dest ] 25 | then 26 | continue 27 | fi 28 | ./train_keras_simple.py $model_dir \ 29 | $data_dir/$corpus \ 30 | $data_dir/$corpus \ 31 | non_word_marked_chars \ 32 | --target-name multiclass_correction_target \ 33 | --model-dest $model_dest \ 34 | --n-embeddings 255 \ 35 | --model-cfg n_embed_dims=$n_embed_dims n_filters=$n_filters filter_width=$filter_width n_fully_connected=$n_fully_connected n_residual_blocks=$n_residual_blocks n_hidden=$n_hidden patience=10 \ 36 | --class-weight-exponent 3 \ 37 | --verbose \ 38 | --n-epochs 3 \ 39 | --no-save 40 | done 41 | #--log 42 | #| parallel --gnu -j 2 43 | -------------------------------------------------------------------------------- /bin/run-spelling-lstm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -xe 2 | 3 | ./train_keras.py models/keras/spelling/lstm \ 4 | data/spelling/birbeck-train.h5 \ 5 | data/spelling/birbeck-valid.h5 \ 6 | word \ 7 | --target-name is_real_word \ 8 | --n-embeddings 56 \ 9 | --model-cfg n_units=20 n_embed_dims=25 patience=1000 train_embeddings=true embedding_init=uniform optimizer=Adam \ 10 | --shuffle \ 11 | --log \ 12 | --confusion-matrix \ 13 | --classification-report \ 14 | --class-weight-auto \ 15 | --class-weight-exponent 5 \ 16 | --n-epochs 350 17 | 
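All of the driver scripts above follow the same pattern: enumerate hyperparameter settings in nested shell loops, name the model output directory after the settings, skip any combination whose directory already exists, and pass the settings to train_keras.py as space-separated key=value pairs via --model-cfg (several scripts echo the commands and pipe them to GNU parallel -j 2 so two configurations train at once). The sketch below shows one plausible way such key=value overrides could be folded into a configuration dict; it is only an illustration under that assumption and is not taken from the repository's modeling/parser.py, whose contents are not shown here.

    # Hypothetical helper, not the repository's parser: coerce
    # "--model-cfg n_filters=3000 patience=10 optimizer=SGD"-style overrides.
    import json

    def parse_model_cfg(pairs, defaults=None):
        cfg = dict(defaults or {})
        for pair in pairs:
            key, _, raw = pair.partition('=')
            try:
                cfg[key] = json.loads(raw)   # numbers, true/false
            except ValueError:
                cfg[key] = raw               # plain strings such as "SGD"
        return cfg

    # The settings used in run-spelling-convnet-exp01-convparams-final.sh:
    cfg = parse_model_cfg(["n_embed_dims=10", "n_filters=3000", "filter_width=6",
                           "n_fully_connected=1", "n_residual_blocks=0",
                           "n_hidden=1000", "patience=10", "batch_size=32"])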
-------------------------------------------------------------------------------- /contrasting_cases.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | import numpy as np 6 | np.random.seed(1337) # for reproducibility 7 | 8 | import sys 9 | import argparse 10 | import h5py 11 | 12 | from keras.datasets import mnist 13 | from keras.models import Sequential 14 | from keras.layers.core import Dense, Dropout, Activation 15 | from keras.layers.normalization import BatchNormalization 16 | from keras.optimizers import SGD, Adadelta 17 | from keras.utils import np_utils 18 | 19 | from outliers import PMeansMultivariateNormal 20 | 21 | def create_dataset(n, train_size, valid_size): 22 | means = np.arange(100) 23 | cov = [range(1, 101)] * 100 24 | mvn = PMeansMultivariateNormal(means, cov, (n,)) 25 | X = mvn.generate() 26 | 27 | assert n % 2 == 0 28 | assert n > train_size + valid_size 29 | 30 | # Make the data different along one dimension. 31 | even = np.arange(0, n, step=2) 32 | X[even, 0] = np.random.uniform(-.25, 1.75, size=n/2) 33 | # Make each odd-numbered row the inverse of its previous row. 34 | X[even+1, 0] = np.random.uniform(-1.75, .25, size=n/2) 35 | 36 | X += np.random.uniform(0.01, size=X.shape) 37 | X = X.astype(np.float32) 38 | 39 | y = np.array([[0,1] * (n/2)]).reshape((n,1)) 40 | y = y.astype(np.int32) 41 | 42 | X_train = X[0:train_size, :] 43 | X_valid = X[train_size:train_size+valid_size, :] 44 | X_test = X[train_size+valid_size:, :] 45 | 46 | y_train = y[0:train_size] 47 | y_valid = y[train_size:train_size+valid_size] 48 | y_test = y[train_size+valid_size:] 49 | 50 | return X_train, X_valid, X_test, \ 51 | y_train, y_valid, y_test 52 | 53 | 54 | def build_model(n_inputs, n_hidden, n_classes): 55 | model = Sequential() 56 | model.add(Dense(n_inputs, n_hidden)) 57 | model.add(BatchNormalization((n_hidden,))) 58 | model.add(Activation('relu')) 59 | model.add(Dense(n_hidden, n_hidden)) 60 | model.add(BatchNormalization((n_hidden,))) 61 | model.add(Activation('relu')) 62 | model.add(Dense(n_hidden, n_hidden)) 63 | model.add(BatchNormalization((n_hidden,))) 64 | model.add(Activation('relu')) 65 | model.add(Dense(n_hidden, n_hidden)) 66 | model.add(BatchNormalization((n_hidden,))) 67 | model.add(Activation('relu')) 68 | model.add(Dense(n_hidden, n_classes)) 69 | model.add(Activation('softmax')) 70 | 71 | optimizer = Adadelta() 72 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 73 | 74 | return model 75 | 76 | def get_parser(): 77 | parser = argparse.ArgumentParser( 78 | description='train a model to demonstrate contrasting cases') 79 | parser.add_argument( 80 | '--shuffle', action='store_true', 81 | help='shuffle the training examples after each epoch (i.e. 
do not use contrasting cases)') 82 | parser.add_argument( 83 | '--n', type=int, default=10000, 84 | help='the size of the data set to create') 85 | parser.add_argument( 86 | '--train-size', type=int, default=7000, 87 | help='the number of examples from the data set to allocate to training') 88 | parser.add_argument( 89 | '--valid-size', type=int, default=1500, 90 | help='the number of examples from the data set to allocate to validation') 91 | parser.add_argument( 92 | '--batch-size', type=int, default=10, 93 | help='mini-batch size') 94 | parser.add_argument( 95 | '--n-epochs', type=int, default=20, 96 | help='number of epochs to train') 97 | parser.add_argument( 98 | '--verbose', action='store_true', 99 | help='print progress') 100 | 101 | return parser.parse_args() 102 | 103 | def main(args): 104 | x_train, x_valid, x_test, \ 105 | y_train, y_valid, y_test = create_dataset( 106 | args.n, args.train_size, args.valid_size) 107 | 108 | y_train = y_train.reshape((y_train.shape[0], 1)) 109 | y_valid = y_valid.reshape((y_valid.shape[0], 1)) 110 | y_test = y_test.reshape((y_test.shape[0], 1)) 111 | 112 | n_classes = len(np.unique(y_train)) 113 | 114 | ''' 115 | print('y_train', y_train.shape) 116 | print('y_valid', y_valid.shape) 117 | print('y_test', y_test.shape) 118 | print('n_classes', n_classes, np.unique(y_train)) 119 | ''' 120 | 121 | # convert class vectors to binary class matrices 122 | y_train = np_utils.to_categorical( 123 | y_train, n_classes).astype(np.int32) 124 | y_valid = np_utils.to_categorical( 125 | y_valid, n_classes).astype(np.int32) 126 | y_test = np_utils.to_categorical( 127 | y_test, n_classes).astype(np.int32) 128 | 129 | if args.shuffle: 130 | print('Training (shuffled)') 131 | # Leave odd-numbered rows where they are; shuffle only 132 | # even-numbered ones. This ensures that each minibatch has one 133 | # example from each class. 134 | perm = np.arange(x_train.shape[0]) 135 | evens = np.arange(0, x_train.shape[0], 2) 136 | perm[evens] = np.random.permutation(evens) 137 | else: 138 | print('Training (contrasting cases)') 139 | 140 | model = build_model(100, 20, n_classes) 141 | 142 | print('x_train', x_train.dtype) 143 | print('y_train', y_train.dtype) 144 | 145 | model.fit(x_train, y_train, 146 | batch_size=args.batch_size, 147 | shuffle=False, 148 | nb_epoch=args.n_epochs, 149 | show_accuracy=True, 150 | verbose=2 if args.verbose else 0, 151 | validation_data=(x_valid, y_valid)) 152 | 153 | score = model.evaluate(x_test, y_test, 154 | show_accuracy=True, 155 | verbose=1 if args.verbose else 0) 156 | 157 | if args.shuffle: 158 | print('Test accuracy (shuffled)', score[1]) 159 | else: 160 | print('Test accuracy (contrasting cases)', score[1]) 161 | 162 | if __name__ == '__main__': 163 | sys.exit(main(get_parser())) 164 | -------------------------------------------------------------------------------- /modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import (split_data, mask_zero_for_rnn, balance_datasets) 2 | -------------------------------------------------------------------------------- /modeling/autograd_examples.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import grad 3 | 4 | def sigmoid(x): 5 | return 0.5*(np.tanh(x) + 1) 6 | 7 | def logistic_predictions(weights, inputs): 8 | # Outputs probability of a label being true according to logistic model. 
9 | return sigmoid(np.dot(inputs, weights)) 10 | 11 | def training_loss(weights): 12 | # Training loss is the negative log-likelihood of the training labels. 13 | preds = logistic_predictions(weights, inputs) 14 | label_probabilities = preds * targets + (1 - preds) * (1 - targets) 15 | return -np.sum(np.log(label_probabilities)) 16 | 17 | # Build a toy dataset. 18 | inputs = np.array([[0.52, 1.12, 0.77], 19 | [0.88, -1.08, 0.15], 20 | [0.52, 0.06, -1.30], 21 | [0.74, -2.49, 1.39]]) 22 | targets = np.array([True, True, False, True]) 23 | 24 | # Define a function that returns gradients of training loss using autograd. 25 | training_gradient_fun = grad(training_loss) 26 | 27 | # Optimize weights using gradient descent. 28 | weights = np.array([0.0, 0.0, 0.0]) 29 | print "Initial loss:", training_loss(weights) 30 | for i in xrange(100): 31 | weights -= training_gradient_fun(weights) * 0.01 32 | print "Trained loss:", training_loss(weights) 33 | 34 | def taylor_sine(x): 35 | ans = currterm = x 36 | i = 0 37 | while np.abs(currterm) > 0.001: 38 | currterm = -currterm * x**2 / ((2 * i + 3) * (2 * i + 2)) 39 | ans = ans + currterm 40 | i += 1 41 | return ans 42 | 43 | grad_sine = grad(taylor_sine) 44 | print "Gradient of sin(pi) is", grad_sine(np.pi) 45 | -------------------------------------------------------------------------------- /modeling/builders.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from keras.layers.core import Dense 4 | from keras.layers.embeddings import Embedding 5 | from keras.layers.convolutional import (Convolution1D, MaxPooling1D) 6 | from keras.optimizers import (SGD, Adam, Adadelta, Adagrad, RMSprop) 7 | from keras.constraints import maxnorm 8 | from keras.regularizers import l2 9 | 10 | from modeling.layers import ImmutableEmbedding, HierarchicalSoftmax 11 | 12 | def build_embedding_layer(config, input_width=None): 13 | try: 14 | n_embeddings = config.n_vocab 15 | except AttributeError: 16 | n_embeddings = config.n_embeddings 17 | 18 | try: 19 | input_width = config.input_width 20 | except AttributeError: 21 | input_width = input_width 22 | 23 | try: 24 | mask_zero = config.mask_zero 25 | except AttributeError: 26 | mask_zero = False 27 | 28 | if hasattr(config, 'embedding_weights') and config.embedding_weights is not None: 29 | W = np.load(config.embedding_weights) 30 | if config.train_embeddings is True or config.train_embeddings == 'true': 31 | return Embedding(n_embeddings, config.n_embed_dims, 32 | weights=[W], input_length=input_width, 33 | W_constraint=maxnorm(config.embedding_max_norm), 34 | mask_zero=mask_zero) 35 | else: 36 | return ImmutableEmbedding(n_embeddings, config.n_embed_dims, 37 | weights=[W], mask_zero=mask_zero, 38 | input_length=input_width) 39 | else: 40 | if config.train_embeddings is True: 41 | return Embedding(n_embeddings, config.n_embed_dims, 42 | init=config.embedding_init, 43 | W_constraint=maxnorm(config.embedding_max_norm), 44 | mask_zero=mask_zero, 45 | input_length=input_width) 46 | else: 47 | return ImmutableEmbedding(n_embeddings, config.n_embed_dims, 48 | init=config.embedding_init, 49 | mask_zero=mask_zero, 50 | input_length=input_width) 51 | 52 | def build_convolutional_layer(config): 53 | return Convolution1D(config.n_filters, config.filter_width, 54 | W_constraint=maxnorm(config.filter_max_norm), 55 | border_mode=config.border_mode, 56 | W_regularizer=l2(config.l2_penalty)) 57 | 58 | def build_pooling_layer(config, input_width=None, filter_width=None): 59 | 
try: 60 | input_width = config.input_width 61 | except AttributeError: 62 | assert input_width is not None 63 | 64 | try: 65 | filter_width = config.filter_width 66 | except AttributeError: 67 | assert filter_width is not None 68 | 69 | return MaxPooling1D( 70 | pool_length=input_width - filter_width + 1, 71 | stride=1) 72 | 73 | def build_dense_layer(config, n_hidden=None, activation='linear'): 74 | if n_hidden is None: 75 | n_hidden = config.n_hidden 76 | return Dense(n_hidden, 77 | W_regularizer=l2(config.l2_penalty), 78 | W_constraint=maxnorm(config.dense_max_norm), 79 | activation=activation) 80 | 81 | def build_hierarchical_softmax_layer(config): 82 | # This n_classes is different from the number of unique target values in 83 | # the training set. Hierarchical softmax assigns each word to a class 84 | # and decomposes the softmax into a prediction that's conditioned on 85 | # class membership. 86 | return HierarchicalSoftmax(config.n_classes, config.n_hsm_classes, 87 | batch_size=config.batch_size) 88 | 89 | def load_weights(config, model): 90 | if hasattr(config, 'model_weights') and config.model_weights is not None: 91 | print('Loading weights from %s' % config.model_weights) 92 | model.load_weights(config.model_weights) 93 | 94 | def build_optimizer(config): 95 | if config.optimizer == 'SGD': 96 | optimizer = SGD(lr=config.learning_rate, 97 | decay=config.decay, momentum=config.momentum, 98 | clipnorm=config.clipnorm) 99 | elif config.optimizer == 'Adam': 100 | optimizer = Adam(clipnorm=config.clipnorm) 101 | elif config.optimizer == 'RMSprop': 102 | optimizer = RMSprop(clipnorm=config.clipnorm) 103 | elif config.optimizer == 'Adadelta': 104 | optimizer = Adadelta(clipnorm=config.clipnorm) 105 | elif config.optimizer == 'Adagrad': 106 | optimizer = Adagrad(clipnorm=config.clipnorm) 107 | else: 108 | raise ValueError("don't know how to use optimizer {0}".format(config.optimizer)) 109 | 110 | return optimizer 111 | -------------------------------------------------------------------------------- /modeling/callbacks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import keras 4 | from keras.callbacks import Callback, EarlyStopping 5 | import keras.callbacks 6 | import numpy as np 7 | import six 8 | from sklearn.metrics import (classification_report, 9 | confusion_matrix, f1_score, fbeta_score) 10 | 11 | def predict(model, x, marshaller, batch_size=128): 12 | if isinstance(model, keras.models.Graph): 13 | if marshaller is None: 14 | raise ValueError("a marshaller is required with Graphs") 15 | x = marshaller.marshal(x) 16 | output = model.predict(x, batch_size=batch_size) 17 | y_hat = marshaller.unmarshal(output) 18 | y_hat = np.argmax(y_hat, axis=1) 19 | else: 20 | y_hat = model.predict_classes(x, verbose=0, batch_size=batch_size) 21 | return y_hat 22 | 23 | class PredictionCallback(Callback): 24 | def __init__(self, x, logger, marshaller=None, iteration_freq=10, batch_size=128): 25 | self.__dict__.update(locals()) 26 | self.callbacks = [] 27 | 28 | def add(self, callback): 29 | self.callbacks.append(callback) 30 | 31 | def _set_model(self, model): 32 | self.model = model 33 | for cb in self.callbacks: 34 | cb._set_model(model) 35 | 36 | def on_batch_begin(self, batch, logs={}): 37 | pass 38 | 39 | def on_batch_end(self, batch, logs={}): 40 | pass 41 | 42 | def on_epoch_begin(self, epoch, logs={}): 43 | pass 44 | 45 | def on_epoch_end(self, epoch, logs={}): 46 | if 'iteration' in logs.keys() and 
logs['iteration'] % self.iteration_freq != 0: 47 | # If we've broken a large training set into smaller chunks, we don't 48 | # need to run the classification report after every chunk. 49 | return 50 | 51 | y_hat = predict(self.model, self.x, self.marshaller, batch_size=self.batch_size) 52 | logs['y_hat'] = y_hat 53 | for cb in self.callbacks: 54 | cb.on_epoch_end(epoch, logs) 55 | 56 | def on_train_begin(self, logs={}): 57 | pass 58 | 59 | def on_train_end(self, logs={}): 60 | pass 61 | 62 | class DelegatingMetricCallback(Callback): 63 | def __init__(self, x, y, logger, metric_name, delegate, marshaller=None, batch_size=128): 64 | self.__dict__.update(locals()) 65 | del self.self 66 | 67 | def _set_model(self, model): 68 | self.model = model 69 | self.delegate._set_model(model) 70 | 71 | def on_epoch_end(self, epoch, logs={}): 72 | try: 73 | y_hat = logs['y_hat'] 74 | except KeyError: 75 | y_hat = predict(self.model, self.x, self.marshaller, batch_size=self.batch_size) 76 | metric = self.build_metric(logs) 77 | logs[self.metric_name] = metric(self.y, y_hat) 78 | self.logger('%s %.03f' % (self.metric_name, logs[self.metric_name])) 79 | self.delegate.on_epoch_end(epoch, logs) 80 | 81 | def build_metric(self, logs): 82 | return { 83 | 'val_loss': lambda y,y_hat: logs['val_loss'], 84 | 'val_acc': lambda y,y_hat: logs['val_acc'], 85 | 'val_f1': f1_score, 86 | 'val_f1': lambda y,y_hat: fbeta_score(y, y_hat, beta=0.5), 87 | 'val_f2': lambda y,y_hat: fbeta_score(y, y_hat, beta=2) 88 | }[self.metric_name] 89 | 90 | class ConfusionMatrix(Callback): 91 | def __init__(self, x, y, logger, marshaller=None, batch_size=128): 92 | self.__dict__.update(locals()) 93 | del self.self 94 | 95 | def on_epoch_end(self, epoch, logs={}): 96 | try: 97 | y_hat = logs['y_hat'] 98 | except KeyError: 99 | y_hat = predict(self.model, self.x, self.marshaller, batch_size=self.batch_size) 100 | self.logger('\nConfusion matrix') 101 | self.logger(confusion_matrix(self.y, y_hat)) 102 | 103 | class ClassificationReport(Callback): 104 | def __init__(self, x, y, logger, target_names=None, marshaller=None, batch_size=128): 105 | self.__dict__.update(locals()) 106 | del self.self 107 | 108 | self.labels = np.arange(max(y)+1) 109 | 110 | if target_names is None: 111 | self.target_names = [str(t) for t in self.labels] 112 | else: 113 | self.target_names = [str(tn) for tn in target_names] 114 | 115 | def on_epoch_end(self, epoch, logs={}): 116 | try: 117 | y_hat = logs['y_hat'] 118 | except KeyError: 119 | y_hat = predict(self.model, self.x, self.marshaller, batch_size=self.batch_size) 120 | 121 | self.logger('\nClassification report') 122 | self.logger(classification_report( 123 | self.y, y_hat, 124 | labels=self.labels, target_names=self.target_names)) 125 | 126 | class OptimizerMonitor(Callback): 127 | def __init__(self, logger): 128 | self.logger = logger 129 | 130 | def on_epoch_end(self, epoch, logs={}): 131 | if not hasattr(self.model.optimizer, 'lr'): 132 | return 133 | 134 | lr = self.model.optimizer.lr.get_value() 135 | optimizer_state = str({ 'lr': lr }) 136 | 137 | if 'iteration' in logs.keys(): 138 | self.logger("epoch {epoch} iteration {iteration} - optimizer state {optimizer_state}".format( 139 | epoch=epoch, iteration=logs['iteration'], optimizer_state=optimizer_state)) 140 | else: 141 | self.logger("epoch {epoch} - optimizer state {optimizer_state}".format( 142 | epoch=epoch, optimizer_state=optimizer_state)) 143 | 144 | class VersionedModelCheckpoint(Callback): 145 | def __init__(self, filepath, max_epochs=10000, 
**kwargs): 146 | kwargs['save_best_only'] = False 147 | self.delegate = keras.callbacks.ModelCheckpoint(filepath, **kwargs) 148 | self.filepath = filepath 149 | self.basepath, self.ext = os.path.splitext(filepath) 150 | self.epoch = 0 151 | width = int(np.log10(max_epochs)) + 1 152 | self.fmt_string = '{basepath}-{epoch:0' + str(width) + 'd}{ext}' 153 | 154 | def on_epoch_end(self, epoch, logs={}): 155 | logs['val_loss'] = -self.epoch 156 | self.delegate.on_epoch_end(epoch, logs) 157 | 158 | if os.path.exists(self.filepath): 159 | newpath = self.fmt_string.format( 160 | basepath=self.basepath, epoch=self.epoch, ext=self.ext) 161 | os.rename(self.filepath, newpath) 162 | self.epoch += 1 163 | 164 | def _set_model(self, model): 165 | self.model = model 166 | self.delegate._set_model(model) 167 | 168 | class SingleStepLearningRateSchedule(keras.callbacks.Callback): 169 | def __init__(self, patience=5, learning_rate_divisor=10.): 170 | self.patience = patience 171 | self.learning_rate_divisor = learning_rate_divisor 172 | self.best_loss = np.inf 173 | self.best_epoch = 0 174 | self.updated_lr = False 175 | 176 | def on_epoch_end(self, epoch, logs={}): 177 | if self.updated_lr: 178 | return 179 | 180 | if logs['val_loss'] < self.best_loss: 181 | self.best_loss = logs['val_loss'] 182 | self.best_epoch = epoch 183 | 184 | if epoch - self.best_epoch > self.patience: 185 | old_lr = self.model.optimizer.lr.get_value() 186 | new_lr = (old_lr / self.learning_rate_divisor).astype(np.float32) 187 | print('old_lr', old_lr, 'new_lr', new_lr) 188 | self.model.optimizer.lr.set_value(new_lr) 189 | self.learning_rate_divisor = 1. 190 | -------------------------------------------------------------------------------- /modeling/chainer_model.py: -------------------------------------------------------------------------------- 1 | import chainer 2 | import chainer.functions as F 3 | from chainer import optimizers 4 | 5 | class Model(object): 6 | def __init__(self, args): 7 | for k,v in vars(args).iteritems(): 8 | self.__dict__[k] = v 9 | self.init_params() 10 | self.init_optimizer() 11 | self.optimizer.setup(self.params) 12 | 13 | def init_optimizer(self): 14 | if self.optimizer == 'SGD': 15 | self.optimizer = optimizers.MomentumSGD( 16 | lr=self.learning_rate, momentum=self.momentum) 17 | elif self.optimizer == 'AdaDelta': 18 | self.optimizer = optimizers.AdaDelta() 19 | elif self.optimizer == 'AdaGrad': 20 | self.optimizer = optimizers.AdaGrad() 21 | elif self.optimizer == 'Adam': 22 | self.optimizer = optimizers.Adam() 23 | elif self.optimizer == 'RMSprop': 24 | self.optimizer = optimizers.RMSprop() 25 | 26 | def update(self): 27 | if hasattr(self, 'weight_decay'): 28 | if self.weight_decay > 0: 29 | self.optimizer.weight_decay(self.weight_decay) 30 | self.optimizer.update() 31 | 32 | def iteration(self, data, target, train=False): 33 | if train: 34 | self.optimizer.zero_grads() 35 | pred = self.forward(data) 36 | loss, metric = self.loss(pred, target) 37 | if train: 38 | loss.backward() 39 | self.update() 40 | return pred, loss, metric 41 | 42 | def fit(self, data, target): 43 | pred, loss, metric = self.iteration(data, target, train=True) 44 | return pred, loss, metric 45 | 46 | def evaluate(self, data, target): 47 | pred, loss, metric = self.iteration(data, target) 48 | return pred, loss, metric 49 | 50 | def init_params(self): 51 | raise NotImplementedError() 52 | 53 | def forward(self): 54 | raise NotImplementedError() 55 | 56 | def loss(self, pred, target): 57 | raise NotImplementedError() 58 | 59 | def 
predict(self, data, target=None): 60 | raise NotImplementedError() 61 | 62 | def predict_proba(self, data): 63 | raise NotImplementedError() 64 | 65 | def to_gpu(self): 66 | self.params.to_gpu() 67 | 68 | def to_cpu(self): 69 | self.params.to_cpu() 70 | 71 | class Classifier(Model): 72 | def loss(self, pred, target): 73 | target = chainer.Variable(target) 74 | loss = F.softmax_cross_entropy(pred, target) 75 | metric = F.accuracy(pred, target) 76 | return loss, metric 77 | 78 | def predict(self, data, target=None): 79 | pred = self.forward(data, train=False) 80 | if target is None: 81 | return np.argmax(F.softmax(pred).data, axis=1) 82 | else: 83 | loss, metric = self.loss(pred, target) 84 | return pred, loss, metric 85 | 86 | def predict_proba(self, data): 87 | pred = self.forward(data, train=False) 88 | return F.softmax(pred).data 89 | 90 | class Regressor(Model): 91 | def loss(self, pred, target): 92 | target = chainer.Variable(target) 93 | loss = F.mean_squared_error(pred, target) 94 | return loss, loss 95 | 96 | def predict(self, data, target=None): 97 | pred = self.forward(data, train=False) 98 | if target is None: 99 | return pred 100 | else: 101 | loss, metric = self.loss(pred, target) 102 | return pred, loss, metric 103 | -------------------------------------------------------------------------------- /modeling/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | 5 | class GraphMarshaller(object): 6 | """ 7 | Interface for classes that handle preparing inputs and unpacking 8 | outputs of Keras Graph models. 9 | """ 10 | def marshal(self, data, target=None): 11 | raise NotImplementedError() 12 | 13 | def unmsrhal(self, output): 14 | raise NotImplementedError() 15 | 16 | def split_data(hdf5_path, split_size, output_dir=None): 17 | """ 18 | Split the datasets in an HDF5 file into smaller sets and save them 19 | to new files. By default the files are put into a subdirectory of 20 | the directory containing `hdf5_path`. The subdirectory is created 21 | if it does not exist; the name of the directory is `hdf5_path` with 22 | the file suffix removed. To write to a different directory, provide 23 | the path to the existing directory in `output_dir`. 24 | 25 | Parameters 26 | ------- 27 | hdf5_path : str 28 | The path to the HDF5 file. 29 | split_size : int 30 | The size of the 31 | """ 32 | f = h5py.File(hdf5_path) 33 | n = 0 34 | # Find the largest n. 35 | for k,v in f.iteritems(): 36 | n = max(n, v.value.shape[0]) 37 | 38 | if output_dir is None: 39 | output_dir = os.path.splitext(hdf5_path)[0] 40 | os.mkdir(output_dir) 41 | 42 | # Copy subsequences of the data to smaller files. 43 | width = int(np.ceil(np.log10(n / split_size))) 44 | for i,j in enumerate(range(0, n, split_size)): 45 | outfile = '{dir}/{num:{fill}{width}}.h5'.format( 46 | dir=output_dir, num=i, fill='0', width=width) 47 | print(outfile) 48 | fout = h5py.File(outfile, 'w') 49 | for k,v in f.iteritems(): 50 | subset = v[j:j+split_size] 51 | fout.create_dataset(k, data=subset, dtype=v.dtype) 52 | fout.close() 53 | 54 | def balance_classes(target): 55 | """ 56 | Get a subset of the indices in the target variable of an imbalanced dataset 57 | such that each class has the same number of occurrences. This is to be used 58 | in conjunction with `balance_datasets` to create a balanced dataset. 59 | 60 | Parameters 61 | --------- 62 | target : array-like of int 63 | The target variable from which to sample. 
64 | """ 65 | n = min(np.bincount(target)) 66 | n_even = n/2 67 | indices = [] 68 | 69 | for code in np.arange(max(target)+1): 70 | mask = target == code 71 | idx = np.sort(np.where(mask)[0]) 72 | # Only sample from the even indices so the downsampled dataset 73 | # still consists of pairs of positive and negative examples. 74 | even_idx = idx[idx % 2 == 0] 75 | sampled_even_idx = np.sort(np.random.choice(even_idx, size=n_even, replace=False)) 76 | # Add the odd-numbered examples of errors. 77 | sampled_idx = np.concatenate([sampled_even_idx, sampled_even_idx+1]) 78 | sampled_idx = np.sort(sampled_idx) 79 | indices.extend(sampled_idx) 80 | 81 | return np.sort(indices) 82 | 83 | def balance_datasets(hdf5_file, key='original_word_code'): 84 | """ 85 | Balance the datasets in an HDF5 file. A balanced sample of 86 | the dataset denoted by `key` is taken. The corresponding 87 | examples from all other datasets are sampled, too. 88 | 89 | Parameters 90 | ----------- 91 | hdf5_file : h5py.File 92 | An open HDF5 file. 93 | key : str 94 | The key of the target variable in `hdf5_file` to balance. 95 | """ 96 | idx = balance_classes(hdf5_file[key].value) 97 | for key in hdf5_file.keys(): 98 | value = hdf5_file[key].value 99 | del hdf5_file[key] 100 | hdf5_file.create_dataset(key, data=value[idx], dtype=value.dtype) 101 | 102 | def mask_zero_for_rnn(hdf5_fh, n_vocab): 103 | """ 104 | Given an HDF5 data set with inputs `X` (the entire sentence), 105 | `Xwindow` (the window of words around e.g. a preposition), and 106 | `XwindowNULL` (the window of words as in `Xwindow` with the center 107 | word replaced by a nonce), transform the inputs as follows: 108 | 109 | a) Change 0 in every position before the end of the sentence to 110 | vocab_size + 1. 111 | b) Change 0 in every position after the beginning of the sentence 112 | to vocab_size + 1. 113 | 114 | Unmodified, the inputs `X`, etc., use 0 to indicate both that the 115 | word is unknown and that the sentence has ended (i.e. for padding 116 | a variable-length input like a sentence to fill all of the columns 117 | of a matrix). The reasons to change this is that (1) some models, 118 | like recurrent neural networks, pay attention to every detail of 119 | their input and (2) some frameworks, like Keras, allow you do mask 120 | out 0's, so the model gets less confused. 121 | 122 | The `len` key has the offset at which the sentence ends in `X`. 123 | 124 | The `window_position` key in the data set has the offset at which 125 | the preposition occurs in `X`. 126 | 127 | Parameters 128 | ------------ 129 | hdf5_fh : 130 | A open, writable HDF5 file. 131 | n_vocab : int 132 | The number of words in the model's vocabulary. 
133 | """ 134 | XRNN = renumber_unknowns_in_sentence( 135 | hdf5_fh['X'].value, 136 | hdf5_fh['len'].value, 137 | n_vocab) 138 | hdf5_fh.create_dataset('XRNN', data=XRNN, dtype=XRNN.dtype) 139 | 140 | XwindowRNN = renumber_unknowns_in_window( 141 | hdf5_fh['Xwindow'].value, 142 | hdf5_fh['window_position'].value, 143 | n_vocab) 144 | hdf5_fh.create_dataset('XwindowRNN', data=XwindowRNN, dtype=XwindowRNN.dtype) 145 | 146 | XwindowNULLRNN = renumber_unknowns_in_window( 147 | hdf5_fh['XwindowNULL'].value, 148 | hdf5_fh['window_position'].value, 149 | n_vocab) 150 | hdf5_fh.create_dataset('XwindowNULLRNN', data=XwindowNULLRNN, dtype=XwindowNULLRNN.dtype) 151 | 152 | return hdf5_fh 153 | 154 | def renumber_unknowns_in_sentence(X, lengths, n_vocab): 155 | """ 156 | So, to transform `X` as described in item (a) above, 157 | 158 | * Find every occurrence of a 0 before the end of a sentence, 159 | using `len` to determine where the sentence ends. 160 | * Replace those occurences with `n_vocab`. 161 | """ 162 | 163 | X = X.copy() 164 | for i,length in enumerate(lengths): 165 | sent = X[i] 166 | zeros_in_sent = [False] * X.shape[1] 167 | # Add 2 for leading '' and trailing ''. 168 | zeros_in_sent[:length+2] = sent[:length+2] == 0 169 | if np.any(zeros_in_sent): 170 | X[i, zeros_in_sent] = n_vocab 171 | return X 172 | 173 | def renumber_unknowns_in_window(Xwindow, window_positions, n_vocab): 174 | """ 175 | And to transform `Xwindow` and `XwindowNULL` for item (b), 176 | 177 | * Find every occurrence of a 0 after the beginning of a sentence 178 | using `window_position` to determine where in the window the 179 | sentence begins. If `window_position` is 0, the first two 180 | positions in the window will be 0, because the preposition in 181 | that case is the first word in the sentence and it appears at 182 | the center of the window (index 2, with windows of length 5). 183 | Those first two words must remain 0, as they indicate the 184 | absence of words. If `window_position` is 1, only the first 185 | word must remain 0; the word in the second position of the 186 | window could be 0 because it is out of vocabulary. And if 187 | `window_position` is 2, then the first two words, if 0, are 188 | 0 because they're out of vocabulary. Thus, the indices in the 189 | window that should be checked for the "zero because out of 190 | vocabulary" case start at max(0, 2-`window_position`). (NB: 191 | I didn't find any occurrences of `window_position` > `len`, 192 | just some occurrences of `window_position` == `len` - 2, 193 | which with sentence-terminating punctuation and the 194 | padding character at the end of each sentence just means 195 | that there are several sentences that end with a preposition. 196 | So we only need to deal with the beginning of the window.) 197 | * Replace those occurrences with `n_vocab`. 198 | """ 199 | Xwindow = Xwindow.copy() 200 | for i,window_position in enumerate(window_positions): 201 | window = Xwindow[i] 202 | start = max(0, 2 - window_position) 203 | zeros_in_window = window == 0 204 | zeros_in_window[0:start] = False 205 | if np.any(zeros_in_window): 206 | Xwindow[i, zeros_in_window] = n_vocab 207 | return Xwindow 208 | 209 | def create_window(sentence, position, size=7, nonce=None): 210 | """ 211 | Create a fixed-width window onto a sentence centered at some position. 212 | The sentence is assumed not to contain sentence-initial and -terminating 213 | markup (i.e. no '' element immediately before the start of the 214 | sentence and no '' immediately after its end). 
(If they were included 215 | in `sentence`, we would exclude them for backward compatibility with other 216 | preprocesing code.) It is also assumed not to be padded with trailing zeros. 217 | 218 | Parameters 219 | --------- 220 | sentence : np.ndarray 221 | An array of integers that represents a sentence. The integers 222 | are indices in a model's vocabulary. 223 | position : int 224 | The 0-based index of the word in the sentence on which the window 225 | should be centered. 226 | size : int 227 | The size of the window. Must be odd. 228 | nonce : int or None 229 | The index in the vocabulary of the nonce word to put at the 230 | center of the window, replacing the index of the existing word. 231 | When None, this does not occur. 232 | """ 233 | if position < 0 or position >= len(sentence): 234 | raise ValueError("`position` (%d) must lie within sentence (len=%d)" % 235 | (position, len(sentence))) 236 | 237 | # Get exactly the positions in `sentence` to copy to `window`. 238 | window_start = position - size/2 239 | window_end = position + size/2 240 | sent_range = np.arange(window_start, window_end+1) 241 | sent_mask = (sent_range >= 0) & (sent_range < len(sentence)) 242 | sent_indices = sent_range[sent_mask] 243 | 244 | window_range = np.arange(0, size) 245 | window_indices = window_range[sent_mask] 246 | 247 | #print('window_start', window_start, 'window_end', window_end, 'sent_range', sent_range, 'sent_mask', sent_mask, 'sent_indices', sent_indices, 'window_range', window_range, 'window_indices', window_indices, 'sentence', sentence, 'position', position) 248 | 249 | window = np.zeros(size) 250 | window[window_indices] = sentence[sent_indices] 251 | 252 | if nonce is not None: 253 | window[size/2] = nonce 254 | 255 | return window 256 | 257 | def create_windows(sentences, lengths, positions, size, nonce=None): 258 | windows = np.zeros((len(sentences), size)) 259 | for i, sentence in enumerate(sentences): 260 | length = lengths[i] 261 | position = positions[i] 262 | sentence_without_zero_padding = sentence[0:length+2] 263 | sentence_without_markup = sentence_without_zero_padding[1:-1] 264 | windows[i] = create_window( 265 | sentence_without_markup, 266 | position=position, 267 | size=size, 268 | nonce=nonce) 269 | return windows 270 | 271 | def add_window_dataset(hdf5_file, name, size, nonce=None, sentences_name='X'): 272 | sentences = hdf5_file[sentences_name].value 273 | lengths = hdf5_file['len'].value 274 | positions = hdf5_file['window_position'].value 275 | 276 | windows = create_windows(sentences, lengths, positions, size, nonce) 277 | hdf5_file.create_dataset(name, data=windows, dtype=np.int32) 278 | 279 | def create_contrasting_cases(X, seed=17, values=[7,8,10,12,13,17,18,19,27]): 280 | center_idx = int(X.shape[1]/2) 281 | rng = np.random.RandomState(seed) 282 | Xcc = np.zeros((X.shape[0]*2, X.shape[1]), dtype=X.dtype) 283 | 284 | for i in np.arange(len(X)): 285 | 286 | # Original example 287 | j = i * 2 288 | Xcc[j, :] = X[i, :] 289 | 290 | # Contrasting case 291 | cc = X[i, :].copy() 292 | 293 | while True: 294 | replacement_value = rng.choice(values) 295 | if replacement_value != cc[center_idx]: 296 | break 297 | 298 | cc[center_idx] = replacement_value 299 | Xcc[j+1, :] = cc 300 | 301 | return Xcc 302 | 303 | def duplicate_values(values): 304 | new_values = np.zeros(len(values)*2) 305 | for i,value in enumerate(values): 306 | j = i * 2 307 | new_values[j] = value 308 | new_values[j+1] = value 309 | return new_values 310 | 
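The windowing helpers at the end of modeling/data.py are what turn a full sentence of vocabulary indices into the fixed-width window inputs (Xwindow and XwindowNULL) described in the mask_zero_for_rnn docstring above. A minimal usage sketch follows; it assumes the package is importable as modeling.data and, like the rest of the repository, a Python 2 interpreter (create_window relies on integer division in size/2), and the word indices are invented for illustration.

    import numpy as np
    from modeling.data import create_window

    # A five-word "sentence" of vocabulary indices (values are made up).
    sentence = np.array([4, 9, 17, 23, 5])

    # A five-word window centred on the word at position 1; slots that fall
    # outside the sentence stay 0, the padding value used throughout the repo.
    create_window(sentence, position=1, size=5)
    # -> array([  0.,   4.,   9.,  17.,  23.])

    # The same window with the centre word replaced by a nonce index, as is
    # done to build the XwindowNULL inputs.
    create_window(sentence, position=1, size=5, nonce=99)
    # -> array([  0.,   4.,  99.,  17.,  23.])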
-------------------------------------------------------------------------------- /modeling/dataset.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | from sklearn.utils import check_random_state 3 | import numpy as np 4 | from modeling.utils import balanced_class_weights 5 | from keras.utils import np_utils 6 | 7 | class HDF5FileDataset(object): 8 | def __init__(self, file_path, data_name, target_name, batch_size, one_hot=True, random_state=17): 9 | assert isinstance(data_name, (list,tuple)) 10 | assert isinstance(target_name, (list,tuple)) 11 | 12 | random_state = check_random_state(random_state) 13 | 14 | self.__dict__.update(locals()) 15 | del self.self 16 | 17 | self._load_data() 18 | self._check_data() 19 | 20 | def _load_data(self): 21 | self.hdf5_file = h5py.File(self.file_path) 22 | self.n_classes = {} 23 | for target_name in self.target_name: 24 | self.n_classes[target_name] = np.max(self.hdf5_file[target_name])+1 25 | 26 | def _check_data(self): 27 | self.n = None 28 | for data_name in self.data_name: 29 | if self.n is None: 30 | self.n = len(self.hdf5_file[data_name]) 31 | else: 32 | assert len(self.hdf5_file[data_name]) == self.n 33 | for target_name in self.target_name: 34 | assert len(self.hdf5_file[target_name]) == self.n 35 | 36 | def __getitem__(self, name): 37 | return self.hdf5_file[name].value 38 | 39 | def class_weights(self, class_weight_exponent, target): 40 | return balanced_class_weights( 41 | self.hdf5_file[target], 42 | 2, 43 | class_weight_exponent) 44 | 45 | def generator(self, one_hot=None, batch_size=None): 46 | if one_hot is None: one_hot = self.one_hot 47 | if batch_size is None: batch_size = self.batch_size 48 | 49 | while 1: 50 | idx = self.random_state.choice(self.n, size=batch_size, replace=False) 51 | batch = {} 52 | for data_name in self.data_name: 53 | batch[data_name] = self.hdf5_file[data_name].value[idx] 54 | for target_name in self.target_name: 55 | target = self.hdf5_file[target_name].value[idx] 56 | if one_hot: 57 | batch[target_name] = np_utils.to_categorical(target, 58 | self.n_classes[target_name]) 59 | else: 60 | batch[target_name] = target 61 | 62 | yield batch 63 | -------------------------------------------------------------------------------- /modeling/difference.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | import unittest 5 | import numpy as np 6 | from theano import function 7 | import theano.tensor as T 8 | 9 | from keras.layers.core import Layer 10 | 11 | class TemporalDifference(Layer): 12 | """ 13 | Given a 3-tensor with shape (nb_samples, maxlen, output_dim), outputs 14 | the difference X[ 15 | """ 16 | def _get_output(self, X): 17 | return X[:, 1:, :] - X[:, 0:X.shape[1]-1, :] 18 | 19 | def get_output(self, train): 20 | return self._get_output(self.get_input(train)) 21 | 22 | def get_config(self): 23 | return {"name": self.__class__.__name__} 24 | 25 | class TestTemporalDifference(unittest.TestCase): 26 | def testForward(self): 27 | nb_examples = 2 28 | maxlen = 7 29 | output_dim = nb_word_dim = 5 30 | x = np.random.normal(size=(nb_examples, maxlen, output_dim)).astype(np.float32) 31 | expected = x[:, 1:, :] - x[:, 0:x.shape[1]-1, :] 32 | X = T.tensor3('X') 33 | retval = TemporalDifference()._get_output(X) 34 | f = function([X], retval) 35 | actual = f(x) 36 | self.assertTrue(np.allclose(actual, expected)) 37 | 
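The TemporalDifference layer's docstring in modeling/difference.py breaks off mid-sentence, but its implementation and the testForward unit test above pin down what it computes: for an input X of shape (nb_samples, maxlen, output_dim) it returns the first-order differences between consecutive timesteps, X[:, 1:, :] - X[:, :-1, :], so the output has maxlen - 1 timesteps. A small numpy-only sketch of the same computation:

    import numpy as np

    # First-order differences between consecutive timesteps of a
    # (samples, timesteps, dims) tensor, mirroring TemporalDifference.
    X = np.arange(2 * 4 * 3, dtype=np.float32).reshape(2, 4, 3)
    diff = X[:, 1:, :] - X[:, :-1, :]

    assert diff.shape == (2, 3, 3)   # one fewer timestep than the input
    assert np.allclose(diff, 3.0)    # consecutive timesteps here differ by 3 everywhere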
-------------------------------------------------------------------------------- /modeling/fbeta.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano.tensor as tt 3 | from theano import function 4 | 5 | eps = 1e-20 6 | 7 | def support(y): 8 | return y.sum(axis=0) 9 | 10 | def true_positive(y, y_hat): 11 | return (tt.eq(y_hat, y) & tt.eq(y, 1)).sum(axis=0) 12 | 13 | def make_y_diff(y, y_hat): 14 | return y_hat - y 15 | 16 | def false_positive(y_diff): 17 | return tt.eq(y_diff, 1).sum(axis=0) 18 | 19 | def true_negative(y_diff): 20 | return tt.eq(y_diff, 0).sum(axis=0) 21 | 22 | def false_negative(y_diff): 23 | return tt.eq(y_diff, -1).sum(axis=0) 24 | 25 | def precision(y, y_hat, eps=1e-9, y_diff=None): 26 | tp = true_positive(y, y_hat) 27 | if y_diff is None: 28 | y_diff = make_y_diff(y, y_hat) 29 | fp = false_positive(y_diff) 30 | return tp/(tp+fp+eps) 31 | 32 | def recall(y, y_hat, eps=1e-9, y_diff=None): 33 | tp = true_positive(y, y_hat) 34 | if y_diff is None: 35 | y_diff = make_y_diff(y, y_hat) 36 | fn = false_negative(y_diff) 37 | return tp/(tp+fn+eps) 38 | 39 | def fbeta_loss(y, y_hat, beta=0.5, eps=1e-9, average=None): 40 | """ 41 | Returns the negative of the F_beta measure, because the 42 | optimizer is trying to minimize the objective. 43 | """ 44 | y_diff = make_y_diff(y, y_hat) 45 | pr = precision(y, y_hat, eps=eps, y_diff=y_diff) 46 | rc = recall(y, y_hat, eps=eps, y_diff=y_diff) 47 | 48 | f_per_class = ( (1 + beta**2) * (pr * rc) ) / (beta**2 * pr + rc + eps) 49 | 50 | if average is None: 51 | f = f_per_class 52 | elif average == 'macro': 53 | f = f_per_class.mean() 54 | elif average == 'weighted': 55 | s = support(y) 56 | f = ((f_per_class * s) / s.sum()).sum() 57 | 58 | return -f 59 | 60 | 61 | y = tt.matrix('y', dtype='int64') 62 | y_hat = tt.matrix('y', dtype='int64') 63 | 64 | floss = fbeta_loss(y, y_hat, average='weighted') 65 | f = function([y, y_hat], floss) 66 | 67 | loss = f(np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0]]), 68 | np.array([[0, 1, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]])) 69 | 70 | print("loss", loss) 71 | print("grad", tt.grad(loss, floss)) 72 | 73 | import numpy 74 | import theano 75 | import theano.tensor as T 76 | rng = numpy.random 77 | 78 | N = 400 79 | feats = 784 80 | D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2)) 81 | training_steps = 10000 82 | 83 | ########################################################################### 84 | # Declare Theano symbolic variables 85 | ########################################################################### 86 | 87 | x = T.matrix("x") 88 | y = T.vector("y") 89 | w = theano.shared(rng.randn(feats), name="w") 90 | b = theano.shared(0., name="b") 91 | 92 | print("Initial model:") 93 | print(w.get_value()) 94 | print(b.get_value()) 95 | 96 | ########################################################################### 97 | # Construct Theano expression graph 98 | ########################################################################### 99 | 100 | # Probability that target = 1 101 | p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b)) 102 | 103 | # The prediction thresholded 104 | prediction = p_1 > 0.5 105 | 106 | # Cross-entropy loss function 107 | xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) 108 | 109 | # The cost to minimize 110 | cost = xent.mean() + 0.01 * (w ** 2).sum() 111 | 112 | # Compute the gradient of the cost (we shall return to this in a following 113 | # section of this tutorial). 
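# (Annotation) T.grad returns the symbolic gradients of the scalar cost with
# respect to w and b; they drive the SGD update rules compiled into `train`
# below.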
114 | gw, gb = T.grad(cost, [w, b]) 115 | 116 | # Compile 117 | train = theano.function( 118 | inputs=[x,y], 119 | outputs=[prediction, xent], 120 | updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb))) 121 | predict = theano.function(inputs=[x], outputs=prediction) 122 | 123 | # Train 124 | for i in range(training_steps): 125 | pred, err = train(D[0], D[1]) 126 | 127 | print("Final model:") 128 | print(w.get_value()) 129 | print(b.get_value()) 130 | print("target values for D:") 131 | print(D[1]) 132 | print("prediction on D:") 133 | print(predict(D[0])) 134 | -------------------------------------------------------------------------------- /modeling/fbeta_predict.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import fbeta_score 3 | 4 | def make_default_targets(y, target_names): 5 | default_targets = [] 6 | for target in target_names[y]: 7 | # at-on => on-on 8 | # from-about => about-about 9 | s,t = target.split('-') 10 | default = '-'.join([t, t]) 11 | default_targets.append( 12 | np.where(target_names == default)[0][0]) 13 | return default_targets 14 | 15 | def predict_for_fbeta(y_hat_proba, default_targets, threshold=0.5, threshold_type='margin'): 16 | n = y_hat_proba.shape[0] 17 | y_hat_for_fbeta = np.zeros(n, dtype=np.int) 18 | 19 | if threshold_type not in ['margin', 'value']: 20 | raise ValueError('threshold_type must be either "margin" or "value"') 21 | 22 | for i in np.arange(n): 23 | most, next_most = np.argsort(y_hat_proba[i, :])[[-2,-1]] 24 | if threshold_type == 'margin': 25 | if y_hat_proba[i, most] - y_hat_proba[i, next_most] > threshold: 26 | y_hat_for_fbeta[i] = most 27 | else: 28 | y_hat_for_fbeta[i] = default_targets[most] 29 | elif threshold_type == 'value': 30 | if y_hat_proba[i, most] > threshold: 31 | y_hat_for_fbeta[i] = most 32 | else: 33 | y_hat_for_fbeta[i] = default_targets[most] 34 | 35 | return y_hat_for_fbeta 36 | -------------------------------------------------------------------------------- /modeling/lasagne_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import lasagne 3 | import theano.tensor as T 4 | import theano 5 | 6 | class Model(object): 7 | def __init__(self, config): 8 | self.config = config 9 | 10 | self.input_var = self.build_input_var() 11 | self.target_var = self.build_target_var() 12 | 13 | self.model = self.build_model() 14 | 15 | self.train_output = lasagne.layers.get_output(self.model) 16 | self.train_loss = self.build_loss(self.train_output) 17 | self.params = lasagne.layers.get_all_params(self.model, trainable=True) 18 | self.updates = self.build_updates() 19 | 20 | self.test_output = lasagne.layers.get_output(self.model, 21 | deterministic=True) 22 | self.test_loss = self.build_loss(self.test_output) 23 | self.test_accuracy = T.eq( 24 | T.argmax(self.test_output, axis=1), self.target_var) 25 | self.test_accuracy = T.mean( 26 | self.test_accuracy, dtype=theano.config.floatX) 27 | 28 | self.train_fn = theano.function( 29 | [self.input_var, self.target_var], 30 | self.train_loss, 31 | updates=self.updates) 32 | 33 | self.val_fn = theano.function( 34 | [self.input_var, self.target_var], 35 | [self.test_loss, self.test_accuracy]) 36 | 37 | self.pred_fun = theano.function([self.input_var], self.test_output) 38 | 39 | def build_input_var(self): 40 | raise NotImplementedError() 41 | 42 | def build_target_var(self): 43 | raise NotImplementedError() 44 | 45 | def build_updates(self): 46 | raise 
NotImplementedError() 47 | 48 | def build_model(self): 49 | raise NotImplementedError() 50 | 51 | def fit(self, data, target): 52 | return self.train_fn(data, target) 53 | 54 | def evaluate(self, data, target): 55 | output = self.val_fn(data, target) 56 | return output[0], output[1] 57 | 58 | def predict(self, data): 59 | pred = self.pred_fun(data) 60 | return pred 61 | 62 | def save_weights(self, path): 63 | np.savez(path, *lasagne.layers.get_all_param_values(self.model)) 64 | 65 | def load_weights(self, path): 66 | with np.load(path) as f: 67 | params = [f['arr_%d' % i] for i in range(len(f.files))] 68 | lasagne.layers.set_all_param_values(self.model, params) 69 | 70 | class Classifier(Model): 71 | def build_loss(self, output): 72 | loss = lasagne.objectives.categorical_crossentropy( 73 | output, self.target_var) 74 | return loss.mean() 75 | 76 | class Regressor(Model): 77 | def build_loss(self, output): 78 | loss = lasagne.objectives.squared_error( 79 | output, self.target_var) 80 | return loss.mean() 81 | -------------------------------------------------------------------------------- /modeling/layers.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import logging 3 | import numpy as np 4 | import theano.tensor as T 5 | import theano.tensor.nnet 6 | 7 | from keras.layers.embeddings import Embedding 8 | from keras.layers.convolutional import Convolution1D 9 | from keras.layers.core import Layer 10 | from keras import activations, initializations, regularizers, constraints 11 | 12 | from keras import backend as K 13 | 14 | logger = logging.getLogger() 15 | 16 | class ImmutableEmbedding(Embedding): 17 | ''' 18 | Same as Embedding except the weights are not parameters of the 19 | network. This can be useful when the layer is initialized with 20 | pre-trained embeddings, such as Word2Vec. 21 | 22 | @input_dim: size of vocabulary (highest input integer + 1) 23 | @output_dim: size of dense representation 24 | ''' 25 | def __init__(self, input_dim, output_dim, **kwargs): 26 | super(ImmutableEmbedding, self).__init__( 27 | input_dim, output_dim, **kwargs) 28 | self.params = [] 29 | 30 | def build(self): 31 | super(ImmutableEmbedding, self).build() 32 | self.params = [] 33 | 34 | class ImmutableConvolution1D(Convolution1D): 35 | ''' 36 | Same as Convolution1D except the convolutional filters are not 37 | parameters of the network. This can be useful when the layer 38 | is initialized with pre-trained convolutional filters. 
39 | 40 | @nb_filters: the number of convolutional filters 41 | @filter_width: the width of each filter 42 | ''' 43 | def __init__(self, nb_filters, filter_width, **kwargs): 44 | super(ImmutableConvolution1D, self).__init__( 45 | nb_filters, filter_width, **kwargs) 46 | self.params = [] 47 | 48 | def build(self): 49 | super(ImmutableConvolution1D, self).build() 50 | self.params = [] 51 | 52 | class Transpose(Layer): 53 | def __init__(self): 54 | super(Transpose, self).__init__() 55 | self.input = T.matrix() 56 | 57 | def _get_output(self, X): 58 | return X.T 59 | 60 | def get_output(self, train): 61 | return self._get_output(self.get_input(train)) 62 | 63 | def get_config(self): 64 | return {"name": self.__class__.__name__} 65 | 66 | class HierarchicalSoftmax(Layer): 67 | def __init__(self, output_dim, nb_hsm_classes, batch_size, 68 | init='glorot_uniform', 69 | W1_weights=None, W1_regularizer=None, W1_constraint=None, 70 | W2_weights=None, W2_regularizer=None, W2_constraint=None, 71 | b1_regularizer=None, b1_constraint=None, 72 | b2_regularizer=None, b2_constraint=None, 73 | input_dim=None, **kwargs): 74 | 75 | self.__dict__.update(locals()) 76 | del self.self 77 | 78 | self.init = initializations.get(init) 79 | #self.output_dim = nb_classes * nb_outputs_per_class 80 | self.nb_outputs_per_class = int(np.ceil(output_dim / float(nb_hsm_classes))) 81 | 82 | self.W1_regularizer = regularizers.get(W1_regularizer) 83 | self.b1_regularizer = regularizers.get(b1_regularizer) 84 | self.W2_regularizer = regularizers.get(W2_regularizer) 85 | self.b2_regularizer = regularizers.get(b2_regularizer) 86 | 87 | self.W1_constraint = constraints.get(W1_constraint) 88 | self.b1_constraint = constraints.get(b1_constraint) 89 | self.W2_constraint = constraints.get(W2_constraint) 90 | self.b2_constraint = constraints.get(b2_constraint) 91 | 92 | self.constraints = [self.W1_constraint, self.b1_constraint, 93 | self.W2_constraint, self.b2_constraint] 94 | 95 | #self.initial_weights = weights 96 | self.input_dim = input_dim 97 | if self.input_dim: 98 | kwargs['input_shape'] = (self.input_dim,) 99 | self.input = T.matrix() 100 | super(HierarchicalSoftmax, self).__init__(**kwargs) 101 | 102 | def build(self): 103 | #print('self.input_shape', self.input_shape) 104 | n_features = self.input_shape[1] 105 | 106 | self.W1 = self.init((n_features, self.nb_hsm_classes)) 107 | self.b1 = K.zeros((self.nb_hsm_classes,)) 108 | 109 | self.W2 = self.init((self.nb_hsm_classes, n_features, self.nb_outputs_per_class)) 110 | self.b2 = K.zeros((self.nb_hsm_classes, self.nb_outputs_per_class)) 111 | 112 | self.trainable_weights = [self.W1, self.b1, 113 | self.W2, self.b2] 114 | 115 | self.regularizers = [] 116 | if self.W1_regularizer: 117 | self.W1_regularizer.set_param(self.W1) 118 | self.regularizers.append(self.W1_regularizer) 119 | 120 | if self.b1_regularizer: 121 | self.b1_regularizer.set_param(self.b1) 122 | self.regularizers.append(self.b1_regularizer) 123 | 124 | if self.W2_regularizer: 125 | self.W2_regularizer.set_param(self.W2) 126 | self.regularizers.append(self.W2_regularizer) 127 | 128 | if self.b2_regularizer: 129 | self.b2_regularizer.set_param(self.b2) 130 | self.regularizers.append(self.b2_regularizer) 131 | 132 | @property 133 | def output_shape(self): 134 | print('HierarchicalSoftmax.output_shape', self.input_shape[0], self.output_dim) 135 | return (self.input_shape[0], self.output_dim) 136 | 137 | def _get_output(self, X): 138 | output = theano.tensor.nnet.h_softmax(X, 139 | #self.input_shape[1], 
self.output_dim, 140 | self.batch_size, self.output_dim, 141 | self.nb_hsm_classes, self.nb_outputs_per_class, 142 | self.W1, self.b1, 143 | self.W2, self.b2) 144 | return output 145 | 146 | def get_output(self, train=False): 147 | return self._get_output(self.get_input(train)) 148 | 149 | -------------------------------------------------------------------------------- /modeling/nonconvnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | import unittest 6 | import logging 7 | 8 | logger = logging.getLogger() 9 | 10 | from keras.layers.core import Layer 11 | from keras.utils.theano_utils import sharedX 12 | 13 | class SplitOutputByFilter(Layer): 14 | """ 15 | input: (batch_size, max_seq_len, n_filters * filter_width) 16 | output: (batch_size, n_filters, max_seq_len, filter_width) 17 | """ 18 | def __init__(self, n_filters, filter_width): 19 | super(SplitOutputByFilter, self).__init__() 20 | self.n_filters = n_filters 21 | self.filter_width = filter_width 22 | self.input = T.tensor3() 23 | 24 | def slice(self, i, X): 25 | start = i * self.filter_width 26 | end = (i+1) * self.filter_width 27 | return X[:, :, start:end] 28 | 29 | def _get_output(self, X): 30 | outputs, updates = theano.scan( 31 | fn=self.slice, 32 | outputs_info=None, 33 | sequences=[T.arange(self.n_filters)], 34 | non_sequences=X) 35 | return outputs.dimshuffle(1, 0, 2, 3) 36 | 37 | def get_output(self, train): 38 | return self._get_output(self.get_input(train)) 39 | 40 | def get_config(self): 41 | return {"name": self.__class__.__name__} 42 | 43 | class SlidingWindowL2MaxPooling(Layer): 44 | ''' 45 | input: (batch_size, n_filters, max_seq_len, filter_width) 46 | output: (batch_size, n_filters, filter_width, filter_width) 47 | ''' 48 | def __init__(self, batch_size, n_filters, filter_width, max_seq_len): 49 | super(SlidingWindowL2MaxPooling, self).__init__() 50 | self.batch_size = batch_size 51 | self.n_filters = n_filters 52 | self.filter_width = filter_width 53 | self.max_seq_len = max_seq_len 54 | 55 | def get_output(self, train): 56 | return self._get_output(self.get_input(train)) 57 | 58 | def _get_output(self, X): 59 | outputs, updates = theano.scan( 60 | fn=self.sample_dimension, 61 | sequences=[T.arange(self.batch_size)], 62 | non_sequences=X) 63 | return outputs 64 | 65 | def sample_dimension(self, i, X): 66 | ''' 67 | Takes a 4-tensor of shape `(batch_size, n_filters, max_seq_len, 68 | filter_width)` and an index into its first dimension. Returns the 69 | `(batch_size, n_filters, filter_width, filter_width)` subtensor 70 | with the greatest L2 norm along the third dimension. 71 | 72 | Parameters 73 | ---------- 74 | X : a 4-tensor 75 | An `(batch_size, n_filters, max_seq_len, filter_width)` tensor. 76 | i : int 77 | An index into the first dimension of `X`. 78 | 79 | Returns 80 | ---------- 81 | A 3-tensor of shape `(n_filters, filter_width, filter_width)` 82 | consisting of the subtensor of `X` with the greatest L2 norm along 83 | `X`'s third dimension (where `max_seq_len` lies). 84 | ''' 85 | outputs, updates = theano.scan( 86 | fn=self.filter_dimension, 87 | sequences=[T.arange(self.n_filters)], 88 | non_sequences=X[i, :, :, :]) 89 | 90 | return outputs 91 | 92 | def filter_dimension(self, i, X): 93 | ''' 94 | Takes a 3-tensor of shape `(n_filters, max_seq_len, filter_width)` 95 | and an index into its first dimension. 
Returns the 96 | `(filter_width, filter_width)` subtensor of `X` with the greatest 97 | L2 norm along the second dimension. 98 | 99 | Parameters 100 | ---------- 101 | X : a 3-tensor 102 | An `(batch_size, n_filters, max_seq_len, filter_width)` tensor. 103 | i : int 104 | An index into the first dimension of `X`. 105 | 106 | Returns 107 | ---------- 108 | A 2-tensor of shape `(filter_width, filter_width)` consisting 109 | of the subtensor of the i-th element along the first dimension 110 | of `X` with the greatest L2 norm along `X`'s second dimension 111 | (where `max_seq_len` lies). 112 | ''' 113 | norms, updates = theano.scan( 114 | fn=self.norm, 115 | sequences=[T.arange(self.max_seq_len)], 116 | non_sequences=X[i, :, :]) 117 | start_window = T.argmax(norms) 118 | end_window = start_window + self.filter_width 119 | return X[i, start_window:end_window, :] 120 | 121 | def norm(self, i, X): 122 | return (X[i:i+self.filter_width, :] ** 2).sum() 123 | 124 | class ZeroFillDiagonals(Layer): 125 | ''' 126 | input: (batch_size, n_filters, filter_width, filter_width) 127 | output: (batch_size, n_filters, filter_width, filter_width) with the 128 | diagonal of the last two `(filter_width, filter_width)` dimensions 129 | zeroed out. 130 | ''' 131 | def __init__(self, batch_size, n_filters, filter_width): 132 | super(ZeroFillDiagonals, self).__init__() 133 | self.batch_size = batch_size 134 | self.n_filters = n_filters 135 | self.filter_width = filter_width 136 | 137 | # Construct a shared boolean matrix by which to multiply the input 138 | # element-wise. It should be 0 everywhere except on the diagonals 139 | # of the last two dimensions. 140 | input_shape = (batch_size, n_filters, filter_width, filter_width) 141 | mask = np.ones(input_shape) 142 | diag_indices = np.arange(filter_width) 143 | for i in np.arange(batch_size): 144 | for j in np.arange(n_filters): 145 | mask[i, j, diag_indices, diag_indices] = 0 146 | self.mask = sharedX(mask, dtype='int32') 147 | 148 | def get_output(self, train): 149 | return self._get_output(self.get_input(train)) 150 | 151 | def _get_output(self, X): 152 | return X * self.mask 153 | -------------------------------------------------------------------------------- /modeling/outliers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import argparse 5 | import os.path 6 | import cPickle 7 | from itertools import product 8 | 9 | import theano 10 | import pylearn2 11 | from pylearn2.config import yaml_parse 12 | 13 | import numpy as np 14 | from numpy.random import multivariate_normal as mvnormal 15 | from numpy.random import uniform 16 | from scipy.spatial.distance import pdist, squareform 17 | from scipy.stats import pearsonr 18 | 19 | import matplotlib.pyplot as plt 20 | from mpl_toolkits.mplot3d import axes3d 21 | 22 | from sklearn.covariance import MinCovDet, EmpiricalCovariance 23 | from sklearn.decomposition import PCA 24 | 25 | ########################################################################### 26 | # This class was useful for simulating data sets while developing 27 | # this script. 
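# (Annotation) The commented example below predates the final constructor
# signature: as written it would need to be
#     PMeansMultivariateNormal(means, cov, n)
# since __init__ takes (means, cov, size), and the save call would be
#     np.savetxt('simulated.csv', X)
# because np.savetxt expects the file name first.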
28 | # means = [[0, 0]] 29 | # cov = [[2, 1], [1, 2]] 30 | # n = 5000 31 | # mvn = PMeansMultivariateNormal(n, means, cov) 32 | # X = mvn.generate() 33 | # X.shape 34 | # np.savetxt(X, file='simulated.csv') 35 | ########################################################################### 36 | class PMeansMultivariateNormal(object): 37 | def __init__(self, means, cov, size): 38 | self.__dict__.update(locals()) 39 | del self.self 40 | #self.n = n 41 | #self.means = means 42 | #self.cov = cov 43 | 44 | def generate(self): 45 | return mvnormal(self.means, self.cov, self.size) 46 | ''' 47 | X = np.empty(shape=(self.n*len(self.means), 2)) 48 | for i, mean in enumerate(self.means): 49 | idx = range(i*self.n, i*self.n+self.n) 50 | x, y = mvnormal(mean, self.cov, self.n).T 51 | X[idx, 0] = x 52 | X[idx, 1] = y 53 | return X 54 | ''' 55 | 56 | def reconstruction_error(a, b): 57 | return ((a - b)**2).sum(axis=1) 58 | 59 | def train_autoencoder(dataset_path, nvis=2, nhid=2, act_enc=None, act_dec=None): 60 | yaml = open('outliers.yaml', 'r').read() 61 | if act_enc is None: 62 | act_enc = 'null' 63 | else: 64 | act_enc = "'" + act_enc + "'" 65 | 66 | if act_dec is None: 67 | act_dec = 'null' 68 | else: 69 | act_dec = "'" + act_dec + "'" 70 | 71 | params = { 72 | 'dataset_path': dataset_path, 73 | 'nvis': nvis, 74 | 'nhid': nhid, 75 | 'act_enc': act_enc, 76 | 'act_dec': act_dec, 77 | 'learning_rate': 0.05, 78 | 'save_path': 'outliers.pkl' 79 | } 80 | 81 | yaml = yaml % (params) 82 | 83 | train = yaml_parse.load(yaml) 84 | train.main_loop() 85 | 86 | pkl = open('outliers.pkl') 87 | return cPickle.load(pkl) 88 | 89 | class NullTransformer(object): 90 | def fit(self, X): 91 | pass 92 | 93 | def fit_transform(self, X): 94 | return X 95 | 96 | def transform(self, X): 97 | return X 98 | 99 | def main(): 100 | parser = argparse.ArgumentParser( 101 | description='Plot outlier-like distances for a 2-dimensional dataset') 102 | parser.add_argument( 103 | 'dataset', type=argparse.FileType('r'), 104 | help='a CSV file containing the dataset') 105 | parser.add_argument( 106 | '--plot', type=str, choices=['train', 'grid'], default='grid', 107 | help='plot the dataset or a grid evenly distributed over its span') 108 | parser.add_argument( 109 | '--plotdims', type=int, choices=[2, 3], default=2, 110 | help='the number of dimensions to plot') 111 | 112 | args = parser.parse_args() 113 | 114 | X = np.loadtxt(args.dataset, delimiter=',') 115 | fig = plt.figure() 116 | 117 | xformer = NullTransformer() 118 | 119 | if X.shape[1] > 2: 120 | xformer = PCA(n_components=2) 121 | X = xformer.fit_transform(X) 122 | 123 | if args.plotdims == 2: 124 | plt.scatter(X[:, 0], X[:, 1], s=60, linewidth='0') 125 | else: 126 | plt.scatter(X[:, 0], X[:, 1]) 127 | plt.show(block=False) 128 | 129 | path_to_script = os.path.realpath(__file__) 130 | dir_of_script = os.path.dirname(path_to_script) 131 | dataset_path = dir_of_script + '/outliers.npy' 132 | np.save(dataset_path, X) 133 | 134 | ########################################################################### 135 | # Train autoencoder with the n samples until convergence. Run 136 | # evenly distributed samples through the autoencoder and compute 137 | # their reconstruction error. 
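# The reconstruction errors computed here are compared below against
# Mahalanobis distances under both a robust (MinCovDet) and an empirical
# covariance estimate, and the script reports their Pearson correlations.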
138 | ########################################################################### 139 | 140 | maxseq_orig = np.max(X) 141 | minseq_orig = np.min(X) 142 | seqrange = np.abs(maxseq_orig - minseq_orig) 143 | maxseq = maxseq_orig + 0.5 * seqrange 144 | minseq = minseq_orig - 0.5 * seqrange 145 | print("minseq", minseq, "maxseq", maxseq) 146 | if args.plot == 'grid': 147 | seq = np.linspace(minseq, maxseq, num=50, endpoint=True) 148 | Xplot = np.array([_ for _ in product(seq, seq)]) 149 | else: 150 | Xplot = X 151 | 152 | robust_cov = MinCovDet().fit(X) 153 | robust_md = robust_cov.mahalanobis(Xplot) 154 | 155 | empirical_cov = EmpiricalCovariance().fit(X) 156 | empirical_md = empirical_cov.mahalanobis(Xplot) 157 | 158 | # Assume Xplot is at least 2-dimensional. 159 | if Xplot.shape[1] > 2: 160 | Xplot2d = bh_sne(Xplot) 161 | else: 162 | Xplot2d = Xplot 163 | 164 | robust_md01 = robust_md - np.nanmin(robust_md) 165 | robust_md01 = robust_md01 / np.nanmax(robust_md01) 166 | 167 | empirical_md01 = empirical_md - np.nanmin(empirical_md) 168 | empirical_md01 = empirical_md01 / np.nanmax(empirical_md01) 169 | 170 | fig = plt.figure() 171 | if args.plotdims == 2: 172 | ax = fig.add_subplot(1, 1, 1) 173 | ax.scatter(Xplot2d[:, 0], Xplot2d[:, 1], 174 | cmap=plt.cm.jet, c=robust_md01, s=60, linewidth='0') 175 | else: 176 | ax = fig.add_subplot(1, 1, 1, projection='3d') 177 | ax.plot_trisurf(Xplot2d[:, 0], Xplot2d[:, 1], robust_md01, 178 | cmap=plt.cm.jet, color=robust_md01) 179 | ax.set_zlabel('Mahalanobis distance') 180 | ax.set_xlabel('x') 181 | ax.set_ylabel('y') 182 | ax.set_title('Mahalanobis distance (robust covariance)') 183 | 184 | fig = plt.figure() 185 | if args.plotdims == 2: 186 | ax = fig.add_subplot(1, 1, 1) 187 | ax.scatter(Xplot2d[:, 0], Xplot2d[:, 1], 188 | cmap=plt.cm.jet, c=empirical_md01, s=60, linewidth='0') 189 | else: 190 | ax = fig.add_subplot(1, 1, 1, projection='3d') 191 | ax.plot_trisurf(Xplot2d[:, 0], Xplot2d[:, 1], empirical_md01, 192 | cmap=plt.cm.jet, color=empirical_md01) 193 | ax.set_zlabel('Mahalanobis distance') 194 | 195 | ax.set_xlabel('x') 196 | ax.set_ylabel('y') 197 | ax.set_title('Mahalanobis distance (empirical covariance)') 198 | 199 | enc_dec = [ 200 | # tanh encoder, linear decoder 201 | ['tanh', 'linear'], 202 | # sigmoid encoder, linear decoder 203 | ['sigmoid', 'linear'], 204 | ####################################################################### 205 | # The reconstruction error of the autoencoders trained with the 206 | # remaining commented-out pairs don't seem to match Mahalanobis 207 | # distance very well. Feel free to uncomment them to see for 208 | # yourself. 
209 | # linear encoder, linear decoder 210 | # ['linear', 'linear'], 211 | # tanh encoder, tanh decoder 212 | # ['tanh', 'tanh'], 213 | # tanh encoder, sigmoid decoder 214 | # ['tanh', 'sigmoid'], 215 | # sigmoid encoder, tanh decoder 216 | # ['sigmoid', 'tanh'], 217 | # sigmoid encoder, sigmoid decoder 218 | # ['sigmoid', 'sigmoid'] 219 | ####################################################################### 220 | ] 221 | 222 | for i, act in enumerate(enc_dec): 223 | enc, dec = act 224 | if dec == 'linear': 225 | dec = None 226 | model = train_autoencoder(dataset_path, 227 | act_enc=enc, act_dec=dec, nvis=X.shape[1], nhid=16) 228 | 229 | Xshared = theano.shared( 230 | np.asarray(Xplot, dtype=theano.config.floatX), borrow=True) 231 | f = theano.function([], outputs=model.reconstruct(Xshared)) 232 | fit = f() 233 | error = reconstruction_error(Xplot, fit) 234 | 235 | error01 = error - np.nanmin(error) 236 | error01 = error01 / np.nanmax(error01) 237 | 238 | fig = plt.figure() 239 | if args.plotdims == 2: 240 | ax = fig.add_subplot(1, 1, 1) 241 | ax.scatter(Xplot2d[:, 0], Xplot2d[:, 1], 242 | cmap=plt.cm.jet, c=error, s=60, linewidth='0') 243 | else: 244 | ax = fig.add_subplot(1, 1, 1, projection='3d') 245 | ax.plot_trisurf(Xplot2d[:, 0], Xplot2d[:, 1], error, 246 | cmap=plt.cm.jet, color=error01) 247 | ax.set_zlabel('Reconstruction error') 248 | 249 | ax.set_xlabel('x') 250 | ax.set_ylabel('y') 251 | encdec_type = ', '.join(act) 252 | ax.set_title('Reconstruction error (' + encdec_type + ')') 253 | 254 | print("Correlation of robust MD and reconstruction error (" + 255 | str(encdec_type) + ") " + str(pearsonr(robust_md, error))) 256 | print("Correlation of empirical MD and reconstruction error (" + 257 | str(encdec_type) + ") " + str(pearsonr(empirical_md, error))) 258 | 259 | print("Correlation of robust MD and empirical MD " + 260 | str(pearsonr(robust_md, empirical_md))) 261 | 262 | os.remove(dataset_path) 263 | os.remove('outliers.pkl') 264 | 265 | plt.show(block=True) 266 | 267 | if __name__ == '__main__': 268 | sys.exit(main()) 269 | -------------------------------------------------------------------------------- /modeling/parser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import numpy 4 | 5 | def kvpair(s): 6 | try: 7 | k,v = s.split('=') 8 | if '.' in v: 9 | try: 10 | v = float(v) 11 | except ValueError: 12 | pass 13 | else: 14 | try: 15 | v = int(v) 16 | except ValueError: 17 | pass 18 | return k,v 19 | except: 20 | raise argparse.ArgumentTypeError( 21 | '--model-cfg arguments must be KEY=VALUE pairs') 22 | 23 | def build_chainer(): 24 | parser = build() 25 | parser.add_argument('--gpu', '-g', default=-1, type=int, 26 | help='GPU ID (negative value indicates CPU)') 27 | return parser 28 | 29 | def build_keras(): 30 | parser = build() 31 | return parser 32 | 33 | def build_lasagne(): 34 | parser = build() 35 | parser.add_argument('--progress', action='store_true', 36 | help='Whether to display a progress for training and validation') 37 | return parser 38 | 39 | def build(): 40 | parser = argparse.ArgumentParser( 41 | description='Train a model.') 42 | parser.add_argument('model_dir', metavar="MODEL_DIR", type=str, 43 | help='The base directory of this model. Must contain a model.py (model code) and a model.json (hyperparameters). 
Model configuration and weights are saved to model_dir/UUID.') 44 | parser.add_argument('--model-cfg', type=kvpair, nargs='+', default=[], 45 | help='Model hyper-parameters as KEY=VALUE pairs; overrides parameters in MODEL_DIR/model.json') 46 | parser.add_argument('--model-dest', type=str, default='', 47 | help='Directory to which to copy model.py and model.json. This overrides copying to model_dir/UUID.') 48 | parser.add_argument( 49 | '--mode', type=str, 50 | choices=['transient', 'persistent', 'persistent-background'], 51 | default='persistent', 52 | help='How to run the model; in "transient" mode, output goes to the console and the model is not saved; in "persistent" mode, output goes to the console and the model is saved; in "persistent-background" mode, output goes to the model.log file and the model is saved. The default is "persistent"') 53 | 54 | return parser 55 | -------------------------------------------------------------------------------- /modeling/preprocess.py: -------------------------------------------------------------------------------- 1 | class NullPreprocessor(object): 2 | def __init__(self): 3 | pass 4 | 5 | def fit(self, X, y=None): 6 | pass 7 | 8 | def transform(self, X, y=None): 9 | if y is None: 10 | return X 11 | else: 12 | return X, y 13 | 14 | def fit_transform(self, X, y=None): 15 | return self.transform(X, y) 16 | -------------------------------------------------------------------------------- /modeling/residual.py: -------------------------------------------------------------------------------- 1 | from keras.models import Sequential, Graph 2 | from keras.layers.core import Dense, Activation, Layer, Dropout 3 | from keras.activations import relu 4 | 5 | class Identity(Layer): 6 | def get_output(self, train): 7 | return self.get_input(train) 8 | 9 | def build_residual_block(name, input_shape, n_hidden, n_skip=2): 10 | """ 11 | Rough sketch of building blocks of layers for residual learning. 12 | See http://arxiv.org/abs/1512.03385 for motivation. 13 | """ 14 | block = Graph() 15 | input_name = 'x' 16 | block.add_input(input_name, input_shape=input_shape) 17 | 18 | # The current keras graph implementation doesn't allow you to connect 19 | # an input node to an output node. Use Identity to work around that. 
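# (Annotation) Identity simply forwards its input; it gives the block's input
# a named graph node that can later be merged with the transformed path via
# the summed output added at the end of this function.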
20 | block.add_node(Identity(), name=name+'identity', input=input_name) 21 | 22 | prev_output = input_name 23 | for i in range(n_skip): 24 | layer_name = 'h' + str(i) 25 | l = Dense(n_hidden, activation='relu') 26 | block.add_node(l, name=layer_name, input=prev_output) 27 | prev_output = layer_name 28 | if i < n_skip: 29 | block.add_node(Dropout(0.5), name=layer_name+'do', input=layer_name) 30 | prev_output = layer_name+'do' 31 | 32 | block.add_output(name=name+'output', inputs=[name+'identity', prev_output], merge_mode='sum') 33 | 34 | return block 35 | -------------------------------------------------------------------------------- /models/keras/attention/model.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "train_embeddings": true, 4 | "regularization_layer": "", 5 | "dropout_p": 0.5, 6 | "dropout_p_conv": 0.0, 7 | "n_embed_dims": 25, 8 | "loss": "categorical_crossentropy", 9 | "patience": 20, 10 | "batch_size": 128, 11 | "decay": 0.0, 12 | "embedding_max_norm": 1000, 13 | "filter_max_norm": 1000, 14 | "dense_max_norm": 1000, 15 | "l2_penalty": 0.0, 16 | "clipnorm": 0, 17 | "truncate_gradient": -1 18 | } 19 | -------------------------------------------------------------------------------- /models/keras/attention/model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.setrecursionlimit(5000) 3 | import json 4 | import h5py 5 | 6 | import numpy as np 7 | 8 | from keras.models import Sequential, Graph 9 | from keras.layers.core import (Layer, Dense, Activation, Dropout, 10 | TimeDistributedDense, TimeDistributedMerge, 11 | Flatten, Reshape) 12 | from keras.layers.normalization import BatchNormalization 13 | from keras.layers.recurrent import LSTM, GRU 14 | from keras.layers.embeddings import Embedding 15 | from keras.constraints import maxnorm 16 | from keras.regularizers import l2 17 | from keras.optimizers import SGD, Adam, Adadelta, Adagrad, RMSprop 18 | 19 | from modeling.layers import ImmutableEmbedding 20 | from modeling.difference import TemporalDifference 21 | from modeling.builders import (build_embedding_layer, 22 | build_convolutional_layer, build_pooling_layer, 23 | build_dense_layer, build_optimizer, load_weights) 24 | 25 | def error_free_examples(path): 26 | f = h5py.File(path) 27 | # Target_code is 0 when the preposition in the example is the original 28 | # preposition in the corpus and 1 when the preposition has been randomly 29 | # replaced with another one in the confusion set. 30 | idx = f['target_code'].value == 0 31 | f.close() 32 | return idx 33 | 34 | class Identity(Layer): 35 | def get_output(self, train): 36 | return self.get_input(train) 37 | 38 | class Transpose(Layer): 39 | def get_output(self, train): 40 | return self.get_input(train).T 41 | 42 | def build_model(args): 43 | np.random.seed(args.seed) 44 | 45 | graph = Graph() 46 | 47 | graph.add_input('input', input_shape=(args.input_width,), dtype='int') 48 | 49 | graph.add_node(build_embedding_layer(args), 50 | input='input', name='embedding') 51 | 52 | graph.add_node(LSTM(args.n_units, 53 | truncate_gradient=args.truncate_gradient, 54 | return_sequences=True), 55 | input='embedding', name='lstm0') 56 | 57 | graph.add_node(LSTM(args.n_units, 58 | truncate_gradient=args.truncate_gradient, 59 | return_sequences=True), 60 | input='lstm0', name='lstm1') 61 | 62 | # Attention module. 
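# (Annotation) The three TimeDistributedDense layers produce, for every
# timestep, a softmax-normalized gate over the LSTM units.  The gates are
# multiplied elementwise into the lstm1 outputs (merge_mode='mul') and the
# gated sequence is summed over time by TimeDistributedMerge(mode='sum'),
# yielding a fixed-length vector for the final softmax classifier.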
63 | graph.add_node(TimeDistributedDense(args.n_units, activation='relu'), 64 | input='lstm1', name='attention0') 65 | graph.add_node(TimeDistributedDense(args.n_units, activation='relu'), 66 | input='attention0', name='attention1') 67 | graph.add_node(TimeDistributedDense(args.n_units, activation='softmax'), 68 | input='attention1', name='attention2') 69 | 70 | # Apply mask from output of attention module to LSTM output. 71 | graph.add_node(TimeDistributedMerge(mode='sum'), 72 | inputs=['lstm1', 'attention2'], 73 | name='applyattn', 74 | merge_mode='mul') 75 | 76 | graph.add_node(Dense(args.n_classes, activation='softmax'), 77 | input='applyattn', name='softmax') 78 | 79 | graph.add_output(input='softmax', name='output') 80 | 81 | load_weights(args, graph) 82 | 83 | optimizer = build_optimizer(args) 84 | 85 | graph.compile(loss={'output': args.loss}, optimizer=optimizer) 86 | 87 | return graph 88 | -------------------------------------------------------------------------------- /models/keras/preposition/convnet/4e0ae5dc683611e5950afcaa149e39ea/model.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | import numpy as np 4 | 5 | from keras.models import Sequential 6 | from keras.layers.core import Dense, Dropout, Activation, Flatten 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 9 | from keras.layers.embeddings import Embedding 10 | from keras.constraints import maxnorm 11 | from keras.regularizers import l2 12 | from keras.optimizers import SGD, Adam, Adadelta, Adagrad, RMSprop 13 | 14 | from modeling.layers import ImmutableEmbedding 15 | from modeling.difference import TemporalDifference 16 | 17 | def build_model(args): 18 | print("args", vars(args)) 19 | 20 | np.random.seed(args.seed) 21 | 22 | model = Sequential() 23 | 24 | if hasattr(args, 'embedding_weights') and args.embedding_weights is not None: 25 | W = np.load(args.embedding_weights) 26 | if args.train_embeddings is True or args.train_embeddings == 'true': 27 | model.add(Embedding(args.n_vocab, args.n_word_dims, 28 | weights=[W], input_length=args.input_width, 29 | W_constraint=maxnorm(args.embedding_max_norm))) 30 | else: 31 | model.add(ImmutableEmbedding(args.n_vocab, args.n_word_dims, 32 | weights=[W], input_length=args.input_width)) 33 | else: 34 | model.add(Embedding(args.n_vocab, args.n_word_dims, 35 | W_constraint=maxnorm(args.embedding_max_norm), 36 | input_length=args.input_width)) 37 | 38 | if args.use_difference: 39 | model.add(TemporalDifference()) 40 | 41 | model.add(Convolution1D(args.n_filters, args.filter_width, 42 | W_constraint=maxnorm(args.filter_max_norm), 43 | border_mode=args.border_mode, 44 | W_regularizer=l2(args.l2_penalty), 45 | activation='relu')) 46 | #if 'normalization' in args.regularization_layer: 47 | # model.add(BatchNormalization( 48 | # (args.input_width-args.filter_width+1, args.n_filters))) 49 | #model.add(Activation('relu')) 50 | 51 | model.add(MaxPooling1D( 52 | pool_length=args.input_width - args.filter_width + 1, 53 | stride=1, ignore_border=False)) 54 | model.add(Flatten()) 55 | 56 | if 'dropout' in args.regularization_layer: 57 | model.add(Dropout(args.dropout_p_conv)) 58 | if 'normalization' in args.regularization_layer: 59 | model.add(BatchNormalization()) 60 | 61 | model.add(Dense(2*args.n_filters, 62 | W_regularizer=l2(args.l2_penalty), 63 | activation='relu')) 64 | if 'dropout' in args.regularization_layer: 65 | 
model.add(Dropout(args.dropout_p)) 66 | if 'normalization' in args.regularization_layer: 67 | model.add(BatchNormalization()) 68 | 69 | model.add(Dense(2*args.n_filters, 70 | W_regularizer=l2(args.l2_penalty), 71 | activation='relu')) 72 | if 'dropout' in args.regularization_layer: 73 | model.add(Dropout(args.dropout_p)) 74 | if 'normalization' in args.regularization_layer: 75 | model.add(BatchNormalization()) 76 | 77 | model.add(Dense(2*args.n_filters, 78 | W_regularizer=l2(args.l2_penalty), 79 | activation='relu')) 80 | if 'dropout' in args.regularization_layer: 81 | model.add(Dropout(args.dropout_p)) 82 | if 'normalization' in args.regularization_layer: 83 | model.add(BatchNormalization()) 84 | 85 | model.add(Dense(args.n_classes, 86 | W_regularizer=l2(args.l2_penalty), 87 | activation='softmax')) 88 | #if 'normalization' in args.regularization_layer: 89 | # model.add(BatchNormalization((args.n_classes,))) 90 | 91 | if args.optimizer == 'SGD': 92 | optimizer = SGD(lr=args.learning_rate, 93 | decay=args.decay, momentum=args.momentum, 94 | clipnorm=args.clipnorm) 95 | elif args.optimizer == 'Adam': 96 | optimizer = Adam(clipnorm=args.clipnorm) 97 | elif args.optimizer == 'RMSprop': 98 | optimizer = RMSprop(clipnorm=args.clipnorm) 99 | elif args.optimizer == 'Adadelta': 100 | optimizer = Adadelta(clipnorm=args.clipnorm) 101 | elif args.optimizer == 'Adagrad': 102 | optimizer = Adagrad(clipnorm=args.clipnorm) 103 | else: 104 | raise ValueError("don't know how to use optimizer {0}".format(args.optimizer)) 105 | 106 | if hasattr(args, 'model_weights'): 107 | print('Checking for weights file ' + str(args.model_weights)) 108 | if os.path.exists(args.model_weights): 109 | print('Loading weights') 110 | model.load_weights(args.model_weights) 111 | 112 | print('Compiling') 113 | model.compile(loss=args.loss, optimizer=optimizer) 114 | 115 | return model 116 | -------------------------------------------------------------------------------- /models/keras/preposition/convnet/4e0ae5dc683611e5950afcaa149e39ea/model_old_keras.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from keras.models import Sequential 4 | from keras.layers.core import Dense, Dropout, Activation, Flatten 5 | from keras.layers.normalization import BatchNormalization 6 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 7 | from keras.layers.embeddings import Embedding 8 | from keras.constraints import maxnorm 9 | from keras.regularizers import l2 10 | from keras.optimizers import SGD, Adam, Adadelta, Adagrad, RMSprop 11 | 12 | from modeling.layers import ImmutableEmbedding 13 | from modeling.difference import TemporalDifference 14 | 15 | def build_model(args): 16 | print("args", vars(args)) 17 | 18 | np.random.seed(args.seed) 19 | 20 | model = Sequential() 21 | 22 | if hasattr(args, 'embedding_weights') and args.embedding_weights is not None: 23 | W = np.load(args.embedding_weights) 24 | if args.train_embeddings: 25 | model.add(Embedding(args.n_vocab, args.n_word_dims, 26 | weights=[W], 27 | W_constraint=maxnorm(args.embedding_max_norm))) 28 | else: 29 | model.add(ImmutableEmbedding(args.n_vocab, args.n_word_dims, 30 | weights=[W])) 31 | else: 32 | model.add(Embedding(args.n_vocab, args.n_word_dims, 33 | W_constraint=maxnorm(args.embedding_max_norm))) 34 | 35 | if args.use_difference: 36 | model.add(TemporalDifference()) 37 | 38 | model.add(Convolution1D(args.n_word_dims, args.n_filters, args.filter_width, 39 | 
W_constraint=maxnorm(args.filter_max_norm), 40 | border_mode=args.border_mode, 41 | W_regularizer=l2(args.l2_penalty))) 42 | #if 'normalization' in args.regularization_layer: 43 | # model.add(BatchNormalization( 44 | # (args.input_width-args.filter_width+1, args.n_filters))) 45 | model.add(Activation('relu')) 46 | 47 | model.add(MaxPooling1D( 48 | pool_length=args.input_width - args.filter_width + 1, 49 | stride=None, ignore_border=False)) 50 | model.add(Flatten()) 51 | if 'dropout' in args.regularization_layer: 52 | model.add(Dropout(args.dropout_p_conv)) 53 | if 'normalization' in args.regularization_layer: 54 | model.add(BatchNormalization((args.n_filters,))) 55 | 56 | model.add(Dense(args.n_filters, 2*args.n_filters, 57 | W_regularizer=l2(args.l2_penalty))) 58 | model.add(Activation('relu')) 59 | if 'dropout' in args.regularization_layer: 60 | model.add(Dropout(args.dropout_p)) 61 | if 'normalization' in args.regularization_layer: 62 | model.add(BatchNormalization((2*args.n_filters,))) 63 | 64 | model.add(Dense(2*args.n_filters, 2*args.n_filters)) 65 | model.add(Activation('relu')) 66 | if 'dropout' in args.regularization_layer: 67 | model.add(Dropout(args.dropout_p)) 68 | if 'normalization' in args.regularization_layer: 69 | model.add(BatchNormalization((2*args.n_filters,))) 70 | 71 | model.add(Dense(2*args.n_filters, 2*args.n_filters, 72 | W_regularizer=l2(args.l2_penalty))) 73 | model.add(Activation('relu')) 74 | if 'dropout' in args.regularization_layer: 75 | model.add(Dropout(args.dropout_p)) 76 | if 'normalization' in args.regularization_layer: 77 | model.add(BatchNormalization((2*args.n_filters,))) 78 | 79 | model.add(Dense(2*args.n_filters, args.n_classes, 80 | W_regularizer=l2(args.l2_penalty))) 81 | #if 'normalization' in args.regularization_layer: 82 | # model.add(BatchNormalization((args.n_classes,))) 83 | model.add(Activation('softmax')) 84 | 85 | if args.optimizer == 'SGD': 86 | optimizer = SGD(lr=args.learning_rate, 87 | decay=args.decay, momentum=args.momentum, 88 | clipnorm=args.clipnorm) 89 | elif args.optimizer == 'Adam': 90 | optimizer = Adam(clipnorm=args.clipnorm) 91 | elif args.optimizer == 'RMSprop': 92 | optimizer = RMSprop(clipnorm=args.clipnorm) 93 | elif args.optimizer == 'Adadelta': 94 | optimizer = Adadelta(clipnorm=args.clipnorm) 95 | elif args.optimizer == 'Adagrad': 96 | optimizer = Adagrad(clipnorm=args.clipnorm) 97 | else: 98 | raise ValueError("don't know how to use optimizer {0}".format(args.optimizer)) 99 | 100 | if hasattr(args, 'model_weights') and args.model_weights is not None: 101 | model.load_weights(args.model_weights) 102 | 103 | model.compile(loss=args.loss, optimizer=optimizer) 104 | 105 | return model 106 | -------------------------------------------------------------------------------- /models/keras/preposition/convnet/model-word2vec.json: -------------------------------------------------------------------------------- 1 | { 2 | "embedding_weights": "data/prepositions-weights.npy", 3 | "train_embeddings": false, 4 | "regularization_layer": "normalization", 5 | "n_word_dims": 300, 6 | "border_mode": "valid", 7 | "use_difference": true, 8 | "n_filters": 1000, 9 | "filter_width": 3, 10 | "loss": "categorical_crossentropy", 11 | "patience": 30, 12 | "batch_size": 128, 13 | "optimizer": "SGD", 14 | "learning_rate": 0.001, 15 | "momentum": 0.9, 16 | "decay": 0.0, 17 | "embedding_max_norm": 1000, 18 | "filter_max_norm": 1000, 19 | "dense_max_norm": 1000, 20 | "l2_penalty": 0.0, 21 | "clipnorm": 0 22 | } 23 | 
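(Annotation) The JSON files in this directory hold default hyperparameters; per modeling/parser.py, any entry can be overridden on the command line with --model-cfg KEY=VALUE pairs. train_keras.py is not reproduced in this listing, so the following is only a minimal sketch of that merge; the helper name load_model_config is chosen here for illustration.

    import json

    def load_model_config(model_dir, overrides):
        # `overrides` is a list of (key, value) pairs, e.g. as produced by
        # modeling.parser.kvpair.
        with open('%s/model.json' % model_dir) as f:
            cfg = json.load(f)
        cfg.update(dict(overrides))  # --model-cfg values take precedence
        return cfg

    cfg = load_model_config('models/keras/preposition/convnet',
                            [('n_filters', 1000), ('train_embeddings', False)])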
-------------------------------------------------------------------------------- /models/keras/preposition/convnet/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_embeddings": true, 3 | "regularization_layer": "", 4 | "dropout_p": 0.5, 5 | "dropout_p_conv": 0.0, 6 | "n_word_dims": 50, 7 | "border_mode": "valid", 8 | "use_difference": false, 9 | "n_filters": 500, 10 | "n_hidden": 500, 11 | "filter_width": 4, 12 | "loss": "categorical_crossentropy", 13 | "patience": 20, 14 | "batch_size": 128, 15 | "optimizer": "SGD", 16 | "learning_rate": 0.001, 17 | "momentum": 0.9, 18 | "decay": 0.0, 19 | "embedding_max_norm": 1000, 20 | "filter_max_norm": 1000, 21 | "dense_max_norm": 1000, 22 | "l2_penalty": 0.0, 23 | "clipnorm": 0 24 | } 25 | -------------------------------------------------------------------------------- /models/keras/preposition/convnet/model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.setrecursionlimit(5000) 3 | import json 4 | import h5py 5 | 6 | import numpy as np 7 | 8 | from keras.models import Sequential, Graph 9 | from keras.layers.core import Dense, Dropout, Activation, Flatten, Layer 10 | from keras.layers.normalization import BatchNormalization 11 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 12 | from keras.layers.embeddings import Embedding 13 | from keras.constraints import maxnorm 14 | from keras.regularizers import l2 15 | from keras.optimizers import SGD, Adam, Adadelta, Adagrad, RMSprop 16 | 17 | from modeling.layers import ImmutableEmbedding 18 | from modeling.difference import TemporalDifference 19 | from modeling.builders import (build_embedding_layer, 20 | build_convolutional_layer, build_pooling_layer, 21 | build_dense_layer, build_optimizer, load_weights) 22 | 23 | class EncartaExamplesWithOKWindows(): 24 | def __init__(self, seed=17): 25 | self.random_state = np.random.RandomState(seed=seed) 26 | self.prepositions = set([7, 8, 10, 12, 13, 17, 18, 19, 27]) 27 | 28 | def fit_transform(self, X, y=None): 29 | return self.transform(X, y) 30 | 31 | def transform(self, X, y=None): 32 | # Select the examples where the middle column is in our 33 | # preposition set. 
34 | middle_column = X[:, X.shape[1]/2] 35 | ok = np.array([True] * len(X)) 36 | for i,val in enumerate(middle_column): 37 | if val not in self.prepositions: 38 | ok[i] = False 39 | print('in %d out %d' % (len(X), len(X[ok]))) 40 | if y is not None: 41 | return X[ok], y[ok] 42 | else: 43 | return X[ok] 44 | 45 | class TrainingSetRealExamples(): 46 | def __init__(self, seed=17): 47 | self.random_state = np.random.RandomState(seed=seed) 48 | 49 | def fit_transform(self, X, y=None): 50 | evens = [i*2 for i in np.arange(X.shape[0]/2)] 51 | if y is not None: 52 | return X[evens], y[evens] 53 | else: 54 | return X[evens] 55 | 56 | def transform(self, X, y=None): 57 | if y is None: 58 | return X 59 | else: 60 | return X, y 61 | 62 | class RandomPermuter(object): 63 | def __init__(self, seed=17): 64 | self.random_state = np.random.RandomState(seed=seed) 65 | 66 | def fit(self, X, y=None): 67 | pass 68 | 69 | def _transform(self, X, y=None): 70 | X = X.copy() 71 | middle_column_idx = np.int(X.shape[1]/2) 72 | middle_column_values = X[:, middle_column_idx] 73 | random_values = self.random_state.permutation(middle_column_values) 74 | X[:, middle_column_idx] = random_values 75 | if y is None: 76 | return X 77 | else: 78 | return X, y 79 | 80 | class ValidationSetRealExamples(RandomPermuter): 81 | def __init__(self, seed=17): 82 | self.random_state = np.random.RandomState(seed=seed) 83 | 84 | def fit_transform(self, X, y=None): 85 | if y is None: 86 | return X 87 | else: 88 | return X, y 89 | 90 | def transform(self, X, y=None): 91 | evens = [i*2 for i in np.arange(X.shape[0]/2)] 92 | if y is not None: 93 | return X[evens], y[evens] 94 | else: 95 | return X[evens] 96 | 97 | class TrainingSetPrepositionRandomPermuter(RandomPermuter): 98 | def fit_transform(self, X, y=None): 99 | return self._transform(X, y) 100 | 101 | def transform(self, X, y=None): 102 | if y is None: 103 | return X 104 | else: 105 | return X, y 106 | 107 | class ValidationSetPrepositionRandomPermuter(RandomPermuter): 108 | def fit_transform(self, X, y=None): 109 | if y is None: 110 | return X 111 | else: 112 | return X, y 113 | 114 | def transform(self, X, y=None): 115 | return self._transform(X, y) 116 | 117 | class RandomRegularizer(object): 118 | def __init__(self, seed=17): 119 | self.random_state = np.random.RandomState(seed=seed) 120 | 121 | def fit(self, X, y=None): 122 | pass 123 | 124 | def _transform(self, X, y=None): 125 | X = X.copy() 126 | middle_column_idx = np.int(X.shape[1]/2) 127 | middle_column_values = X[:, middle_column_idx] 128 | value_set = list(set(middle_column_values.tolist())) 129 | random_values = [] 130 | for i in np.arange(len(X)): 131 | current_value = middle_column_values[i] 132 | while True: 133 | random_value = self.random_state.choice(value_set) 134 | if random_value != current_value: 135 | random_values.append(random_value) 136 | break 137 | X[:, middle_column_idx] = random_values 138 | if y is None: 139 | return X 140 | else: 141 | return X, y 142 | 143 | class TrainingSetPrepositionRandomRegularizer(RandomRegularizer): 144 | """ 145 | Takes examples in the form of a vector of indices. Replaces each 146 | middle value in each vector with a value from some other example. 
147 | """ 148 | def fit_transform(self, X, y=None): 149 | return self._transform(X, y) 150 | 151 | def transform(self, X, y=None): 152 | if y is None: 153 | return X 154 | else: 155 | return X, y 156 | 157 | class ValidationSetPrepositionRandomRegularizer(RandomRegularizer): 158 | def fit_transform(self, X, y=None): 159 | if y is None: 160 | return X 161 | else: 162 | return X, y 163 | 164 | def transform(self, X, y=None): 165 | return self._transform(X, y) 166 | 167 | class UnconstrainedTrainingSetPrepositionPermuter(object): 168 | def __init__(self, seed=17): 169 | self.random_state = np.random.RandomState(seed=seed) 170 | 171 | def fit(self, X, y=None): 172 | pass 173 | 174 | def fit_transform(self, X, y=None): 175 | X = X.copy() 176 | middle_column_idx = np.int(X.shape[1]/2) 177 | middle_column_values = X[:, middle_column_idx] 178 | random_values = self.random_state.permutation(middle_column_values) 179 | X[:, middle_column_idx] = random_values 180 | if y is None: 181 | return X 182 | else: 183 | return X, y 184 | 185 | def transform(self, X, y=None): 186 | if y is None: 187 | return X 188 | else: 189 | return X, y 190 | 191 | 192 | def real_examples(path): 193 | f = h5py.File(path) 194 | # Target_code is 0 when the preposition in the example is the original 195 | # preposition in the corpus and 1 when the preposition has been randomly 196 | # replaced with another one in the confusion set. 197 | idx = f['target_code'].value == 0 198 | f.close() 199 | return idx 200 | 201 | def random_regularization_examples(path): 202 | f = h5py.File(path) 203 | idx = f['target_code'].value == 1 204 | f.close() 205 | return idx 206 | 207 | class Identity(Layer): 208 | def get_output(self, train): 209 | return self.get_input(train) 210 | 211 | def build_residual_model(args): 212 | graph = Graph() 213 | 214 | graph.add_input('input', input_shape=(args.input_width,), dtype='int') 215 | 216 | graph.add_node(build_embedding_layer(args), name='embedding', input='input') 217 | 218 | graph.add_node(build_convolutional_layer(args), name='conv', input='embedding') 219 | prev_layer = 'conv' 220 | if 'normalization' in args.regularization_layer: 221 | graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer) 222 | prev_layer = 'conv_bn' 223 | graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer) 224 | 225 | graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu') 226 | 227 | graph.add_node(Flatten(), name='flatten', input='pool') 228 | prev_layer = 'flatten' 229 | 230 | # Add two dense layers. 231 | for i in range(2): 232 | layer_name = 'dense%02d' %i 233 | l = build_dense_layer(args, n_hidden=args.n_filters) 234 | graph.add_node(l, name=layer_name, input=prev_layer) 235 | prev_layer = layer_name 236 | if 'normalization' in args.regularization_layer: 237 | graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer) 238 | prev_layer = layer_name+'bn' 239 | if 'dropout' in args.regularization_layer: 240 | graph.add_node(Dropout(args.dropout_p), name=layer_name+'do', input=prev_layer) 241 | prev_layer = layer_name+'do' 242 | 243 | # Add sequence of residual blocks. 244 | for i in range(args.n_residual_blocks): 245 | # Add a fixed number of layers per residual block. 
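# (Annotation) Each block saves its input behind an Identity node, applies
# n_layers_per_residual_block dense layers (with optional batch normalization,
# relu and dropout), and then adds the saved input back in via merge_mode='sum'
# before a final relu -- the identity-shortcut pattern of He et al. (2015),
# http://arxiv.org/abs/1512.03385.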
246 | block_name = '%02d' % i 247 | 248 | graph.add_node(Identity(), name=block_name+'input', input=prev_layer) 249 | prev_layer = block_input_layer = block_name+'input' 250 | 251 | for layer_num in range(args.n_layers_per_residual_block): 252 | layer_name = 'h%s%02d' % (block_name, layer_num) 253 | 254 | l = build_dense_layer(args, n_hidden=args.n_filters) 255 | graph.add_node(l, name=layer_name, input=prev_layer) 256 | prev_layer = layer_name 257 | 258 | if 'normalization' in args.regularization_layer: 259 | graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer) 260 | prev_layer = layer_name+'bn' 261 | 262 | if i < args.n_layers_per_residual_block: 263 | a = Activation('relu') 264 | graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer) 265 | prev_layer = layer_name+'relu' 266 | if 'dropout' in args.regularization_layer: 267 | graph.add_node(Dropout(args.dropout_p), name=layer_name+'do', input=prev_layer) 268 | prev_layer = layer_name+'do' 269 | 270 | graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum') 271 | graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output') 272 | prev_layer = block_input_layer = block_name+'relu' 273 | 274 | graph.add_node(build_dense_layer(args, args.n_classes, 275 | activation='softmax'), name='softmax', input=prev_layer) 276 | 277 | graph.add_output(name='output', input='softmax') 278 | 279 | load_weights(args, graph) 280 | 281 | optimizer = build_optimizer(args) 282 | 283 | graph.compile(loss={'output': args.loss}, optimizer=optimizer) 284 | 285 | return graph 286 | 287 | 288 | def build_ordinary_model(args): 289 | model = Sequential() 290 | model.add(build_embedding_layer(args)) 291 | if args.dropout_embedding_p > 0.: 292 | model.add(Dropout(args.dropout_embedding_p)) 293 | model.add(build_convolutional_layer(args)) 294 | if 'normalization' in args.regularization_layer: 295 | model.add(BatchNormalization()) 296 | model.add(Activation('relu')) 297 | if args.dropout_conv_p > 0.: 298 | model.add(Dropout(args.dropout_conv_p)) 299 | 300 | model.add(build_pooling_layer(args)) 301 | model.add(Flatten()) 302 | 303 | for i in range(args.n_fully_connected): 304 | model.add(build_dense_layer(args)) 305 | if 'normalization' in args.regularization_layer: 306 | model.add(BatchNormalization()) 307 | model.add(Activation('relu')) 308 | if 'dropout' in args.regularization_layer: 309 | model.add(Dropout(args.dropout_p)) 310 | 311 | model.add(build_dense_layer(args, args.n_classes, 312 | activation='softmax')) 313 | 314 | load_weights(args, model) 315 | 316 | optimizer = build_optimizer(args) 317 | 318 | model.compile(loss=args.loss, optimizer=optimizer) 319 | 320 | for k,v in json.loads(model.to_json()).items(): 321 | print(k) 322 | if k == 'layers': 323 | for l in v: 324 | print(' => %s' % l['name']) 325 | 326 | return model 327 | 328 | def build_model(args): 329 | np.random.seed(args.seed) 330 | 331 | if isinstance(args.n_residual_blocks, int): 332 | return build_residual_model(args) 333 | else: 334 | return build_ordinary_model(args) 335 | 336 | -------------------------------------------------------------------------------- /models/keras/preposition/convnet/run-medium.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -xe 2 | 3 | N=10000000 4 | 5 | #--extra-train-file $(ls data/preposition/prepositions-all-new-train-$N/* | grep -v 00.h5) \ 6 | 7 | 
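# Pre-trained word2vec embedding weights; the --model-cfg settings below point
# at them (embedding_weights=...) and keep them frozen (train_embeddings=false).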
embedding_weights=data/preposition/prepositions-all-new-weights.npy 8 | 9 | ./train_keras.py \ 10 | models/preposition/convnet \ 11 | data/preposition/prepositions-all-new-train-$N.h5 \ 12 | data/preposition/prepositions-all-new-validate.h5 \ 13 | XwindowNULL \ 14 | --target-name original_word_code \ 15 | --target-data data/preposition/prepositions-all-new-target-data.json \ 16 | --description "comparing inputs with convnets - input = XwindowNULL, target = original_word_code, contrasting, $N training examples, Adagrad, n_filters=500 , n_hidden=1000, n_word_dims=300 (pre-trained, frozen), 3 hidden layers, shuffled data" \ 17 | --n-vocab 83064 \ 18 | --model-cfg optimizer=Adagrad regularization_layer="" patience=10 n_filters=500 n_hidden=1000 n_word_dims=300 embedding_weights=$embedding_weights train_embeddings=false \ 19 | --n-validation 20000 \ 20 | --classification-report \ 21 | --shuffle \ 22 | --n-epochs 10 \ 23 | --log 24 | 25 | ./train_keras.py \ 26 | models/preposition/convnet \ 27 | data/preposition/prepositions-all-new-train-$N.h5 \ 28 | data/preposition/prepositions-all-new-validate.h5 \ 29 | XwindowNULL X \ 30 | --target-name original_word_code \ 31 | --target-data data/preposition/prepositions-all-new-target-data.json \ 32 | --description "comparing inputs with convnets - input = XwindowNULL X, target = original_word_code, contrasting, $N training examples, Adagrad, n_filters=500 , n_hidden=1000, n_word_dims=300 (pre-trained, frozen), 3 hidden layers, shuffled data" \ 33 | --n-vocab 83064 \ 34 | --model-cfg optimizer=Adagrad regularization_layer="" patience=10 n_filters=500 n_hidden=1000 n_word_dims=300 embedding_weights=$embedding_weights train_embeddings=false \ 35 | --n-validation 20000 \ 36 | --classification-report \ 37 | --shuffle \ 38 | --n-epochs 10 \ 39 | --log 40 | 41 | -------------------------------------------------------------------------------- /models/keras/preposition/convnet/run-small.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -xe 2 | 3 | N=1000000 4 | 5 | embedding_weights=data/preposition/prepositions-all-new-weights.npy 6 | 7 | function train() { 8 | n_filters=$1 9 | shift 10 | filter_width=$1 11 | shift 12 | features=$@ 13 | 14 | features_name=$(echo $features | sed 's, ,-,g') 15 | dest=$features_name-$n_filters-$filter_width 16 | 17 | ./train_keras.py \ 18 | models/keras/preposition/convnet \ 19 | data/preposition/prepositions-all-new-train-$N-balanced.h5 \ 20 | data/preposition/prepositions-all-new-validate-balanced.h5 \ 21 | $features \ 22 | --model-dest models/keras/preposition/convnet/small/feature-evaluation/$dest \ 23 | --target-name original_word_code \ 24 | --target-data data/preposition/prepositions-all-new-target-data.json \ 25 | --description "comparing inputs with convnets - input = $features, target = original_word_code, contrasting, $N training examples, Adagrad, patience=5, n_filters=$n_filters, filter_width=$filter_width, n_word_dims=300 (pre-trained, frozen), 1 hidden layer, shuffled data" \ 26 | --n-vocab 83064 \ 27 | --model-cfg optimizer=Adagrad regularization_layer="dropout" n_filters=$n_filters n_word_dims=300 embedding_weights=$embedding_weights train_embeddings=false filter_width=$filter_width patience=5 \ 28 | --n-validation 20000 \ 29 | --n-epochs 10 \ 30 | --shuffle \ 31 | --log 32 | } 33 | 34 | function xval5() { 35 | features=$@ 36 | for filter_width in 2 3 5 37 | do 38 | for n_filters in 100 39 | do 40 | train $n_filters $filter_width $features 41 | done 42 | done 43 | 
} 44 | 45 | function xval7() { 46 | features=$@ 47 | for filter_width in 2 3 5 7 48 | do 49 | for n_filters in 100 50 | do 51 | train $n_filters $filter_width $features 52 | done 53 | done 54 | } 55 | 56 | function xval9() { 57 | features=$@ 58 | for filter_width in 2 3 5 7 9 59 | do 60 | for n_filters in 100 61 | do 62 | train $n_filters $filter_width $features 63 | done 64 | done 65 | } 66 | 67 | xval5 Xwindow 68 | xval7 Xwindow7 69 | xval9 Xwindow9 70 | 71 | xval5 XwindowNULL X 72 | xval7 Xwindow7NULL X 73 | xval9 Xwindow9NULL X 74 | 75 | xval9 X 76 | 77 | xval5 XwindowNULL 78 | xval7 Xwindow7NULL 79 | xval9 Xwindow9NULL 80 | 81 | xval5 Xwindow X 82 | xval7 Xwindow7 X 83 | xval9 Xwindow9 X 84 | -------------------------------------------------------------------------------- /models/keras/preposition/convnet/small/find-best-filter-size/find-best.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Get a unique list of the feature names. 4 | for input in $(for file in */model.log; do echo $(dirname $file) | sed 's,-[0-9]$,,'; done | sort | uniq) 5 | do 6 | # For each feature, find the one filter width that yielded the lowest 7 | # validation loss. 8 | for file in ${input}*/model.log 9 | do 10 | echo $input $(dirname $file) $(grep val_acc $file | cat -n | sort -n -r -k17 | tail -1) 11 | done | sort -n -r -k17 | tail -1 12 | done | sort -n -r -k 17 13 | -------------------------------------------------------------------------------- /models/keras/preposition/convnet/small/find-best-filter-size/find-best.txt: -------------------------------------------------------------------------------- 1 | X-100 X-100-5 10 11-25 18:13 root INFO 170s - loss: 1.2008 - acc: 0.6007 - val_loss: 1.2047 - val_acc: 0.5964 2 | 3 | XwindowNULL-100 XwindowNULL-100-3 10 11-25 19:56 root INFO 160s - loss: 1.1836 - acc: 0.6147 - val_loss: 1.2035 - val_acc: 0.6055 4 | XwindowNULL-X-100 XwindowNULL-X-100-5 10 11-25 12:33 root INFO 177s - loss: 1.0559 - acc: 0.6551 - val_loss: 1.0390 - val_acc: 0.6549 5 | Xwindow-X-100 Xwindow-X-100-5 10 11-26 00:28 root INFO 178s - loss: 1.0339 - acc: 0.6663 - val_loss: 1.0215 - val_acc: 0.6684 6 | Xwindow-100 Xwindow-100-3 10 11-25 08:02 root INFO 161s - loss: 0.9886 - acc: 0.6886 - val_loss: 1.0048 - val_acc: 0.6834 7 | 8 | Xwindow7NULL-100 Xwindow7NULL-100-5 10 11-25 21:07 root INFO 135s - loss: 1.1051 - acc: 0.6404 - val_loss: 1.1552 - val_acc: 0.6223 9 | Xwindow7NULL-X-100 Xwindow7NULL-X-100-5 10 11-25 13:54 root INFO 180s - loss: 1.0307 - acc: 0.6636 - val_loss: 1.0158 - val_acc: 0.6630 10 | Xwindow7-X-100 Xwindow7-X-100-7 10 11-26 02:20 root INFO 189s - loss: 0.9867 - acc: 0.6817 - val_loss: 0.9934 - val_acc: 0.6765 11 | Xwindow7-100 Xwindow7-100-5 10 11-25 09:12 root INFO 135s - loss: 0.9192 - acc: 0.7094 - val_loss: 0.9673 - val_acc: 0.6980 12 | 13 | Xwindow9NULL-100 Xwindow9NULL-100-5 10 11-25 22:30 root INFO 204s - loss: 1.0893 - acc: 0.6448 - val_loss: 1.1373 - val_acc: 0.6236 14 | Xwindow9NULL-X-100 Xwindow9NULL-X-100-9 10 11-25 16:56 root INFO 211s - loss: 0.9888 - acc: 0.6783 - val_loss: 1.0049 - val_acc: 0.6674 15 | Xwindow9-X-100 Xwindow9-X-100-9 10 11-26 04:50 root INFO 211s - loss: 0.9618 - acc: 0.6897 - val_loss: 0.9908 - val_acc: 0.6795 16 | Xwindow9-100 Xwindow9-100-7 10 11-25 10:58 root INFO 135s - loss: 0.8795 - acc: 0.7216 - val_loss: 0.9521 - val_acc: 0.7008 17 | -------------------------------------------------------------------------------- /models/keras/preposition/lstm/model.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "regularization_layer": null, 3 | "n_word_dims": 50, 4 | "n_units": 100, 5 | "loss": "categorical_crossentropy", 6 | "patience": 10, 7 | "batch_size": 128, 8 | "optimizer": "SGD", 9 | "learning_rate": 0.001, 10 | "momentum": 0.9, 11 | "decay": 0.0, 12 | "embedding_max_norm": 1000, 13 | "truncate_gradient": -1, 14 | "clipnorm": 0, 15 | "mask_zero": false, 16 | "l2_penalty": 0.0 17 | } 18 | -------------------------------------------------------------------------------- /models/keras/preposition/lstm/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from keras.models import Sequential 4 | from keras.layers.core import Dense, Dropout, Activation, Flatten 5 | from keras.layers.recurrent import LSTM, GRU 6 | from keras.layers.embeddings import Embedding 7 | from keras.constraints import maxnorm 8 | from keras.regularizers import l2 9 | from keras.optimizers import SGD, Adam, RMSprop, Adadelta, Adagrad 10 | 11 | from modeling.layers import ImmutableEmbedding 12 | 13 | def build_model(args): 14 | print("args", vars(args)) 15 | 16 | model = Sequential() 17 | 18 | np.random.seed(args.seed) 19 | 20 | if hasattr(args, 'embedding_weights') and args.embedding_weights is not None: 21 | W = np.load(args.embedding_weights) 22 | if args.train_embeddings: 23 | model.add(Embedding(args.n_vocab, args.n_word_dims, 24 | weights=[W], 25 | W_constraint=maxnorm(args.embedding_max_norm))) 26 | else: 27 | model.add(ImmutableEmbedding(args.n_vocab, args.n_word_dims, 28 | weights=[W])) 29 | else: 30 | model.add(Embedding(args.n_vocab, args.n_word_dims, 31 | mask_zero=args.mask_zero, 32 | W_constraint=maxnorm(args.embedding_max_norm))) 33 | 34 | model.add(LSTM(args.n_word_dims, args.n_units, 35 | truncate_gradient=args.truncate_gradient, 36 | return_sequences=True)) 37 | if args.regularization_layer == 'dropout': 38 | model.add(Dropout(0.2)) 39 | #elif args.regularization_layer == 'normalization': 40 | # model.add(BatchNormalization((args.n_filters,))) 41 | 42 | model.add(LSTM(args.n_units, args.n_units, 43 | truncate_gradient=args.truncate_gradient, 44 | return_sequences=True)) 45 | if args.regularization_layer == 'dropout': 46 | model.add(Dropout(0.2)) 47 | #elif args.regularization_layer == 'normalization': 48 | # model.add(BatchNormalization((args.n_filters,))) 49 | 50 | ''' 51 | model.add(LSTM(args.n_units, args.n_units, 52 | truncate_gradient=args.truncate_gradient, 53 | return_sequences=True)) 54 | if args.regularization_layer == 'dropout': 55 | model.add(Dropout(0.2)) 56 | #elif args.regularization_layer == 'normalization': 57 | # model.add(BatchNormalization((args.n_filters,))) 58 | ''' 59 | 60 | model.add(LSTM(args.n_units, args.n_units, 61 | truncate_gradient=args.truncate_gradient, 62 | return_sequences=False)) 63 | if args.regularization_layer == 'dropout': 64 | model.add(Dropout(0.2)) 65 | #elif args.regularization_layer == 'normalization': 66 | # model.add(BatchNormalization((args.n_filters,))) 67 | 68 | model.add(Dense(args.n_units, args.n_classes, 69 | W_regularizer=l2(args.l2_penalty))) 70 | model.add(Activation('softmax')) 71 | 72 | if args.optimizer == 'SGD': 73 | optimizer = SGD(lr=args.learning_rate, 74 | decay=args.decay, momentum=args.momentum, 75 | clipnorm=args.clipnorm) 76 | elif args.optimizer == 'Adam': 77 | optimizer = Adam(clipnorm=args.clipnorm) 78 | elif args.optimizer == 'RMSprop': 79 | optimizer = 
RMSprop(clipnorm=args.clipnorm) 80 | elif args.optimizer == 'Adadelta': 81 | optimizer = Adadelta(clipnorm=args.clipnorm) 82 | elif args.optimizer == 'Adagrad': 83 | optimizer = Adagrad(clipnorm=args.clipnorm) 84 | else: 85 | raise ValueError("don't know how to use optimizer {0}".format(args.optimizer)) 86 | 87 | model.compile(loss=args.loss, optimizer=optimizer) 88 | 89 | return model 90 | -------------------------------------------------------------------------------- /models/keras/spelling/convnet/exp03-inputs/op_transpose_n_ops_1_n_errors_per_word_3/analysis.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import modeling.utils 6 | import spelling.baseline 7 | 8 | def mark(words): 9 | return ['^'+w+'$' for w in words] 10 | 11 | def build_index(): 12 | train_hdf5_file = 'data/spelling/experimental/op-transpose-distance-1-errors-per-word-3.h5' 13 | train_h5 = h5py.File(train_hdf5_file) 14 | 15 | train_csv_file = 'data/spelling/experimental/op-transpose-distance-1-errors-per-word-3.csv' 16 | train_df = pd.read_csv(train_csv_file, sep='\t', encoding='utf8') 17 | words = train_df.real_word.tolist() 18 | marked_words = mark(words) 19 | 20 | X_train = train_h5['marked_chars'].value 21 | index_size = np.max(X_train) 22 | i = 0 23 | index = {} 24 | 25 | while len(index) < index_size: 26 | marked_word = marked_words[i] 27 | row = X_train[i] 28 | 29 | for j,idx in enumerate(row): 30 | if idx == 0: 31 | break 32 | index[marked_word[j]] = idx 33 | 34 | i += 1 35 | 36 | return index 37 | 38 | index = build_index() 39 | 40 | model_dir = 'models/keras/spelling/convnet/exp03-inputs/op_transpose_n_ops_1_n_errors_per_word_3' 41 | 42 | df = pd.read_csv('../spelling/data/aspell-dict.csv.gz', sep='\t', encoding='utf8') 43 | words = df.word.tolist() 44 | vocab = set(words) 45 | 46 | lm = spelling.baseline.CharacterLanguageModel('witten-bell', order=3) 47 | lm.fit(words) 48 | 49 | model, model_cfg = modeling.utils.load_model(model_dir, model_weights=True) 50 | 51 | bins = np.arange(0, 1, .1) 52 | outputs = {} 53 | histograms = {} 54 | 55 | for order in range(1, 4): 56 | print('order %d' % order) 57 | generated = [] 58 | # Generate 500k words, controlling for length and excluding those 59 | # that are already in the vocabulary. Only keep the first 100k 60 | # of those that satisfy our requirements. 
61 | for g in lm.generate(order, 500000): 62 | if len(g) < 5 or len(g) > 10: 63 | continue 64 | if g in vocab: 65 | continue 66 | generated.append(g) 67 | if len(generated) == 100000: 68 | break 69 | 70 | marked = mark(generated) 71 | X = np.zeros((len(marked), input_width)) 72 | for i,word in enumerate(marked): 73 | for j,chr in enumerate(word): 74 | X[i,j] = index[chr] 75 | 76 | output = zip(generated, model.predict(X)[:, 1]) 77 | outputs[order] = output 78 | histograms[order] = np.histogram([o[1] for o in output], bins=bins) 79 | -------------------------------------------------------------------------------- /models/keras/spelling/convnet/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_word_dims": 0, 3 | "n_filters": 0, 4 | "filter_width": 0, 5 | "n_fully_connected": 0, 6 | "n_residual_blocks": 0, 7 | 8 | "train_embeddings": true, 9 | "embedding_init": "uniform", 10 | "batch_normalization": true, 11 | 12 | "optimizer": "Adam", 13 | "loss": "categorical_crossentropy", 14 | "l2_penalty": 0.0, 15 | 16 | "dropout_embedding_p": 0.0, 17 | "dropout_conv_p": 0.0, 18 | "dropout_fc_p": 0.0, 19 | 20 | "patience": 1, 21 | "batch_size": 128, 22 | 23 | "embedding_max_norm": 1000, 24 | "filter_max_norm": 1000, 25 | "dense_max_norm": 1000, 26 | "clipnorm": 0, 27 | "border_mode": "valid" 28 | } 29 | -------------------------------------------------------------------------------- /models/keras/spelling/convnet/model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.setrecursionlimit(5000) 3 | import json 4 | import h5py 5 | 6 | import numpy as np 7 | 8 | from keras.models import Sequential, Graph 9 | from keras.layers.core import Dense, Dropout, Activation, Flatten, Layer 10 | from keras.layers.normalization import BatchNormalization 11 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 12 | from keras.layers.embeddings import Embedding 13 | from keras.constraints import maxnorm 14 | from keras.regularizers import l2 15 | from keras.optimizers import SGD, Adam, Adadelta, Adagrad, RMSprop 16 | 17 | from modeling.layers import ImmutableEmbedding 18 | from modeling.difference import TemporalDifference 19 | import modeling.data 20 | from modeling.builders import (build_embedding_layer, 21 | build_convolutional_layer, build_pooling_layer, 22 | build_dense_layer, build_optimizer, load_weights) 23 | 24 | class GraphMarshaller(modeling.data.GraphMarshaller): 25 | def marshal(self, data, target=None): 26 | return { 27 | 'input': data, 28 | 'output': target 29 | } 30 | 31 | def unmarshal(self, output): 32 | return output['output'] 33 | 34 | class Identity(Layer): 35 | def get_output(self, train): 36 | return self.get_input(train) 37 | 38 | def build_residual_model(args): 39 | graph = Graph() 40 | 41 | graph.add_input('input', input_shape=(args.input_width,), dtype='int') 42 | 43 | graph.add_node(build_embedding_layer(args), name='embedding', input='input') 44 | 45 | graph.add_node(build_convolutional_layer(args), name='conv', input='embedding') 46 | prev_layer = 'conv' 47 | if args.batch_normalization: 48 | graph.add_node(BatchNormalization(), name='conv_bn', input=prev_layer) 49 | prev_layer = 'conv_bn' 50 | graph.add_node(Activation('relu'), name='conv_relu', input=prev_layer) 51 | 52 | graph.add_node(build_pooling_layer(args), name='pool', input='conv_relu') 53 | 54 | graph.add_node(Flatten(), name='flatten', input='pool') 55 | prev_layer = 'flatten' 56 | 57 | # Add some number 
of fully-connected layers without skip connections. 58 | for i in range(args.n_fully_connected): 59 | layer_name = 'dense%02d' %i 60 | l = build_dense_layer(args, n_hidden=args.n_hidden) 61 | graph.add_node(l, name=layer_name, input=prev_layer) 62 | prev_layer = layer_name 63 | if args.batch_normalization: 64 | graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer) 65 | prev_layer = layer_name+'bn' 66 | if args.dropout_fc_p > 0.: 67 | graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer) 68 | prev_layer = layer_name+'do' 69 | 70 | # Add sequence of residual blocks. 71 | for i in range(args.n_residual_blocks): 72 | # Add a fixed number of layers per residual block. 73 | block_name = '%02d' % i 74 | 75 | graph.add_node(Identity(), name=block_name+'input', input=prev_layer) 76 | prev_layer = block_input_layer = block_name+'input' 77 | 78 | try: 79 | n_layers_per_residual_block = args.n_layers_per_residual_block 80 | except AttributeError: 81 | n_layers_per_residual_block = 2 82 | 83 | for layer_num in range(n_layers_per_residual_block): 84 | layer_name = 'h%s%02d' % (block_name, layer_num) 85 | 86 | l = build_dense_layer(args, n_hidden=args.n_hidden) 87 | graph.add_node(l, name=layer_name, input=prev_layer) 88 | prev_layer = layer_name 89 | 90 | if args.batch_normalization: 91 | graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer) 92 | prev_layer = layer_name+'bn' 93 | 94 | if i < n_layers_per_residual_block: 95 | a = Activation('relu') 96 | graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer) 97 | prev_layer = layer_name+'relu' 98 | if args.dropout_fc_p > 0.: 99 | graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer) 100 | prev_layer = layer_name+'do' 101 | 102 | graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum') 103 | graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output') 104 | prev_layer = block_input_layer = block_name+'relu' 105 | 106 | graph.add_node(build_dense_layer(args, args.n_classes, 107 | activation='softmax'), name='softmax', input=prev_layer) 108 | 109 | graph.add_output(name='output', input='softmax') 110 | 111 | load_weights(args, graph) 112 | 113 | optimizer = build_optimizer(args) 114 | 115 | graph.compile(loss={'output': args.loss}, optimizer=optimizer) 116 | 117 | return graph 118 | 119 | def build_ordinary_model(args): 120 | model = Sequential() 121 | model.add(build_embedding_layer(args)) 122 | if args.dropout_embedding_p > 0.: 123 | model.add(Dropout(args.dropout_embedding_p)) 124 | model.add(build_convolutional_layer(args)) 125 | if args.batch_normalization: 126 | model.add(BatchNormalization()) 127 | model.add(Activation('relu')) 128 | if args.dropout_conv_p > 0.: 129 | model.add(Dropout(args.dropout_conv_p)) 130 | 131 | model.add(build_pooling_layer(args)) 132 | model.add(Flatten()) 133 | 134 | for i in range(args.n_fully_connected): 135 | model.add(build_dense_layer(args)) 136 | if args.batch_normalization: 137 | model.add(BatchNormalization()) 138 | model.add(Activation('relu')) 139 | if args.dropout_fc_p > 0.: 140 | model.add(Dropout(args.dropout_fc_p)) 141 | 142 | model.add(build_dense_layer(args, args.n_classes, 143 | activation='softmax')) 144 | 145 | load_weights(args, model) 146 | 147 | optimizer = build_optimizer(args) 148 | 149 | model.compile(loss=args.loss, optimizer=optimizer) 150 | 151 | if args.verbose: 152 | for k,v in 
json.loads(model.to_json()).items(): 153 | if k == 'layers': 154 | for l in v: 155 | print(' => %s' % l['name']) 156 | 157 | return model 158 | 159 | def build_model(args): 160 | np.random.seed(args.seed) 161 | 162 | if args.n_residual_blocks > 0: 163 | return build_residual_model(args) 164 | else: 165 | return build_ordinary_model(args) 166 | 167 | -------------------------------------------------------------------------------- /models/keras/spelling/correction/isolated/binary/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_word_dims": 0, 3 | "n_filters": 0, 4 | "filter_width": 0, 5 | "n_fully_connected": 0, 6 | "n_residual_blocks": 0, 7 | 8 | "train_embeddings": true, 9 | "embedding_init": "uniform", 10 | "batch_normalization": true, 11 | 12 | "optimizer": "Adam", 13 | "loss": "categorical_crossentropy", 14 | "l2_penalty": 0.0, 15 | 16 | "dropout_embedding_p": 0.0, 17 | "dropout_conv_p": 0.0, 18 | "dropout_fc_p": 0.0, 19 | 20 | "patience": 1, 21 | "batch_size": 128, 22 | 23 | "embedding_max_norm": 1000, 24 | "filter_max_norm": 1000, 25 | "dense_max_norm": 1000, 26 | "clipnorm": 0, 27 | "border_mode": "valid" 28 | } 29 | -------------------------------------------------------------------------------- /models/keras/spelling/correction/isolated/binary/model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import h5py 3 | sys.setrecursionlimit(5000) 4 | import json 5 | import h5py 6 | 7 | from sklearn.utils import check_random_state 8 | 9 | import numpy as np 10 | 11 | from keras.models import Sequential, Graph 12 | from keras.utils import np_utils 13 | from keras.layers.core import Dense, Dropout, Activation, Flatten, Layer 14 | from keras.layers.normalization import BatchNormalization 15 | 16 | import modeling.data 17 | from modeling.builders import (build_embedding_layer, 18 | build_convolutional_layer, build_pooling_layer, 19 | build_dense_layer, build_optimizer, load_weights, 20 | build_hierarchical_softmax_layer) 21 | from modeling.utils import balanced_class_weights 22 | 23 | class SingleFileDataset(object): 24 | def __init__(self, file_path, data_name, target_name, batch_size, random_state=17): 25 | assert isinstance(data_name, (list,tuple)) 26 | assert isinstance(target_name, (list,tuple)) 27 | 28 | random_state = check_random_state(random_state) 29 | 30 | self.__dict__.update(locals()) 31 | del self.self 32 | 33 | self.load_data() 34 | 35 | def load_data(self): 36 | self.data = {} 37 | self.target = {} 38 | self.target_one_hot = {} 39 | 40 | f = h5py.File(self.file_path) 41 | self.n = None 42 | 43 | for data_name in self.data_name: 44 | self.data[data_name] = f[data_name].value 45 | if self.n is None: 46 | self.n = len(self.data[data_name]) 47 | else: 48 | assert len(self.data[data_name]) == self.n 49 | for target_name in self.target_name: 50 | target = f[target_name].value 51 | assert len(target) == self.n 52 | 53 | self.target[target_name] = target 54 | n_classes = np.max(target) + 1 55 | self.target_one_hot[target_name] = np_utils.to_categorical(target, n_classes) 56 | f.close() 57 | 58 | def get_dict(self, one_hot=True): 59 | d = {} 60 | for data_name in self.data_name: 61 | d[data_name] = self.data[data_name] 62 | for target_name in self.target_name: 63 | if one_hot: 64 | d[target_name] = self.target_one_hot[target_name] 65 | else: 66 | d[target_name] = self.target[target_name] 67 | return d 68 | 69 | def class_weights(self, class_weight_exponent): 70 | return 
balanced_class_weights( 71 | self.target['binary_target'], 72 | 2, 73 | class_weight_exponent) 74 | 75 | def generate(self): 76 | while 1: 77 | idx = self.random_state.choice(self.n, size=self.batch_size, replace=False) 78 | batch = {} 79 | for data_name in self.data_name: 80 | batch[data_name] = self.data[data_name][idx] 81 | for target_name in self.target_name: 82 | batch[target_name] = self.target_one_hot[target_name][idx] 83 | yield batch 84 | 85 | class Identity(Layer): 86 | def get_output(self, train): 87 | return self.get_input(train) 88 | 89 | def add_bn_relu(graph, args, prev_layer): 90 | bn_name = prev_layer + '_bn' 91 | relu_name = prev_layer + '_relu' 92 | if args.batch_normalization: 93 | graph.add_node(BatchNormalization(), name=bn_name, input=prev_layer) 94 | prev_layer = bn_name 95 | graph.add_node(Activation('relu'), name=relu_name, input=prev_layer) 96 | return relu_name 97 | 98 | def build_model(args, train_data, validation_data): 99 | np.random.seed(args.seed) 100 | 101 | graph = Graph() 102 | 103 | non_word_input = 'non_word_marked_chars' 104 | real_word_input = 'real_word_marked_chars' 105 | 106 | non_word_input_width = train_data.data[non_word_input].shape[1] 107 | real_word_input_width = train_data.data[real_word_input].shape[1] 108 | 109 | print('non_word_input_width', non_word_input_width) 110 | print('real_word_input_width', real_word_input_width) 111 | 112 | graph.add_input(non_word_input, input_shape=(non_word_input_width,), dtype='int') 113 | graph.add_node(build_embedding_layer(args, input_width=non_word_input_width), 114 | name='non_word_embedding', input=non_word_input) 115 | graph.add_node(build_convolutional_layer(args), name='non_word_conv', input='non_word_embedding') 116 | non_word_prev_layer = add_bn_relu(graph, args, 'non_word_conv') 117 | graph.add_node(build_pooling_layer(args, input_width=non_word_input_width), 118 | name='non_word_pool', input=non_word_prev_layer) 119 | graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool') 120 | 121 | graph.add_input(real_word_input, input_shape=(real_word_input_width,), dtype='int') 122 | graph.add_node(build_embedding_layer(args, input_width=real_word_input_width), 123 | name='real_word_embedding', input=real_word_input) 124 | graph.add_node(build_convolutional_layer(args), name='real_word_conv', input='real_word_embedding') 125 | real_word_prev_layer = add_bn_relu(graph, args, 'real_word_conv') 126 | graph.add_node(build_pooling_layer(args, input_width=real_word_input_width), 127 | name='real_word_pool', input=real_word_prev_layer) 128 | graph.add_node(Flatten(), name='real_word_flatten', input='real_word_pool') 129 | 130 | # Add some number of fully-connected layers without skip connections. 131 | prev_layer = 'join_non_and_real' 132 | for i in range(args.n_fully_connected): 133 | layer_name = 'dense%02d' %i 134 | l = build_dense_layer(args, n_hidden=args.n_hidden) 135 | if i == 0: 136 | graph.add_node(l, name=layer_name, 137 | inputs=['non_word_flatten', 'real_word_flatten']) 138 | else: 139 | graph.add_node(l, name=layer_name, input=prev_layer) 140 | prev_layer = layer_name 141 | if args.batch_normalization: 142 | graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer) 143 | prev_layer = layer_name+'bn' 144 | if args.dropout_fc_p > 0.: 145 | graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer) 146 | prev_layer = layer_name+'do' 147 | 148 | # Add sequence of residual blocks. 
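# The residual stack below is optional: args.n_residual_blocks controls how
# many blocks are added, and the number of dense layers inside each block is
# taken from args.n_layers_per_residual_block, falling back to 2 when that
# attribute is not set (see the try/except below).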
149 | for i in range(args.n_residual_blocks): 150 | # Add a fixed number of layers per residual block. 151 | block_name = '%02d' % i 152 | 153 | graph.add_node(Identity(), name=block_name+'input', input=prev_layer) 154 | prev_layer = block_input_layer = block_name+'input' 155 | 156 | try: 157 | n_layers_per_residual_block = args.n_layers_per_residual_block 158 | except AttributeError: 159 | n_layers_per_residual_block = 2 160 | 161 | for layer_num in range(n_layers_per_residual_block): 162 | layer_name = 'h%s%02d' % (block_name, layer_num) 163 | 164 | l = build_dense_layer(args, n_hidden=args.n_hidden) 165 | graph.add_node(l, name=layer_name, input=prev_layer) 166 | prev_layer = layer_name 167 | 168 | if args.batch_normalization: 169 | graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer) 170 | prev_layer = layer_name+'bn' 171 | 172 | if i < n_layers_per_residual_block: 173 | a = Activation('relu') 174 | graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer) 175 | prev_layer = layer_name+'relu' 176 | if args.dropout_fc_p > 0.: 177 | graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer) 178 | prev_layer = layer_name+'do' 179 | 180 | graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum') 181 | graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output') 182 | prev_layer = block_input_layer = block_name+'relu' 183 | 184 | #if hasattr(args, 'n_hsm_classes'): 185 | # graph.add_node(build_hierarchical_softmax_layer(args), 186 | # name='softmax', input=prev_layer) 187 | #else: 188 | 189 | graph.add_node(build_dense_layer(args, 2, 190 | activation='softmax'), name='softmax', input=prev_layer) 191 | 192 | graph.add_output(name='binary_target', input='softmax') 193 | 194 | load_weights(args, graph) 195 | 196 | optimizer = build_optimizer(args) 197 | 198 | graph.compile(loss={'binary_target': args.loss}, optimizer=optimizer) 199 | 200 | return graph 201 | 202 | def load_train(args, model_cfg): 203 | return SingleFileDataset( 204 | args.train_path, 205 | args.data_name, [args.target_name], 206 | model_cfg.batch_size, args.seed) 207 | 208 | def load_validation(args, model_cfg): 209 | return SingleFileDataset( 210 | args.validation_path, 211 | args.data_name, [args.target_name], 212 | model_cfg.batch_size, args.seed) 213 | 214 | def fit_model(graph, train_data, validation_data, args, callbacks=[]): 215 | graph.fit_generator(train_data.generate(), 216 | samples_per_epoch=int(train_data.n/100), 217 | nb_epoch=args.n_epochs, 218 | validation_data=validation_data.get_dict(), 219 | callbacks=callbacks, 220 | class_weight=train_data.class_weights(args.class_weight_exponent)) 221 | 222 | #fit_generator(generator, samples_per_epoch, nb_epoch, verbose=1, callbacks=[], validation_data=None, 223 | # nb_val_samples=None, class_weight={}, nb_worker=1, nb_val_worker=None) 224 | 225 | -------------------------------------------------------------------------------- /models/keras/spelling/correction/isolated/multiclass/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_word_dims": 0, 3 | "n_filters": 0, 4 | "filter_width": 0, 5 | "n_fully_connected": 0, 6 | "n_residual_blocks": 0, 7 | 8 | "train_embeddings": true, 9 | "embedding_init": "uniform", 10 | "batch_normalization": true, 11 | 12 | "optimizer": "Adam", 13 | "loss": "categorical_crossentropy", 14 | "l2_penalty": 0.0, 15 | 16 | "dropout_embedding_p": 0.0, 17 
| "dropout_conv_p": 0.0, 18 | "dropout_fc_p": 0.0, 19 | 20 | "n_classes": 119774, 21 | "patience": 1, 22 | "batch_size": 128, 23 | 24 | "embedding_max_norm": 1000, 25 | "filter_max_norm": 1000, 26 | "dense_max_norm": 1000, 27 | "clipnorm": 0, 28 | "border_mode": "valid" 29 | } 30 | -------------------------------------------------------------------------------- /models/keras/spelling/correction/isolated/multiclass/model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import h5py 3 | sys.setrecursionlimit(5000) 4 | import json 5 | import h5py 6 | 7 | from sklearn.utils import check_random_state 8 | 9 | import numpy as np 10 | 11 | from keras.models import Sequential, Graph 12 | from keras.utils import np_utils 13 | from keras.layers.core import Dense, Dropout, Activation, Flatten, Layer 14 | from keras.layers.normalization import BatchNormalization 15 | 16 | import modeling.data 17 | from modeling.builders import (build_embedding_layer, 18 | build_convolutional_layer, build_pooling_layer, 19 | build_dense_layer, build_optimizer, load_weights, 20 | build_hierarchical_softmax_layer) 21 | from modeling.utils import balanced_class_weights 22 | 23 | class HDF5FileDataset(object): 24 | def __init__(self, file_path, data_name, target_name, batch_size, one_hot=True, random_state=17): 25 | assert isinstance(data_name, (list,tuple)) 26 | assert isinstance(target_name, (list,tuple)) 27 | 28 | random_state = check_random_state(random_state) 29 | 30 | self.__dict__.update(locals()) 31 | del self.self 32 | 33 | self._load_data() 34 | self._check_data() 35 | 36 | def _load_data(self): 37 | self.hdf5_file = h5py.File(self.file_path) 38 | self.n_classes = {} 39 | for target_name in self.target_name: 40 | self.n_classes[target_name] = np.max(self.hdf5_file[target_name])+1 41 | 42 | def _check_data(self): 43 | self.n = None 44 | for data_name in self.data_name: 45 | if self.n is None: 46 | self.n = len(self.hdf5_file[data_name]) 47 | else: 48 | assert len(self.hdf5_file[data_name]) == self.n 49 | for target_name in self.target_name: 50 | assert len(self.hdf5_file[target_name]) == self.n 51 | 52 | def __getitem__(self, name): 53 | return self.hdf5_file[name].value 54 | 55 | def class_weights(self, class_weight_exponent, target='multiclass_correction_target'): 56 | return balanced_class_weights( 57 | self.hdf5_file[target], 58 | 2, 59 | class_weight_exponent) 60 | 61 | def generator(self, one_hot=None, batch_size=None): 62 | if one_hot is None: one_hot = self.one_hot 63 | if batch_size is None: batch_size = self.batch_size 64 | 65 | while 1: 66 | idx = self.random_state.choice(self.n, size=batch_size, replace=False) 67 | batch = {} 68 | for data_name in self.data_name: 69 | batch[data_name] = self.hdf5_file[data_name].value[idx] 70 | for target_name in self.target_name: 71 | target = self.hdf5_file[target_name].value[idx] 72 | if one_hot: 73 | batch[target_name] = np_utils.to_categorical(target, 74 | self.n_classes[target_name]) 75 | else: 76 | batch[target_name] = target 77 | 78 | yield batch 79 | 80 | class Identity(Layer): 81 | def get_output(self, train): 82 | return self.get_input(train) 83 | 84 | def add_bn_relu(graph, args, prev_layer): 85 | bn_name = prev_layer + '_bn' 86 | relu_name = prev_layer + '_relu' 87 | if args.batch_normalization: 88 | graph.add_node(BatchNormalization(), name=bn_name, input=prev_layer) 89 | prev_layer = bn_name 90 | graph.add_node(Activation('relu'), name=relu_name, input=prev_layer) 91 | return relu_name 92 | 93 | def 
build_model(args, train_data): 94 | np.random.seed(args.seed) 95 | 96 | graph = Graph() 97 | 98 | non_word_input = 'non_word_marked_chars' 99 | non_word_input_width = train_data[non_word_input].shape[1] 100 | 101 | graph.add_input(non_word_input, input_shape=(non_word_input_width,), dtype='int') 102 | graph.add_node(build_embedding_layer(args, input_width=non_word_input_width), 103 | name='non_word_embedding', input=non_word_input) 104 | graph.add_node(build_convolutional_layer(args), name='non_word_conv', input='non_word_embedding') 105 | non_word_prev_layer = add_bn_relu(graph, args, 'non_word_conv') 106 | graph.add_node(build_pooling_layer(args, input_width=non_word_input_width), 107 | name='non_word_pool', input=non_word_prev_layer) 108 | graph.add_node(Flatten(), name='non_word_flatten', input='non_word_pool') 109 | 110 | # Add some number of fully-connected layers without skip connections. 111 | prev_layer = 'non_word_flatten' 112 | for i in range(args.n_fully_connected): 113 | layer_name = 'dense%02d' %i 114 | l = build_dense_layer(args, n_hidden=args.n_hidden) 115 | graph.add_node(l, name=layer_name, input=prev_layer) 116 | prev_layer = layer_name 117 | if args.batch_normalization: 118 | graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer) 119 | prev_layer = layer_name+'bn' 120 | if args.dropout_fc_p > 0.: 121 | graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer) 122 | prev_layer = layer_name+'do' 123 | 124 | # Add sequence of residual blocks. 125 | for i in range(args.n_residual_blocks): 126 | # Add a fixed number of layers per residual block. 127 | block_name = '%02d' % i 128 | 129 | graph.add_node(Identity(), name=block_name+'input', input=prev_layer) 130 | prev_layer = block_input_layer = block_name+'input' 131 | 132 | try: 133 | n_layers_per_residual_block = args.n_layers_per_residual_block 134 | except AttributeError: 135 | n_layers_per_residual_block = 2 136 | 137 | for layer_num in range(n_layers_per_residual_block): 138 | layer_name = 'h%s%02d' % (block_name, layer_num) 139 | 140 | l = build_dense_layer(args, n_hidden=args.n_hidden) 141 | graph.add_node(l, name=layer_name, input=prev_layer) 142 | prev_layer = layer_name 143 | 144 | if args.batch_normalization: 145 | graph.add_node(BatchNormalization(), name=layer_name+'bn', input=prev_layer) 146 | prev_layer = layer_name+'bn' 147 | 148 | if i < n_layers_per_residual_block: 149 | a = Activation('relu') 150 | graph.add_node(Activation('relu'), name=layer_name+'relu', input=prev_layer) 151 | prev_layer = layer_name+'relu' 152 | if args.dropout_fc_p > 0.: 153 | graph.add_node(Dropout(args.dropout_fc_p), name=layer_name+'do', input=prev_layer) 154 | prev_layer = layer_name+'do' 155 | 156 | graph.add_node(Identity(), name=block_name+'output', inputs=[block_input_layer, prev_layer], merge_mode='sum') 157 | graph.add_node(Activation('relu'), name=block_name+'relu', input=block_name+'output') 158 | prev_layer = block_input_layer = block_name+'relu' 159 | 160 | n_classes = np.max(train_data['multiclass_correction_target']) + 1 161 | if hasattr(args, 'n_hsm_classes'): 162 | graph.add_node(build_hierarchical_softmax_layer(args), 163 | name='softmax', input=prev_layer) 164 | else: 165 | graph.add_node(build_dense_layer(args, n_classes, 166 | activation='softmax'), name='softmax', input=prev_layer) 167 | 168 | graph.add_output(name='multiclass_correction_target', input='softmax') 169 | 170 | load_weights(args, graph) 171 | 172 | optimizer = build_optimizer(args) 173 | 174 | 
graph.compile(loss={'multiclass_correction_target': args.loss}, optimizer=optimizer) 175 | 176 | return graph 177 | 178 | def fit(config, callbacks=[]): 179 | train_data = HDF5FileDataset( 180 | config.train_path, 181 | config.data_name, 182 | [config.target_name], 183 | config.batch_size, 184 | config.seed) 185 | 186 | validation_data = HDF5FileDataset( 187 | config.validation_path, 188 | config.data_name, 189 | [config.target_name], 190 | config.batch_size, 191 | config.seed) 192 | 193 | graph = build_model(config, train_data) 194 | 195 | graph.fit_generator(train_data.generator(), 196 | samples_per_epoch=int(train_data.n/100), 197 | nb_epoch=config.n_epochs, 198 | validation_data=validation_data.generator(), 199 | nb_val_samples=10000, 200 | callbacks=callbacks, 201 | class_weight=train_data.class_weights(config.class_weight_exponent)) 202 | -------------------------------------------------------------------------------- /models/keras/spelling/toksents.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | from data import data 6 | import marshal 7 | 8 | sent_file = sys.argv[1] 9 | d = data.load_data(sent_file) 10 | token_seq = data.tokenize(d) 11 | marshal_file = os.path.splitext(sent_file)[0] + '.marshal' 12 | marshal.dump(token_seq, open(marshal_file, 'w')) 13 | print('DONE ' + sent_file) 14 | -------------------------------------------------------------------------------- /models/lasagne/spelling/convnet/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_word_dims": 50, 3 | "use_difference": false, 4 | "n_filters": 1000, 5 | "filter_width": 4, 6 | "loss": "categorical_crossentropy", 7 | "patience": 400, 8 | "batch_size": 128, 9 | "optimizer": "Adagrad", 10 | "learning_rate": 0.1, 11 | "momentum": 0.9, 12 | "decay": 0.0, 13 | "embedding_max_norm": 1000, 14 | "filter_max_norm": 1000, 15 | "dense_max_norm": 1000, 16 | "l2_penalty": 0.0, 17 | "clipnorm": 0, 18 | "regularization_layer": "dropout", 19 | "dropout_p_conv": 0.1, 20 | "dropout_p": 0.5 21 | } 22 | -------------------------------------------------------------------------------- /models/lasagne/spelling/convnet/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | import time 6 | 7 | import numpy as np 8 | import theano 9 | import theano.tensor as T 10 | 11 | import modeling.lasagne_model 12 | import lasagne 13 | 14 | class Model(modeling.lasagne_model.Classifier): 15 | def build_input_var(self): 16 | return T.imatrix('inputs') 17 | 18 | def build_target_var(self): 19 | return T.ivector('targets') 20 | 21 | def build_updates(self): 22 | return lasagne.updates.nesterov_momentum( 23 | self.train_loss, self.params, 24 | learning_rate=0.01, momentum=0.9) 25 | 26 | def build_model(self): 27 | # Input layer 28 | input_shape = (self.config.batch_size, self.config.input_width) 29 | print('input_shape', input_shape) 30 | model = lasagne.layers.InputLayer(shape=input_shape, 31 | input_var=self.input_var) 32 | 33 | # Embedding layer 34 | model = lasagne.layers.EmbeddingLayer(model, 35 | self.config.n_vocab, self.config.n_word_dims) 36 | 37 | # Convolutional layer 38 | model = lasagne.layers.Conv1DLayer(model, 39 | num_filters=self.config.n_filters, 40 | filter_size=self.config.filter_width, 41 | nonlinearity=lasagne.nonlinearities.rectify, 42 | W=lasagne.init.GlorotUniform()) 43 | 
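# The max-pooling layer that follows uses a window derived from input_width
# and filter_width; the intent appears to be to pool over (nearly) the whole
# convolution output so that each filter contributes a single feature to the
# dense layers.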
44 | print('pool_size', self.config.input_width-self.config.filter_width-1) 45 | 46 | # Max-pooling layer 47 | model = lasagne.layers.MaxPool1DLayer(model, 48 | pool_size=self.config.input_width-self.config.filter_width-1) 49 | 50 | # Flatten layer 51 | #model = lasagne.layers.FlattenLayer(model) 52 | 53 | # Fully-connected layer 54 | model = lasagne.layers.DenseLayer( 55 | lasagne.layers.dropout(model, p=.0), 56 | num_units=self.config.n_filters*2, 57 | nonlinearity=lasagne.nonlinearities.rectify) 58 | 59 | # Output layer 60 | model = lasagne.layers.DenseLayer( 61 | lasagne.layers.dropout(model, p=.5), 62 | num_units=self.config.n_classes, 63 | nonlinearity=lasagne.nonlinearities.softmax) 64 | 65 | return model 66 | -------------------------------------------------------------------------------- /notebooks/ConvnetSensitivityAnalysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "np.set_printoptions(precision=3)\n", 13 | "np.set_printoptions(suppress=True)" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 3, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import itertools\n", 25 | "\n", 26 | "def powerset(iterable):\n", 27 | " s = list(iterable)\n", 28 | " return itertools.chain.from_iterable(\n", 29 | " itertools.combinations(s, r) for r in range(len(s)+1))" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "import json\n", 41 | "import pandas as pd\n", 42 | "from sklearn.metrics import precision_recall_fscore_support\n", 43 | "import modeling.utils" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 5, 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# For scikit learn metrics.\n", 55 | "precision_recall_average = 'macro'" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 6, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "# Best so far, but imbalanced.\n", 67 | "model_dir = 'models/keras/preposition/convnet/20a7a6b088ee11e5b2b374d435ed6f3a/'\n", 68 | "\n", 69 | "# Balanced.\n", 70 | "# model_dir = 'models/keras/preposition/convnet/balanced/'\n", 71 | "\n", 72 | "# Load the test set for evaluation.\n", 73 | "data_file = 'data/preposition/prepositions-all-new-test.h5'" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 7, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [ 83 | { 84 | "name": "stdout", 85 | "output_type": "stream", 86 | "text": [ 87 | "Loading weights (build_model)\n", 88 | "Loading weights\n" 89 | ] 90 | }, 91 | { 92 | "name": "stderr", 93 | "output_type": "stream", 94 | "text": [ 95 | "Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled)\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "model, model_cfg = modeling.utils.load_model(model_dir, load_weights=True)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 8, 106 | "metadata": { 107 | "collapsed": false 108 | }, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "[(999552, 5), (999552, 52)]\n" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | 
"model_data = modeling.utils.load_all_model_data(data_file, model_cfg)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 9, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "# Load target data or metadata (e.g. mapping between numeric target variable and preposition).\n", 131 | "target_data_file = 'data/preposition/prepositions-all-new-target-data.json'\n", 132 | "target_data = json.load(open(target_data_file))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": true 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "def compute_n_unknown_words():\n", 144 | " n_unknown_words = np.zeros_like(model_data.len)\n", 145 | " for i in np.arange(0, len(model_data.len)):\n", 146 | " n_unknown_words[i] = len(np.where(model_data.data[i, 0:model_data.len[i]] == 0)[0])\n", 147 | " return n_unknown_words" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "Sensitivity analysis of effect of position of unknown words in window around preposition\n", 155 | "=======\n", 156 | "1. Take all examples in which the window around the preposition contains no unknown words.\n", 157 | "2. For each set in the powerset of positions in the window (excluding the center, where the preposition occurs):\n", 158 | " 1. Set the words in that position to be unknown (i.e. assign 0 to that position) for all examples.\n", 159 | " 2. Run the examples through the model.\n", 160 | "3. Evaluate the model's performance." 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": false 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "def sensitivity_analysis(n=50000):\n", 172 | " n_unknown_words = compute_n_unknown_words()\n", 173 | "\n", 174 | " print('# of examples ' + str(len(model_data.data)))\n", 175 | " print('# of examples with no unknown words ' + str((n_unknown_words==0).sum()))\n", 176 | " \n", 177 | " error_detection_targets = np.ones_like(model_data.current_word_code)\n", 178 | " evens = np.arange(0, len(model_data.target), 2)\n", 179 | " error_detection_targets[evens] = 0\n", 180 | "\n", 181 | " no_unknown_words_data = model_data.data[n_unknown_words == 0]\n", 182 | " no_unknown_words_correction_targets = model_data.target[n_unknown_words == 0]\n", 183 | " no_unknown_words_detection_targets = error_detection_targets[n_unknown_words == 0]\n", 184 | "\n", 185 | " window_size = 5\n", 186 | " center = 2\n", 187 | "\n", 188 | " assert len(np.where(model_data.data[:, center] == 0)[0]) == 0\n", 189 | "\n", 190 | " indices_in_window = [center-2, center-1, center+1, center+2]\n", 191 | "\n", 192 | " masks = [mask for mask in powerset(indices_in_window)]\n", 193 | "\n", 194 | " correction_results = {}\n", 195 | " \n", 196 | " results_df = None\n", 197 | "\n", 198 | " for mask in masks:\n", 199 | " data = no_unknown_words_data.copy()[0:n]\n", 200 | " mask = np.array(mask, dtype=int)\n", 201 | "\n", 202 | " data[:, mask] = 0\n", 203 | "\n", 204 | " for i in np.arange(len(data)):\n", 205 | " data[i, mask + model_data.position[i] + 3] = 0\n", 206 | "\n", 207 | " no_unknown_words_correction_preds = model.predict_classes(data, verbose=0)\n", 208 | "\n", 209 | " unknowns_str = ['_'] * (len(indices_in_window) + 1)\n", 210 | " for x in mask:\n", 211 | " unknowns_str[x] = \"?\"\n", 212 | " unknowns_str[center] = \"P\"\n", 213 | "\n", 214 | " # Error correction\n", 215 | " p, 
r, f, _ = precision_recall_fscore_support(\n", 216 | " no_unknown_words_correction_targets[0:n],\n", 217 | " no_unknown_words_correction_preds,\n", 218 | " average=precision_recall_average)\n", 219 | " \n", 220 | " row = pd.DataFrame({\n", 221 | " \"pos-2\": [unknowns_str[0]],\n", 222 | " \"pos-1\": [unknowns_str[1]],\n", 223 | " \"pos-0\": [unknowns_str[2]],\n", 224 | " \"pos+1\": [unknowns_str[3]],\n", 225 | " \"pos+2\": [unknowns_str[4]],\n", 226 | " \"precision\": [p],\n", 227 | " \"recall\": [r],\n", 228 | " \"f1\": [f],\n", 229 | " \"n\": [n]\n", 230 | " })\n", 231 | " if results_df is None:\n", 232 | " results_df = row\n", 233 | " else:\n", 234 | " results_df = pd.concat([results_df, row])\n", 235 | "\n", 236 | " results_df = results_df[[\"pos-2\", \"pos-1\", \"pos-0\", \"pos+1\", \"pos+2\", \"precision\", \"recall\", \"f1\", \"n\"]]\n", 237 | " print(results_df.to_latex(index=False, float_format=lambda f: '%.02f' % f))\n", 238 | " \n", 239 | "sensitivity_analysis(n=10000)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": { 246 | "collapsed": true 247 | }, 248 | "outputs": [], 249 | "source": [] 250 | } 251 | ], 252 | "metadata": { 253 | "kernelspec": { 254 | "display_name": "Python 2", 255 | "language": "python", 256 | "name": "python2" 257 | }, 258 | "language_info": { 259 | "codemirror_mode": { 260 | "name": "ipython", 261 | "version": 2 262 | }, 263 | "file_extension": ".py", 264 | "mimetype": "text/x-python", 265 | "name": "python", 266 | "nbconvert_exporter": "python", 267 | "pygments_lexer": "ipython2", 268 | "version": "2.7.10" 269 | } 270 | }, 271 | "nbformat": 4, 272 | "nbformat_minor": 0 273 | } 274 | -------------------------------------------------------------------------------- /notebooks/notes.txt: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------- 2 | Given our current architecture, how helpful is increasing the training set size? 
3 | --------------------------------------------------------------------------- 4 | 5 | Embedding size: 50 6 | Number of convolutional filters: 500 7 | Filter width: 4 8 | Max filter norm: 1 (might be too small, considering filter width) 9 | Hidden fully-connected layers: 3 10 | Fully-connected layer sizes: 1000, 1000, 500 11 | Learning rate: 0.03 12 | Momentum: 0.9 13 | Weight decay: 0 14 | 15 | Train on 1m, validate on 200k: 16 | 17 | acc: 0.1336 - val_acc: 0.2124 - val_f1: 0.12 18 | acc: 0.2628 - val_acc: 0.3049 - val_f1: 0.21 19 | acc: 0.3093 - val_acc: 0.3476 - val_f1: 0.26 20 | acc: 0.3571 - val_acc: 0.3972 - val_f1: 0.33 21 | acc: 0.3913 - val_acc: 0.4225 - val_f1: 0.36 22 | acc: 0.4091 - val_acc: 0.4312 - val_f1: 0.38 23 | 24 | Train on 2m, validate on 200k: 25 | 26 | acc: 0.1909 - val_acc: 0.3048 - val_f1: 0.22 27 | acc: 0.3421 - val_acc: 0.4070 - val_f1: 0.34 28 | acc: 0.4028 - val_acc: 0.4362 - val_f1: 0.38 29 | acc: 0.4208 - val_acc: 0.4464 - val_f1: 0.40 30 | acc: 0.4293 - val_acc: 0.4522 - val_f1: 0.40 31 | 32 | Train on 4m, validate on 200k: 33 | 34 | acc: 0.2616 - val_acc: 0.4063 - val_f1: 0.25 35 | acc: 0.4095 - val_acc: 0.4456 - val_f1: 0.33 36 | acc: 0.4315 - val_acc: 0.4573 - val_f1: 0.35 37 | acc: 0.4398 - val_acc: 0.4630 - val_f1: 0.37 38 | acc: 0.4448 - val_acc: 0.4658 - val_f1: 0.38 39 | acc: 0.4484 - val_acc: 0.4673 - val_f1: 0.39 40 | acc: 0.4509 - val_acc: 0.4693 - val_f1: 0.39 41 | 42 | Loosen max norm constraint on word embeddings to 2, use class weights 43 | to help model perform better on less frequent classes: 44 | 45 | Embedding size: 50 46 | Number of convolutional filters: 500 47 | Filter width: 4 48 | Hidden fully-connected layers: 49 | Number: 3 50 | Fully-connected layer sizes: 1000, 1000, 500 51 | Learning rate: 0.1 52 | Momentum: 0.9 53 | Decay: 0.000000001 54 | 55 | Train on 4m, validate on 200k: 56 | acc: 0.3606 - val_acc: 0.4497 - val_f1: 0.40 57 | acc: 0.4343 - val_acc: 0.4619 - val_f1: 0.42 58 | acc: 0.4366 - val_acc: 0.4585 - val_f1: 0.42 59 | acc: 0.4362 - val_acc: 0.4624 - val_f1: 0.42 60 | acc: 0.4335 - val_acc: 0.4553 - val_f1: 0.41 61 | acc: 0.4316 - val_acc: 0.4541 - val_f1: 0.41 62 | 63 | --------------------------------------------------------------------------- 64 | What happens when we use an LSTM network instead of a temporal 65 | convolutional network? 
66 | --------------------------------------------------------------------------- 67 | 68 | Embedding size: 50 69 | Number of LSTM layers: 3 70 | Number of units in LSTM layers: 64 71 | Dropout after each LSTM layer: 0.2 72 | Learning rate: 0.1 73 | Momentum: 0.9 74 | Decay: 0 75 | Gradient truncation: -1 (classical BPTT) 76 | Norm clipping threshold: 0 (no clipping) 77 | 78 | Train on 4m, validate on 200k: 79 | acc: 0.2791 - val_acc: 0.3589 - val_f1: 0.29 80 | acc: 0.4383 - val_acc: 0.5068 - val_f1: 0.47 81 | acc: 0.4863 - val_acc: 0.5203 - val_f1: 0.49 82 | acc: 0.5251 - val_acc: 0.5438 - val_f1: 0.51 83 | acc: 0.5461 - val_acc: 0.5613 - val_f1: 0.53 84 | acc: 0.5614 - val_acc: 0.5697 - val_f1: 0.55 85 | acc: 0.5736 - val_acc: 0.5744 - val_f1: 0.56 86 | acc: 0.5825 - val_acc: 0.5800 - val_f1: 0.56 87 | 88 | New configuration (with clipping) 89 | 90 | Embedding size: 50 91 | Number of LSTM layers: 3 92 | Number of units in LSTM layers: 64 93 | Dropout after each LSTM layer: 0.2 94 | Learning rate: 0.1 95 | Momentum: 0.9 96 | Decay: 0 97 | Gradient truncation: -1 (classical BPTT) 98 | Norm clipping threshold: 5 (no clipping) 99 | 100 | Train on 8m, validate on 200k (here one epoch is 100k examples; I had 101 | to split up the 8m examples into separate files, and each gets its 102 | own epoch): 103 | 104 | acc: 0.4823 - val_acc: 0.491425 105 | 106 | New configuration (with clipping) 107 | 108 | Embedding size: 50 109 | Number of LSTM layers: 4 110 | Number of units in LSTM layers: 500 111 | Dropout after each LSTM layer: 0.2 112 | Learning rate: 0.1 113 | Momentum: 0.9 114 | Decay: 0 115 | Gradient truncation: -1 (classical BPTT) 116 | Norm clipping threshold: 5 (no clipping) 117 | 118 | Train on 16m, validate on 200k: 119 | 120 | --------------------------------------------------------------------------- 121 | After creating a balanced data set (which excluded 'about', because it 122 | is so less frequent than the other prepositions), I may have discovered 123 | that training a model with intra-minibatch contrasting cases -- that is, 124 | with every sentence from the corpus being accompanied by an example that 125 | is the same sentence with an error introduced -- is essential to being 126 | able to train this model. 127 | --------------------------------------------------------------------------- 128 | 129 | --------------------------------------------------------------------------- 130 | Multi-task learning; change train.py to allow multiple --target 131 | arguments ... or change the architecture so the targets used are 132 | determined by model.json and model.py? 
133 | --------------------------------------------------------------------------- 134 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | keras=0.3.1 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='Modeling package', packages=['modeling']) 4 | -------------------------------------------------------------------------------- /tests/testdata.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | #import six 5 | import sys 6 | import os 7 | import numpy as np 8 | 9 | import unittest 10 | import modeling.data 11 | 12 | class TestData(unittest.TestCase): 13 | def test_create_window_position_at_beginning(self): 14 | sentence = np.arange(1, 12) 15 | position = 0 16 | expected_window = [0, 0, 0, 1, 2, 3, 4] 17 | window = modeling.data.create_window(sentence, position, 18 | size=7) 19 | 20 | self.assertEqual(7, len(window)) 21 | self.assertTrue(np.all(window == expected_window)) 22 | 23 | def test_create_window_position_at_end_nonce(self): 24 | sentence = np.arange(1, 12) 25 | position = len(sentence) - 1 26 | nonce = 99 27 | expected_window = [8, 9, 10, nonce, 0, 0, 0] 28 | window = modeling.data.create_window(sentence, position, 29 | size=7, nonce=nonce) 30 | 31 | self.assertEqual(7, len(window)) 32 | self.assertTrue(np.all(window == expected_window)) 33 | 34 | def test_create_window_position_before_sentence(self): 35 | sentence = np.arange(1, 12) 36 | position = -1 37 | self.assertRaises( 38 | ValueError, 39 | modeling.data.create_window, 40 | sentence, position) 41 | 42 | def test_create_window_position_after_sentence(self): 43 | sentence = np.arange(1, 12) 44 | position = 12 45 | self.assertRaises( 46 | ValueError, 47 | modeling.data.create_window, 48 | sentence, position) 49 | 50 | -------------------------------------------------------------------------------- /tests/testdifference.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | import unittest 5 | import numpy as np 6 | from theano import function 7 | import theano.tensor as T 8 | 9 | from keras.layers.core import Layer 10 | 11 | class TemporalDifference(Layer): 12 | """ 13 | Given a 3-tensor with shape (nb_samples, maxlen, output_dim), outputs 14 | the difference X[ 15 | """ 16 | def _get_output(self, X): 17 | return X[:, 1:, :] - X[:, 0:X.shape[1]-1, :] 18 | 19 | def get_output(self, train): 20 | return self._get_output(self.get_input(train)) 21 | 22 | def get_config(self): 23 | return {"name": self.__class__.__name__} 24 | 25 | class TestTemporalDifference(unittest.TestCase): 26 | def testForward(self): 27 | nb_examples = 2 28 | maxlen = 7 29 | output_dim = nb_word_dim = 5 30 | x = np.random.normal(size=(nb_examples, maxlen, output_dim)).astype(np.float32) 31 | expected = x[:, 1:, :] - x[:, 0:x.shape[1]-1, :] 32 | X = T.tensor3('X') 33 | retval = TemporalDifference()._get_output(X) 34 | f = function([X], retval) 35 | actual = f(x) 36 | self.assertTrue(np.allclose(actual, expected)) 37 | -------------------------------------------------------------------------------- /tests/testlasagne.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | import six 5 | import sys 6 | import os 7 | import numpy as np 8 | 9 | import unittest 10 | import modeling.lasagne_model 11 | import modeling.utils 12 | 13 | import theano.tensor as T 14 | import lasagne 15 | 16 | # From Lasagne/examples/mnist.py 17 | def load_mnist(): 18 | # We first define a download function, supporting both Python 2 and 3. 19 | if sys.version_info[0] == 2: 20 | from urllib import urlretrieve 21 | else: 22 | from urllib.request import urlretrieve 23 | 24 | def download(filename, source='http://yann.lecun.com/exdb/mnist/'): 25 | print("Downloading %s" % filename) 26 | urlretrieve(source + filename, filename) 27 | 28 | # We then define functions for loading MNIST images and labels. 29 | # For convenience, they also download the requested files if needed. 30 | import gzip 31 | 32 | def load_mnist_images(filename): 33 | if not os.path.exists(filename): 34 | download(filename) 35 | # Read the inputs in Yann LeCun's binary format. 36 | with gzip.open(filename, 'rb') as f: 37 | data = np.frombuffer(f.read(), np.uint8, offset=16) 38 | # The inputs are vectors now, we reshape them to monochrome 2D images, 39 | # following the shape convention: (examples, channels, rows, columns) 40 | data = data.reshape(-1, 1, 28, 28) 41 | # The inputs come as bytes, we convert them to float32 in range [0,1]. 42 | # (Actually to range [0, 255/256], for compatibility to the version 43 | # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.) 44 | return data / np.float32(256) 45 | 46 | def load_mnist_labels(filename): 47 | if not os.path.exists(filename): 48 | download(filename) 49 | # Read the labels in Yann LeCun's binary format. 50 | with gzip.open(filename, 'rb') as f: 51 | data = np.frombuffer(f.read(), np.uint8, offset=8) 52 | # The labels are vectors of integers now, that's exactly what we want. 53 | return data 54 | 55 | # We can now download and read the training and test set images and labels. 56 | X_train = load_mnist_images('train-images-idx3-ubyte.gz') 57 | y_train = load_mnist_labels('train-labels-idx1-ubyte.gz') 58 | X_test = load_mnist_images('t10k-images-idx3-ubyte.gz') 59 | y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz') 60 | 61 | # We reserve the last 10000 training examples for validation. 62 | X_train, X_val = X_train[:-10000], X_train[-10000:] 63 | y_train, y_val = y_train[:-10000], y_train[-10000:] 64 | 65 | # We just return all the arrays in order, as expected in main(). 66 | # (It doesn't matter how we do this as long as we can read them again.) 67 | return X_train, y_train, X_val, y_val, X_test, y_test 68 | 69 | class TestModel(modeling.lasagne_model.Classifier): 70 | def build_input_var(self): 71 | return T.tensor4('inputs') 72 | 73 | def build_target_var(self): 74 | return T.ivector('targets') 75 | 76 | def build_updates(self): 77 | return lasagne.updates.nesterov_momentum( 78 | self.train_loss, self.params, learning_rate=0.01, momentum=0.9) 79 | 80 | def build_model(self, input_var): 81 | l_in = lasagne.layers.InputLayer( 82 | shape=(None, 1, 28, 28), input_var=input_var) 83 | 84 | l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2) 85 | 86 | # Add a fully-connected layer of 800 units, using the linear rectifier, and 87 | # initializing weights with Glorot's scheme (which is the default anyway). 
88 | l_hid1 = lasagne.layers.DenseLayer( 89 | l_in_drop, num_units=800, 90 | nonlinearity=lasagne.nonlinearities.rectify, 91 | W=lasagne.init.GlorotUniform()) 92 | 93 | # Finally, we'll add the fully-connected output layer, of 10 softmax units: 94 | l_out = lasagne.layers.DenseLayer( 95 | l_hid1, num_units=10, 96 | nonlinearity=lasagne.nonlinearities.softmax) 97 | 98 | # Each layer is linked to its incoming layer(s), so we only need to pass 99 | # the output layer to give access to a network in Lasagne: 100 | return l_out 101 | 102 | class TestLasagneClassifier(unittest.TestCase): 103 | def test_mnist(self): 104 | args = {} 105 | config = modeling.utils.ModelConfig(**args) 106 | model = TestModel(config) 107 | X_train, y_train, X_val, y_val, X_test, y_test = load_mnist() 108 | n_epochs = 5 109 | batch_size = 256 110 | for epoch in six.moves.range(n_epochs): 111 | for j in six.moves.range(0, len(X_train), batch_size): 112 | model.fit(X_train[j:j+batch_size], y_train[j:j+batch_size]) 113 | val_loss, val_acc = model.evaluate(X_val, y_val) 114 | self.assertTrue(val_acc > 0.9) 115 | 116 | def test_save_load(self): 117 | weights_file = '/tmp/model.npz' 118 | 119 | args = {} 120 | config = modeling.utils.ModelConfig(**args) 121 | rng1 = np.random.RandomState(17) 122 | lasagne.random.set_rng(rng1) 123 | model1 = TestModel(config) 124 | model1.save_weights(weights_file) 125 | 126 | rng2 = np.random.RandomState(23) 127 | lasagne.random.set_rng(rng2) 128 | model2 = TestModel(config) 129 | model2.load_weights(weights_file) 130 | 131 | weights1 = lasagne.layers.get_all_param_values(model1.model) 132 | weights2 = lasagne.layers.get_all_param_values(model2.model) 133 | 134 | for i in six.moves.range(len(weights1)): 135 | w1 = weights1[i] 136 | w2 = weights2[i] 137 | self.assertTrue(np.allclose(w1, w2)) 138 | -------------------------------------------------------------------------------- /tests/testlayers.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | #import theano.tensor.nnet 6 | 7 | import keras.models 8 | import keras.layers.core 9 | 10 | from modeling.layers import HierarchicalSoftmax 11 | import modeling.utils 12 | import modeling.builders 13 | 14 | class TestHierarchicalSoftmax(unittest.TestCase): 15 | def setUp(self): 16 | self.batch_size = 1 17 | self.input_dim = 4 18 | self.n_hsm_classes = 5 19 | self.n_outputs_per_class = 3 20 | self.output_size = self.n_hsm_classes * self.n_outputs_per_class 21 | 22 | def test_hierarchical_softmax_integrated(self): 23 | net = keras.models.Sequential() 24 | net.add(keras.layers.core.Dense(100, input_dim=self.input_dim, activation='relu')) 25 | net.add(HierarchicalSoftmax( 26 | self.output_size, self.n_hsm_classes, 27 | #self.n_hsm_classes, self.n_outputs_per_class, 28 | batch_size=self.batch_size)) 29 | net.compile(loss='categorical_crossentropy', optimizer='Adam') 30 | x = np.random.normal(size=(self.batch_size, self.input_dim)) 31 | target = net.predict_proba(x, verbose=0) 32 | n_classes = self.n_hsm_classes * self.n_outputs_per_class 33 | self.assertEqual((self.batch_size, n_classes), target.shape) 34 | 35 | def test_hierarchical_softmax_isolated(self): 36 | layer = HierarchicalSoftmax(self.output_size, self.n_hsm_classes, 37 | #self.n_outputs_per_class, 38 | batch_size=self.batch_size, 39 | input_dim=self.input_dim) 40 | layer.build() 41 | 42 | xt = T.matrix('x') 43 | f = theano.function([xt], layer._get_output(xt)) 44 | x = 
np.random.normal(size=(self.batch_size, self.input_dim)).astype(np.float32) 45 | 46 | output = f(x) 47 | self.assertTrue(output.shape == (self.batch_size, self.output_size)) 48 | self.assertTrue(np.allclose(1.0, output.sum())) 49 | 50 | #@unittest.skip('') 51 | def test_theano_h_softmax(self): 52 | """ 53 | Tests the output dimensions of the h_softmax when a target is provided or 54 | not. 55 | 56 | This test came from 57 | """ 58 | 59 | ############# 60 | # Initialize shared variables 61 | ############# 62 | 63 | floatX = theano.config.floatX 64 | shared = theano.shared 65 | 66 | # Class softmax. 67 | W1 = np.asarray(np.random.normal( 68 | size=(self.input_dim, self.n_hsm_classes)), dtype=floatX) 69 | W1 = shared(W1) 70 | b1 = np.asarray(np.zeros((self.n_hsm_classes,)), dtype=floatX) 71 | b1 = shared(b1) 72 | 73 | # Class member softmax. 74 | W2 = np.asarray(np.random.normal( 75 | size=(self.n_hsm_classes, self.input_dim, self.n_outputs_per_class)), 76 | dtype=floatX) 77 | W2 = shared(W2) 78 | b2 = np.asarray( 79 | np.zeros((self.n_hsm_classes, self.n_outputs_per_class)), dtype=floatX) 80 | b2 = shared(b2) 81 | 82 | ############# 83 | # Build graph 84 | ############# 85 | x = T.matrix('x') 86 | y = T.ivector('y') 87 | 88 | # This only computes the output corresponding to the target 89 | y_hat_tg = theano.tensor.nnet.h_softmax(x, 90 | self.batch_size, self.output_size, self.n_hsm_classes, self.n_outputs_per_class, 91 | W1, b1, W2, b2, y) 92 | 93 | # This computes all the outputs 94 | y_hat_all = theano.tensor.nnet.h_softmax(x, 95 | self.batch_size, self.output_size, self.n_hsm_classes, self.n_outputs_per_class, 96 | W1, b1, W2, b2) 97 | 98 | ############# 99 | # Compile functions 100 | ############# 101 | fun_output_tg = theano.function([x, y], y_hat_tg) 102 | fun_output = theano.function([x], y_hat_all) 103 | 104 | ############# 105 | # Test 106 | ############# 107 | x_mat = np.random.normal(size=(self.batch_size, self.input_dim)).astype(floatX) 108 | y_mat = np.random.randint(0, self.output_size, self.batch_size).astype('int32') 109 | 110 | self.assertTrue(fun_output_tg(x_mat, y_mat).shape == (self.batch_size,)) 111 | self.assertTrue(fun_output(x_mat).shape == (self.batch_size, self.output_size)) 112 | 113 | if __name__ == '__main__': 114 | unittest.main() 115 | -------------------------------------------------------------------------------- /tests/testnonconvnet.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import random 3 | import numpy as np 4 | import theano 5 | import theano.tensor as T 6 | 7 | from keras import models 8 | from keras.layers import embeddings 9 | from keras.layers import core 10 | 11 | from modeling.nonconvnet import ZeroFillDiagonals, \ 12 | SplitOutputByFilter, \ 13 | SlidingWindowL2MaxPooling 14 | 15 | class TestNonConvNet(unittest.TestCase): 16 | def setUp(self): 17 | self.n_vocab = 100 18 | self.n_word_dims = 5 19 | self.filter_width = 4 20 | self.n_filters = 3 21 | self.max_seq_len = 9 22 | self.batch_size = 3 23 | 24 | def setSeeds(self): 25 | np.random.seed(1) 26 | 27 | def testNonConvNet(self): 28 | self.setSeeds() 29 | 30 | x = np.random.randint(self.n_vocab, size=(self.batch_size, 31 | self.max_seq_len)) 32 | 33 | model = models.Sequential() 34 | 35 | # input: (batch_size, max_seq_len) 36 | # output: (batch_size, max_seq_len, n_word_dims) 37 | model.add(embeddings.Embedding(self.n_vocab, self.n_word_dims)) 38 | model.compile(loss='mse', optimizer='sgd') 39 | expected_shape_l1 = 
(self.batch_size, self.max_seq_len, 40 | self.n_word_dims) 41 | output_l1 = model.predict(x) 42 | self.assertEqual(expected_shape_l1, output_l1.shape) 43 | 44 | # input: (batch_size, max_seq_len, n_word_dims) 45 | # output: (batch_size, max_seq_len, n_filters * filter_width) 46 | model.add(core.TimeDistributedDense( 47 | self.n_word_dims, self.n_filters * self.filter_width)) 48 | model.compile(loss='mse', optimizer='sgd') 49 | expected_shape_l2 = (self.batch_size, self.max_seq_len, 50 | self.n_filters * self.filter_width) 51 | output_l2 = model.predict(x) 52 | self.assertEqual(expected_shape_l2, output_l2.shape) 53 | 54 | # input: (batch_size, max_seq_len, n_filters * filter_width) 55 | # output: (batch_size, n_filters, max_seq_len, filter_width) 56 | model.add(SplitOutputByFilter(self.n_filters, self.filter_width)) 57 | model.compile(loss='mse', optimizer='sgd') 58 | expected_shape_l3 = (self.batch_size, self.n_filters, 59 | self.max_seq_len, self.filter_width) 60 | output_l3 = model.predict(x) 61 | self.assertEqual(expected_shape_l3, output_l3.shape) 62 | 63 | # input: (batch_size, n_filters, max_seq_len, filter_width) 64 | # output: (batch_size, n_filters, filter_width, filter_width) 65 | model.add(SlidingWindowL2MaxPooling( 66 | self.batch_size, self.n_filters, 67 | self.filter_width, self.max_seq_len)) 68 | model.compile(loss='mse', optimizer='sgd') 69 | expected_shape_l4 = (self.batch_size, self.n_filters, 70 | self.filter_width, self.filter_width) 71 | output_l4 = model.predict(x) 72 | self.assertEqual(expected_shape_l4, output_l4.shape) 73 | 74 | # input: (batch_size, n_filters, filter_width, filter_width) 75 | # output: (batch_size, n_filters, filter_width, filter_width) 76 | model.add(ZeroFillDiagonals( 77 | self.batch_size, self.n_filters, self.filter_width)) 78 | model.compile(loss='mse', optimizer='sgd') 79 | expected_shape_l5 = (self.batch_size, self.n_filters, 80 | self.filter_width, self.filter_width) 81 | output_l5 = model.predict(x) 82 | self.assertEqual(expected_shape_l5, output_l5.shape) 83 | 84 | def testSplitOutputByFilter(self): 85 | self.setSeeds() 86 | 87 | input_shape = (self.batch_size, self.max_seq_len, 88 | self.n_filters * self.filter_width) 89 | output_shape = (self.batch_size, self.n_filters, 90 | self.max_seq_len, self.filter_width) 91 | 92 | x = np.arange(np.prod(input_shape)) 93 | x = x.reshape(input_shape).astype(np.int32) 94 | y = np.zeros_like(x) 95 | y = np.reshape(y, output_shape) 96 | 97 | for i in range(self.n_filters): 98 | s = x[:, :, i*self.filter_width:(i+1)*self.filter_width] 99 | y[:, i, :, :] = s 100 | 101 | xt = T.itensor3('xt') 102 | layer = SplitOutputByFilter(self.n_filters, self.filter_width) 103 | yt = layer._get_output(xt) 104 | 105 | f = theano.function(inputs=[xt], outputs=yt) 106 | y_theano = f(x) 107 | 108 | self.assertEquals(y.shape, y_theano.shape) 109 | self.assertTrue(np.all(y == y_theano)) 110 | 111 | def testSlidingWindowL2MaxPooling(self): 112 | self.assertTrue( 113 | self.max_seq_len - self.filter_width > self.n_filters) 114 | 115 | self.setSeeds() 116 | 117 | input_shape = (self.batch_size, self.n_filters, 118 | self.max_seq_len, self.filter_width) 119 | output_shape = (self.batch_size, self.n_filters, 120 | self.filter_width, self.filter_width) 121 | 122 | x = np.zeros(shape=input_shape) 123 | expected = np.zeros(shape=output_shape) 124 | 125 | max_input_shape = (self.batch_size, self.filter_width, self.filter_width) 126 | 127 | # For the i-th filter, make i the offset at which the maximum 128 | # L2 norm occurs. 
129 | for i in np.arange(self.n_filters): 130 | start = i 131 | end = i+self.filter_width 132 | values = i + np.arange(np.prod(max_input_shape)) 133 | values = values.reshape(max_input_shape) 134 | x[:, i, start:end, :] = values 135 | expected[:, i, :, :] = values 136 | 137 | it = T.iscalar() 138 | x3d = T.dtensor3('x3d') 139 | x4d = T.dtensor4('x4d') 140 | 141 | layer = SlidingWindowL2MaxPooling( 142 | self.batch_size, self.n_filters, self.filter_width, 143 | self.max_seq_len) 144 | 145 | ''' 146 | Use the first sample and first filter to test `filter_dimension`. 147 | ''' 148 | yt_filter_dim = layer.filter_dimension(it, x3d) 149 | f_filter_dim = theano.function(inputs=[it, x3d], outputs=yt_filter_dim) 150 | y_filter_dim_out = f_filter_dim(0, x[0]) 151 | self.assertEquals((self.filter_width, self.filter_width), 152 | y_filter_dim_out.shape) 153 | self.assertTrue(np.all(expected[0, 0, :, :] == y_filter_dim_out)) 154 | 155 | ''' 156 | Use the first sample to test `filter_dimension`. 157 | ''' 158 | yt_sample_dim = layer.sample_dimension(it, x4d) 159 | f_sample_dim = theano.function(inputs=[it, x4d], outputs=yt_sample_dim) 160 | y_sample_dim_out = f_sample_dim(0, x) 161 | self.assertEquals((self.n_filters, self.filter_width, self.filter_width), 162 | y_sample_dim_out.shape) 163 | self.assertTrue(np.all(expected[0, :, :, :] == y_sample_dim_out)) 164 | 165 | ''' 166 | Use all of `x` to test `_get_output`. 167 | ''' 168 | yt_output = layer._get_output(x4d) 169 | f_output = theano.function(inputs=[x4d], outputs=yt_output) 170 | yt_out = f_output(x) 171 | self.assertEquals( 172 | (self.batch_size, self.n_filters, self.filter_width, 173 | self.filter_width), yt_out.shape) 174 | self.assertTrue(np.all(expected == yt_out)) 175 | 176 | def testZeroFillDiagonals(self): 177 | input_shape = (self.batch_size, self.n_filters, 178 | self.filter_width, self.filter_width) 179 | mask = np.ones(input_shape) 180 | diag_indices = np.arange(self.filter_width) 181 | for i in np.arange(self.batch_size): 182 | for j in np.arange(self.n_filters): 183 | mask[i, j, diag_indices, diag_indices] = 0 184 | 185 | x = np.arange(np.prod(input_shape)).reshape(input_shape) 186 | expected = x * mask 187 | 188 | x4d = T.dtensor4('x4d') 189 | layer = ZeroFillDiagonals( 190 | self.batch_size, self.n_filters, self.filter_width) 191 | yt_output = layer._get_output(x4d) 192 | f_output = theano.function(inputs=[x4d], outputs=yt_output) 193 | 194 | yt_out = f_output(x) 195 | self.assertEquals(expected.shape, yt_out.shape) 196 | self.assertTrue(np.all(expected == yt_out)) 197 | -------------------------------------------------------------------------------- /train_chainer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | 6 | import sys 7 | import six 8 | import argparse 9 | import progressbar 10 | import copy 11 | import cPickle 12 | 13 | import numpy as np 14 | import pandas as pd 15 | 16 | import chainer 17 | from chainer import cuda 18 | from modeling.chainer_model import Classifier 19 | from modeling.utils import ( 20 | load_model_data, load_model_json, build_model_id, build_model_path, 21 | setup_model_dir, setup_logging, ModelConfig) 22 | import modeling.parser 23 | 24 | def main(args): 25 | if args.gpu >= 0: 26 | cuda.check_cuda_available() 27 | xp = cuda.cupy if args.gpu >= 0 else np 28 | 29 | model_id = build_model_id(args) 30 | model_path = build_model_path(args, model_id) 31 
| setup_model_dir(args, model_path) 32 | sys.stdout, sys.stderr = setup_logging(args) 33 | 34 | x_train, y_train = load_model_data(args.train_file, 35 | args.data_name, args.target_name, 36 | n=args.n_train) 37 | x_validation, y_validation = load_model_data( 38 | args.validation_file, 39 | args.data_name, args.target_name, 40 | n=args.n_validation) 41 | 42 | rng = np.random.RandomState(args.seed) 43 | 44 | N = len(x_train) 45 | N_validation = len(x_validation) 46 | 47 | n_classes = max(np.unique(y_train)) + 1 48 | json_cfg = load_model_json(args, x_train, n_classes) 49 | 50 | print('args.model_dir', args.model_dir) 51 | sys.path.append(args.model_dir) 52 | from model import Model 53 | model_cfg = ModelConfig(**json_cfg) 54 | model = Model(model_cfg) 55 | setattr(model, 'stop_training', False) 56 | 57 | if args.gpu >= 0: 58 | cuda.get_device(args.gpu).use() 59 | model.to_gpu() 60 | 61 | best_accuracy = 0. 62 | best_epoch = 0 63 | 64 | def keep_training(epoch, best_epoch): 65 | if model_cfg.n_epochs is not None and epoch > model_cfg.n_epochs: 66 | return False 67 | if epoch > 1 and epoch - best_epoch > model_cfg.patience: 68 | return False 69 | return True 70 | 71 | epoch = 1 72 | 73 | while True: 74 | if not keep_training(epoch, best_epoch): 75 | break 76 | 77 | if args.shuffle: 78 | perm = np.random.permutation(N) 79 | else: 80 | perm = np.arange(N) 81 | 82 | sum_accuracy = 0 83 | sum_loss = 0 84 | 85 | pbar = progressbar.ProgressBar(term_width=40, 86 | widgets=[' ', progressbar.Percentage(), 87 | ' ', progressbar.ETA()], 88 | maxval=N).start() 89 | 90 | for j, i in enumerate(six.moves.range(0, N, model_cfg.batch_size)): 91 | pbar.update(j+1) 92 | x_batch = xp.asarray(x_train[perm[i:i + model_cfg.batch_size]].flatten()) 93 | y_batch = xp.asarray(y_train[perm[i:i + model_cfg.batch_size]]) 94 | pred, loss, acc = model.fit(x_batch, y_batch) 95 | sum_loss += float(loss.data) * len(y_batch) 96 | sum_accuracy += float(acc.data) * len(y_batch) 97 | 98 | pbar.finish() 99 | print('train epoch={}, mean loss={}, accuracy={}'.format( 100 | epoch, sum_loss / N, sum_accuracy / N)) 101 | 102 | # Validation set evaluation 103 | sum_accuracy = 0 104 | sum_loss = 0 105 | 106 | pbar = progressbar.ProgressBar(term_width=40, 107 | widgets=[' ', progressbar.Percentage(), 108 | ' ', progressbar.ETA()], 109 | maxval=N_validation).start() 110 | 111 | for i in six.moves.range(0, N_validation, model_cfg.batch_size): 112 | pbar.update(i+1) 113 | x_batch = xp.asarray(x_validation[i:i + model_cfg.batch_size].flatten()) 114 | y_batch = xp.asarray(y_validation[i:i + model_cfg.batch_size]) 115 | pred, loss, acc = model.predict(x_batch, target=y_batch) 116 | sum_loss += float(loss.data) * len(y_batch) 117 | sum_accuracy += float(acc.data) * len(y_batch) 118 | 119 | pbar.finish() 120 | validation_accuracy = sum_accuracy / N_validation 121 | validation_loss = sum_loss / N_validation 122 | 123 | if validation_accuracy > best_accuracy: 124 | best_accuracy = validation_accuracy 125 | best_epoch = epoch 126 | if model_path is not None: 127 | if args.gpu >= 0: 128 | model.to_cpu() 129 | store = { 130 | 'args': args, 131 | 'model': model, 132 | } 133 | cPickle.dump(store, open(model_path + '.store', 'w')) 134 | if args.gpu >= 0: 135 | model.to_gpu() 136 | 137 | print('validation epoch={}, mean loss={}, accuracy={} best=[accuracy={} epoch={}]'.format( 138 | epoch, validation_loss, validation_accuracy, 139 | best_accuracy, 140 | best_epoch)) 141 | 142 | epoch += 1 143 | 144 | if __name__ == '__main__': 145 | parser = 
modeling.parser.build_chainer() 146 | sys.exit(main(parser.parse_args())) 147 | -------------------------------------------------------------------------------- /train_keras_simple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | 6 | import os, sys, shutil 7 | import logging 8 | import json 9 | import uuid 10 | import json 11 | import itertools 12 | 13 | import numpy as np 14 | 15 | import theano 16 | import h5py 17 | import six 18 | from sklearn.metrics import accuracy_score 19 | 20 | from keras.utils import np_utils 21 | from keras.optimizers import SGD 22 | import keras.callbacks 23 | from keras.callbacks import ModelCheckpoint, EarlyStopping 24 | import keras.models 25 | 26 | sys.path.append('.') 27 | 28 | from modeling.callbacks import (ClassificationReport, 29 | ConfusionMatrix, PredictionCallback, 30 | DelegatingMetricCallback, 31 | SingleStepLearningRateSchedule) 32 | from modeling.utils import (count_parameters, callable_print, 33 | setup_logging, setup_model_dir, save_model_info, 34 | load_model_data, load_model_json, load_target_data, 35 | build_model_id, build_model_path, 36 | ModelConfig) 37 | import modeling.preprocess 38 | import modeling.parser 39 | 40 | def main(args): 41 | model_id = build_model_id(args) 42 | model_path = build_model_path(args, model_id) 43 | setup_model_dir(args, model_path) 44 | 45 | rng = np.random.RandomState(args.seed) 46 | 47 | json_cfg = load_model_json(args, x_train=None, n_classes=None) 48 | model_cfg = ModelConfig(**json_cfg) 49 | if args.verbose: 50 | print("model_cfg " + str(model_cfg)) 51 | 52 | sys.path.append(args.model_dir) 53 | import model 54 | from model import build_model, fit_model, load_train, load_validation 55 | 56 | train_data = load_train(args, model_cfg) 57 | validation_data = load_validation(args, model_cfg) 58 | 59 | if args.verbose: 60 | print("loading model") 61 | model = build_model(model_cfg, train_data, validation_data) 62 | fit_model(model, train_data, validation_data, args) 63 | 64 | if __name__ == '__main__': 65 | parser = modeling.parser.build_keras() 66 | sys.exit(main(parser.parse_args())) 67 | -------------------------------------------------------------------------------- /train_lasagne.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | 6 | import sys 7 | import six 8 | import argparse 9 | import progressbar 10 | import copy 11 | import cPickle 12 | import itertools 13 | 14 | import numpy as np 15 | import pandas as pd 16 | 17 | from modeling.lasagne_model import Classifier 18 | from modeling.utils import ( 19 | load_model_data, load_model_json, build_model_id, build_model_path, 20 | setup_model_dir, setup_logging, ModelConfig) 21 | import modeling.parser 22 | 23 | def keep_training(epoch, best_epoch, model_cfg): 24 | if model_cfg.n_epochs is not None and epoch > model_cfg.n_epochs: 25 | return False 26 | if epoch > 1 and epoch - best_epoch > model_cfg.patience: 27 | return False 28 | return True 29 | 30 | def train_one_epoch(model, x_train, y_train, args, model_cfg, progress=False): 31 | n = len(x_train) 32 | 33 | if args.shuffle: 34 | perm = np.random.permutation(n) 35 | else: 36 | perm = np.arange(n) 37 | 38 | if progress: 39 | pbar = progressbar.ProgressBar(term_width=40, 40 | widgets=[' ', 
progressbar.Percentage(), 41 | ' ', progressbar.ETA()], 42 | maxval=n).start() 43 | else: 44 | pbar = None 45 | 46 | train_loss = 0 47 | 48 | for j, i in enumerate(six.moves.range(0, n, model_cfg.batch_size)): 49 | if progress: 50 | pbar.update(j+1) 51 | x = x_train[perm[i:i + model_cfg.batch_size]] 52 | y = y_train[perm[i:i + model_cfg.batch_size]] 53 | if len(x) != model_cfg.batch_size: 54 | # TODO: how do other frameworks solve this? 55 | continue 56 | train_loss += model.fit(x, y) 57 | 58 | if progress: 59 | pbar.finish() 60 | 61 | return train_loss/float(n) 62 | 63 | def validate(model, x_valid, y_valid, args, model_cfg, progress=False): 64 | n = len(x_valid) 65 | 66 | if progress: 67 | pbar = progressbar.ProgressBar(term_width=40, 68 | widgets=[' ', progressbar.Percentage(), 69 | ' ', progressbar.ETA()], 70 | maxval=n).start() 71 | else: 72 | pbar = None 73 | 74 | val_accuracy = 0. 75 | val_loss = 0. 76 | 77 | for i in six.moves.range(0, n, model_cfg.batch_size): 78 | if progress: 79 | pbar.update(i+1) 80 | x = x_valid[i:i + model_cfg.batch_size] 81 | y = y_valid[i:i + model_cfg.batch_size] 82 | loss, acc = model.evaluate(x, y) 83 | val_loss += loss 84 | val_accuracy += acc 85 | 86 | if progress: 87 | pbar.finish() 88 | 89 | return val_loss/float(n), val_accuracy/float(n) 90 | 91 | def main(args): 92 | model_id = build_model_id(args) 93 | model_path = build_model_path(args, model_id) 94 | setup_model_dir(args, model_path) 95 | sys.stdout, sys.stderr = setup_logging(args) 96 | 97 | rng = np.random.RandomState(args.seed) 98 | 99 | x_train, y_train = load_model_data(args.train_file, 100 | args.data_name, args.target_name, 101 | n=args.n_train) 102 | 103 | x_valid, y_valid = load_model_data( 104 | args.validation_file, 105 | args.data_name, args.target_name, 106 | n=args.n_validation) 107 | 108 | train_files = args.extra_train_file + [args.train_file] 109 | train_files_iter = itertools.cycle(train_files) 110 | 111 | n_classes = max(np.unique(y_train)) + 1 112 | json_cfg = load_model_json(args, x_train, n_classes) 113 | 114 | sys.path.append(args.model_dir) 115 | from model import Model 116 | model_cfg = ModelConfig(**json_cfg) 117 | model = Model(model_cfg) 118 | setattr(model, 'stop_training', False) 119 | 120 | best_accuracy = 0. 
121 | best_epoch = 0 122 | 123 | epoch = 1 124 | iteration = 0 125 | 126 | while True: 127 | if not keep_training(epoch, best_epoch, model_cfg): 128 | break 129 | 130 | train_loss = train_one_epoch(model, x_train, y_train, 131 | args, model_cfg, progress=args.progress) 132 | 133 | val_loss, val_accuracy = validate(model, x_valid, y_valid, 134 | args, model_cfg, progress=args.progress) 135 | 136 | if val_accuracy > best_accuracy: 137 | best_accuracy = val_accuracy 138 | best_epoch = epoch 139 | if model_path is not None: 140 | model.save_weights(model_path + '.npz') 141 | cPickle.dump(model, open(model_path + '.pkl', 'w')) 142 | 143 | print('epoch={epoch:05d}, iteration={iteration:05d}, loss={loss:.04f}, val_loss={val_loss:.04f}, val_acc={val_acc:.04f} best=[accuracy={best_accuracy:.04f} epoch={best_epoch:05d}]'.format( 144 | epoch=epoch, iteration=iteration, 145 | loss=train_loss, val_loss=val_loss, val_acc=val_accuracy, 146 | best_accuracy=best_accuracy, best_epoch=best_epoch)) 147 | 148 | iteration += 1 149 | if iteration % len(train_files) == 0: 150 | epoch += 1 151 | 152 | x_train, y_train = load_model_data( 153 | next(train_files_iter), 154 | args.data_name, args.target_name, 155 | n=args.n_train) 156 | 157 | if __name__ == '__main__': 158 | parser = modeling.parser.build_lasagne() 159 | sys.exit(main(parser.parse_args())) 160 | --------------------------------------------------------------------------------