├── nlp ├── __init__.py └── every2vec.py ├── callbacks ├── __init__.py └── helper.py ├── img ├── read_data │ ├── readhdf.py │ ├── shuffle.py │ └── div_data.py └── preprocess │ ├── label.py │ ├── data_augment.py │ └── face_align2.py ├── LICENSE ├── models └── activ_cnn.py ├── README.md └── param_op ├── skopt.py └── hypopt.py /nlp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /img/read_data/readhdf.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | 6 | f = h5py.File('weights_vgg_train.hdf5','r') 7 | 8 | print f 9 | print f.keys().index('dense_12') 10 | 11 | print f.values()[13] 12 | 13 | g=h5py.File('vgg16_weights.h5','r') 14 | 15 | #for dim in g.values()[13]: 16 | #    print dim -------------------------------------------------------------------------------- /img/read_data/shuffle.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | np.random.seed(1707) 5 | 6 | ''' 7 | with open("train.txt") as t: 8 | files = t.readlines() 9 | img = [] 10 | label = [] 11 | for lines in files: 12 | item = lines.split() 13 | img.append(item[0]) 14 | label.append(int(item[1])) 15 | pos=[] 16 | pos= [i for i in range(len(img))] 17 | np.random.shuffle(pos) 18 | label=[label[i] for i in pos] 19 | img=[img[i] for i in pos] 20 | print label 21 | print img 22 | ''' 23 | 24 | def shuffleData(data, labels): 25 | pos= [i for i in range(len(labels))] 26 | np.random.shuffle(pos) 27 | data=[data[i] for i in pos] 28 | labels=[labels[i] for i in pos] 29 | return data, labels 30 | 31 | -------------------------------------------------------------------------------- /img/preprocess/label.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import data_augment as da 3 | norm = glob.glob("/home/neo/work/cnn_down/data/baby_box/*nor*") 4 | down = glob.glob("/home/neo/work/cnn_down/data/baby_box/*down*") 5 | 6 | 7 | print len(norm) 8 | print len(down) 9 | # train_nor=nor[:int(len(nor)*0.6+1)] 10 | # test_nor=nor[int(len(nor)*0.6+2):int(len(nor)*0.6+2)+int(len(nor)*0.2+1)] 11 | # val_nor=nor[int(len(nor)*0.6+2)+int(len(nor)*0.2+2):] 12 | 13 | # down=da.get_data(down) 14 | # translated_d=da.translate(down) 15 | # rotate_d=da.rotate(down) 16 | #da.out_img(translated_d, rotate_d, "b_down") 17 | 18 | 19 | # norm=da.get_data(norm) 20 | # translated_n=da.translate(norm) 21 | # rotate_n=da.rotate(norm) 22 | #da.out_img(translated_n, rotate_n, "b_nor") 23 | 24 | 25 | # print len(train_nor) ,train_nor[-1:] 26 | # print len(test_nor), test_nor[0], test_nor[-1:] 27 | # print len(val_nor), val_nor[0] 28 | labels = open("/home/neo/work/cnn_down/data/224_align_col/labels.txt", "a") 29 | 30 | for i in norm: 31 | labels.write(i + " 0\n") 32 | 33 | for i in down: 34 | labels.write(i + " 1\n") 35 | 36 | labels.close() 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Ishank sharma 4 | 5 | Permission is hereby granted, free
of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /models/activ_cnn.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import MinMaxScaler 2 | from sklearn.svm import SVC 3 | import theano 4 | 5 | def scale_features(data): 6 | extract_features = theano.function([model.layers[0].input], model.layers[ 7 | 32].output, allow_input_downcast=True) 8 | features = extract_features(data) 9 | scale = MinMaxScaler() 10 | scale_feat = scale.fit_transform(features) 11 | return scale_feat 12 | 13 | print "scaling train feats" 14 | train_feats = scale_features(X_train) 15 | print "scaling test feats" 16 | test_feats = scale_features(X_test) 17 | 18 | svc = SVC(cache_size=60000) 19 | 20 | print "Fitting svm" 21 | svc.fit(train_feats, y_train) 22 | 23 | print "Making predictions" 24 | pred = svc.predict(test_feats) 25 | 26 | 27 | def predictions(pred, y_test): 28 | positive = [] 29 | for i in range(len(y_test)): 30 | if pred[i] == y_test[i]: 31 | positive.append(int(i)) 32 | return positive, len(positive) / float(len(y_test)) 33 | 34 | positives, accuracy = predictions(pred, y_test) 35 | 36 | print "accuracy: ", accuracy 37 | print "positives: ", positives
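# Context for activ_cnn.py (illustrative only): this snippet assumes a trained Keras model
# (Theano backend) and the data arrays are already in scope before it runs, e.g.
#
#   from keras.models import model_from_json
#   model = model_from_json(open("model.json").read())   # hypothetical file names
#   model.load_weights("weights_vgg_train.hdf5")
#   X_train, y_train, X_test, y_test = ...                # e.g. built as in img/read_data/div_data.py
#
# model.layers[32].output selects the layer whose activations feed the SVM;
# change the index to extract features from a different layer.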
-------------------------------------------------------------------------------- /img/read_data/div_data.py: -------------------------------------------------------------------------------- 1 | import cv2, random 2 | import numpy as np 3 | import glob 4 | 5 | np.random.seed(1707) 6 | 7 | 8 | def shuffleData(data, labels): 9 | pos= [i for i in range(len(labels))] 10 | np.random.shuffle(pos) 11 | data=[data[i] for i in pos] 12 | labels=[labels[i] for i in pos] 13 | return data, labels 14 | 15 | def loadData(datafile): 16 | with open(datafile) as f: 17 | files = f.readlines() 18 | img = [] 19 | label = [] 20 | for lines in files: 21 | item = lines.split() 22 | img.append(item[0]) 23 | label.append(int(item[1])) 24 | return img, label 25 | 26 | def loadImages(path_list, crop_size=224, shift=15): 27 | images = np.ndarray([len(path_list),3, crop_size, crop_size]) 28 | for i in xrange(len(path_list)): 29 | img = cv2.imread(path_list[i]) 30 | h, w, c = img.shape 31 | assert c==3 32 | img = img.astype(np.float32) 33 | img= img.transpose((2,0,1)) 34 | images[i] = img 35 | return images 36 | 37 | 38 | image_list, label_list= loadData("/home/data/224_align_col/labels.txt") 39 | 40 | image_list, label_list= shuffleData(image_list,label_list) 41 | 42 | 43 | print len(image_list) 44 | #train 45 | train_path=image_list[:2241] 46 | train_label=label_list[:2241] 47 | #val 48 | val_path=image_list[2241:2721] 49 | val_label=label_list[2241:2721] 50 | #test 51 | test_path=image_list[2721:] 52 | test_label=label_list[2721:] 53 | 54 | #load train images 55 | X_train=loadImages(train_path) 56 | #y_train=np_utils.to_categorical(train_label, nb_classes) 57 | 58 | #load test images 59 | X_test=loadImages(test_path) 60 | #y_test=np_utils.to_categorical(test_label, nb_classes) 61 | #load val images 62 | X_val=loadImages(val_path) 63 | #y_val=np_utils.to_categorical(val_label, nb_classes) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Kutils 2 | ========== 3 | Utility functions for Keras. 4 | 5 | 6 | ### [Callbacks](https://github.com/ishank26/Kutils/blob/master/callbacks/helper.py) 7 | * **Exp_decay learning rate:** Rescale the learning rate by exp(loss) at the end of every epoch. 8 | * **Decay learning rate:** Anneal the learning rate by a custom factor at the end of every nth epoch. 9 | * **Learning rate printer.** 10 | * **Training metrics logger.** 11 | * **Get activations of nth layer.** (A usage sketch for these callbacks is at the end of this README.) 12 | 13 | ### Image 14 | * **Preprocessing** 15 | 	* [Random rotate, Random shift (Data augmentation)](https://github.com/ishank26/Kutils/blob/master/img/preprocess/data_augment.py) 16 | 	* [Affine transform](https://github.com/ishank26/Kutils/blob/master/img/preprocess/data_augment.py) 17 | 	* [Face alignment](https://github.com/ishank26/Kutils/blob/master/img/preprocess/face_align2.py) 18 | * **Read data** 19 | 	* [Load and split data (train/val/test)](https://github.com/ishank26/Kutils/blob/master/img/read_data/div_data.py) 20 | 	* [Label to txt](https://github.com/ishank26/Kutils/blob/master/img/preprocess/label.py) 21 | 	* [Shuffle data](https://github.com/ishank26/Kutils/blob/master/img/read_data/div_data.py) 22 | 23 | ### NLP 24 | * [**everything2vec:**](https://github.com/ishank26/Kutils/blob/master/nlp/every2vec.py) Utilities that tie a gensim word2vec model together with corpus building, vocabulary indexing, embedding preparation, and sequence generation. 25 | 26 | ### Models 27 | * [**feature-SVM:**](https://github.com/ishank26/Kutils/blob/master/models/activ_cnn.py) Apply an SVM to the activations of the nth layer. 28 | 29 | ### Hyperparameter Optimization 30 | * [**skopt:**](https://github.com/ishank26/Kutils/blob/master/param_op/skopt.py) Apply GridSearch/RandomSearch using scikit-learn. (Not working for RNNs.) 31 | * [**hypopt:**](https://github.com/ishank26/Kutils/blob/master/param_op/hypopt.py) Apply GridSearch/RandomSearch using the hyperas library. 32 | 33 |   34 | 35 | 36 | **Note:** Repo under development. Mail me for any info or to contribute :) 37 |
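#### Usage sketch: callbacks

A minimal sketch of wiring the callbacks above into training (assuming Keras 1.x with the Theano backend, a compiled `model`, and `X_train`/`y_train` already in scope; the values and file name are illustrative):

```python
from callbacks.helper import decay_lr, lr_printer, logger

anneal = decay_lr(n_epoch=6, decay=0.1)   # multiply lr by 0.1 every 6 epochs
show_lr = lr_printer()                    # print lr at the start of each epoch
log = logger("train_log.txt")             # append per-epoch metrics to a file

model.fit(X_train, y_train, batch_size=64, nb_epoch=30,
          callbacks=[anneal, show_lr, log])
```

`expdecaylr_loss()` can be swapped in instead of `decay_lr` to rescale the learning rate by exp(loss) after every epoch.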
-------------------------------------------------------------------------------- /img/preprocess/data_augment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import menpo.io as mo 3 | from menpo.transform import Translation 4 | 5 | np.random.seed(1707) 6 | 7 | # rotate 8 | 9 | def get_data(data_path): 10 | data = [] 11 | for i in data_path: 12 | img = mo.import_image(i) 13 | data.append(img) 14 | return data 15 | 16 | 17 | def rotate(img_data): 18 | rotated = [] 19 | rotation = [-15.0, 15.0] 20 | for i in range(len(img_data)): 21 | img = img_data[i] 22 | j = np.random.randint(2) 23 | rot = img.rotate_ccw_about_centre( 24 | rotation[j], degrees=True, retain_shape=True) 25 | rotated.append(rot) 26 | return rotated 27 | 28 | 29 | # translate 30 | 31 | def translate(img_data): 32 | translation = [[30, 30], [30, -30], [-30, 30], [-30, -30]] 33 | translated = [] 34 | for i in range(len(img_data)): 35 | j = np.random.randint(4) 36 | img = img_data[i] 37 | shift = Translation(translation[j], skip_checks=False) 38 | trans = img.warp_to_shape((224, 224), shift, warp_landmarks=False) 39 | translated.append(trans) 40 | return translated 41 | 42 | 43 | def out_img(list1, list2, prefix): 44 | for j in range(len(list2)): 45 | mo.export_image(list2[j], "rot/rot_" + str(prefix) + 46 | str(j) + ".jpg", overwrite=True) 47 | 48 | for i in range(len(list1)): 49 | mo.export_image(list1[i], "trans/trans_" + 50 | str(prefix) + str(i) + ".jpg", overwrite=True) 51 | 52 | 53 | ''' 54 | import skimage.transform as t 55 | import numpy as np 56 | from skimage import io 57 | 58 | img0=io.imread("/home/neo/work/cnn_down/menpo_script/resize/resz0.jpg") 59 | center_shift = np.array((224, 224)) / 2.
- 0.5 60 | 61 | translation=(30,30) 62 | rotation=30 63 | tform_center = t.SimilarityTransform(translation=translation) 64 | tform_augment = t.AffineTransform(rotation=np.deg2rad(rotation)) 65 | 66 | img=t.warp(img0,tform_center,output_shape=(224,224)) 67 | 68 | #io.imshow(img) 69 | #io.show() 70 | #io.imsave("translated.png",img)''' 71 | -------------------------------------------------------------------------------- /param_op/skopt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -* 2 | from matplotlib import pyplot as plt 3 | from sklearn.grid_search import GridSearchCV, RandomizedSearchCV 4 | from keras.models import Sequential 5 | from keras.layers.core import Dropout, Dense, Reshape 6 | from keras.layers.wrappers import TimeDistributed 7 | from keras.layers.recurrent import LSTM 8 | from keras.optimizers import SGD, Adagrad 9 | from keras.callbacks import History, LearningRateScheduler, ModelCheckpoint, EarlyStopping 10 | from keras.callbacks import Callback 11 | from keras import backend as k 12 | from keras.wrappers.scikit_learn import KerasClassifier as kclass 13 | import numpy as np 14 | import codecs 15 | 16 | #### for hindi text ### 17 | import sys 18 | reload(sys) 19 | sys.setdefaultencoding('utf8') 20 | ##### 21 | 22 | np.random.seed(1707) 23 | 24 | line_length = 248 # seq size 25 | 26 | 27 | # y_val=y[rand:rand+30] 28 | 29 | 30 | ########## opt params ##################### 31 | dropout = np.array([0.4, 0.5, 0.6, 0.7, 0.8]) 32 | init = ["normal", "uniform"] 33 | dropout = dict(dropout=dropout) 34 | print dropout 35 | ############################# Begin model ################### 36 | 37 | 38 | def my_model(dropout): 39 | ############ model params ################ 40 | line_length = 248 # seq size 41 | train_char = 58 42 | hidden_neurons = 512 # hidden neurons 43 | batch = 64 # batch_size 44 | no_epochs = 5 45 | ################### Model ################ 46 | model = Sequential() 47 | # layer 1 48 | model.add(LSTM(hidden_neurons, return_sequences=True, 49 | input_shape=(line_length, train_char))) 50 | model.add(Dropout(dropout)) 51 | # layer 2 52 | model.add(LSTM(hidden_neurons, return_sequences=True)) 53 | model.add(Dropout(dropout)) 54 | # layer 3 55 | model.add(LSTM(hidden_neurons, return_sequences=True)) 56 | model.add(Dropout(dropout)) 57 | model.add(Reshape((248, 512))) 58 | # fc layer 59 | model.add(TimeDistributed(Dense(58, activation='softmax'))) 60 | # model.load_weights("weights/model_maha1_noep50_batch64_seq_248.hdf5") 61 | # model.layers.pop() 62 | # model.layers.pop() 63 | # model.add(Dropout(dropout)) 64 | #model.add(TimeDistributed(Dense(train_char, activation='softmax'))) 65 | initlr = 0.00114 66 | adagrad = Adagrad(lr=initlr, epsilon=1e-08) 67 | model.compile(optimizer=adagrad, 68 | loss='categorical_crossentropy', metrics=['accuracy']) 69 | ###load weights#### 70 | return model 71 | 72 | ''' 73 | not working for RNN 74 | ''' 75 | print "Creating model" 76 | classif = kclass(my_model, batch_size=64) 77 | randgrid = RandomizedSearchCV( 78 | estimator=classif, param_distributions=dropout, n_iter=2) 79 | print "Checking best hyper_params" 80 | print X_train.shape, y_train.shape 81 | opt_result = randgrid.fit(X_train, y_train) 82 | print("Best:{0} using {1}".format( 83 | opt_result.best_score_, opt_result.best_params_)) 84 | -------------------------------------------------------------------------------- /callbacks/helper.py: -------------------------------------------------------------------------------- 
1 | from keras.callbacks import History, ModelCheckpoint, EarlyStopping, LearningRateScheduler 2 | from keras.callbacks import Callback 3 | from keras import backend as k 4 | import numpy as np 5 | 6 | 7 | class decay_lr(Callback): 8 | ''' 9 | Learning rate decay at the end of every nth epoch 10 | 11 | decay: decay factor 12 | n_epoch: decay the learning rate at the end of every n_epoch epochs 13 | ''' 14 | 15 | def __init__(self, n_epoch, decay): 16 | super(decay_lr, self).__init__() 17 | self.n_epoch = n_epoch 18 | self.decay = decay 19 | 20 | def on_epoch_begin(self, epoch, logs={}): 21 | old_lr = self.model.optimizer.lr.get_value() 22 | if epoch > 1 and epoch % self.n_epoch == 0: 23 | new_lr = self.decay * old_lr 24 | k.set_value(self.model.optimizer.lr, new_lr) 25 | else: 26 | k.set_value(self.model.optimizer.lr, old_lr) 27 | 28 | 29 | # for keras' LearningRateScheduler; assumes a compiled `model` in scope 30 | def decay_sch(epoch): 31 | if epoch % 6 == 0: 32 | lr = 0.10 * model.optimizer.lr.get_value() 33 | return float(lr) 34 | else: 35 | return float(model.optimizer.lr.get_value()) 36 | 37 | 38 | class expdecaylr_loss(Callback): 39 | ''' 40 | Decay learning rate(lr) exponentially w.r.t loss 41 | 42 | Output: current_lr * e^{loss} 43 | ''' 44 | 45 | def __init__(self): 46 | super(expdecaylr_loss, self).__init__() 47 | 48 | def on_epoch_end(self, epoch, logs={}): 49 | loss = logs.get('loss') # get loss 50 | print "loss: ", loss 51 | old_lr = self.model.optimizer.lr.get_value() # get old lr 52 | new_lr = old_lr * np.exp(loss) # lr*exp(loss) 53 | k.set_value(self.model.optimizer.lr, new_lr) 54 | 55 | 56 | # decaylr=LearningRateScheduler(decay_sch) 57 | 58 | 59 | # checkpoint=ModelCheckpoint("weights/adam_noep{0}_batch{1}_seq_{2}.hdf5".format(\ 60 | # no_epochs,batch, seq_length), monitor='loss', verbose=0, 61 | # save_best_only=True, save_weights_only=False, mode='min') 62 | 63 | 64 | class lr_printer(Callback): 65 | ''' 66 | Print lr at the beginning of each epoch 67 | ''' 68 | 69 | def __init__(self): 70 | super(lr_printer, self).__init__() 71 | 72 | def on_epoch_begin(self, epoch, logs={}): 73 | print('lr:', self.model.optimizer.lr.get_value()) 74 | 75 | 76 | class logger(Callback): 77 | ''' 78 | Log training metrics to a file at the end of each epoch 79 | metrics logged: Loss, Train acc., Val. loss, Val. acc.
80 | 81 | file: filename of the log file 82 | ''' 83 | 84 | def __init__(self, file): 85 | self.file = file 86 | super(logger, self).__init__() 87 | 88 | def on_epoch_end(self, epoch, logs={}): 89 | item = logs.items() 90 | with open(self.file, "a") as log: 91 | log.write("------epoch:{0}, lr:{2} ,stats:{1}------\n".format( 92 | epoch, item, self.model.optimizer.lr.get_value())) 93 | 94 | 95 | # get activations of nth layer (template: assumes `import theano`, a compiled `model` and a layer index `n` in scope) 96 | extract_features = theano.function([model.layers[0].input], model.layers[ 97 | n].output, allow_input_downcast=True) 98 | -------------------------------------------------------------------------------- /param_op/hypopt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from hyperopt import Trials, STATUS_OK, rand 3 | from hyperas import optim 4 | from hyperas.distributions import choice, uniform, conditional 5 | from keras.models import Sequential 6 | from keras.layers.core import Dropout, Dense, Reshape 7 | from keras.layers.wrappers import TimeDistributed 8 | from keras.layers.recurrent import LSTM 9 | from keras.optimizers import SGD, Adagrad 10 | from keras.callbacks import History, LearningRateScheduler, ModelCheckpoint, EarlyStopping 11 | from keras.callbacks import Callback 12 | from keras import backend as k 13 | import numpy as np 14 | import codecs 15 | 16 | #### for hindi text ### 17 | import sys 18 | reload(sys) 19 | sys.setdefaultencoding('utf8') 20 | 21 | np.random.seed(1707) 22 | 23 | 24 | # y_val=y[rand:rand+30] 25 | 26 | ########## opt params ##################### 27 | dropout = np.array([0.4, 0.5, 0.6, 0.7, 0.8]) 28 | init = ["normal", "uniform"] 29 | dropout = dict(dropout=dropout) 30 | print dropout 31 | ############################# Begin model ################### 32 | 33 | 34 | def my_model(X_train, y_train, X_test, y_test): 35 | ############ model params ################ 36 | line_length = 248 # seq size 37 | train_char = 58 38 | hidden_neurons = 512 # hidden neurons 39 | batch = 64 # batch_size 40 | no_epochs = 3 41 | ################### Model ################ 42 | 43 | ######### begin model ######## 44 | model = Sequential() 45 | # layer 1 46 | model.add(LSTM(hidden_neurons, return_sequences=True, 47 | input_shape=(line_length, train_char))) 48 | model.add(Dropout({{choice([0.4, 0.5, 0.6, 0.7, 0.8])}})) 49 | # layer 2 50 | model.add(LSTM(hidden_neurons, return_sequences=True)) 51 | model.add(Dropout({{choice([0.4, 0.5, 0.6, 0.7, 0.8])}})) 52 | # layer 3 53 | model.add(LSTM(hidden_neurons, return_sequences=True)) 54 | model.add(Dropout({{choice([0.4, 0.5, 0.6, 0.7, 0.8])}})) 55 | # fc layer 56 | model.add(TimeDistributed(Dense(train_char, activation='softmax'))) 57 | model.load_weights("weights/model_maha1_noep50_batch64_seq_248.hdf5") 58 | ######################################################################## 59 | checkpoint = ModelCheckpoint("weights/hypmodel2_maha1_noep{0}_batch{1}_seq_{2}.hdf5".format( 60 | no_epochs, batch, line_length), monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='min') 61 | 62 | initlr = 0.00114 63 | adagrad = Adagrad(lr=initlr, epsilon=1e-08, 64 | clipvalue={{choice([0, 1, 2, 3, 4, 5, 6, 7])}}) 65 | model.compile(optimizer=adagrad, 66 | loss='categorical_crossentropy', metrics=['accuracy']) 67 | history = History() 68 | # fit model 69 | model.fit(X_train, y_train, batch_size=batch, nb_epoch=no_epochs, 70 | validation_split=0.2, callbacks=[history, checkpoint]) 71 | 72 | score, acc = model.evaluate(X_test, y_test,
verbose=0) 73 | print('Test accuracy:', acc) 74 | return {'loss': -acc, 'status': STATUS_OK, 'model': model} 75 | 76 | print "Getting best hyper_params" 77 | best_run, best_model = optim.minimize( 78 | model=my_model, data=get_data, algo=rand.suggest, max_evals=20, trials=Trials()) 79 | print(best_run, "\n") 80 | print(best_model) 81 | -------------------------------------------------------------------------------- /img/preprocess/face_align2.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Original author: XY Feng 3 | Edited by: Ishank Sharma 4 | ''' 5 | 6 | 7 | 8 | import dlib 9 | import numpy as np 10 | import os 11 | import glob 12 | import cv2 13 | 14 | FOLDER_PATH = "/normal" 15 | IMAGE_FORMAT = "jpg" 16 | REFERENCE_PATH = "resz0.jpg" 17 | SCALE_FACTOR = 1 18 | PREDICTOR_PATH = "face_landmarks.dat" 19 | 20 | 21 | 22 | #FACE_POINTS = list(range(17, 68)) 23 | RIGHT_EYE_POINTS = list(range(36, 42)) 24 | LEFT_EYE_POINTS = list(range(42, 48)) 25 | NOSE_POINTS = list(range(27, 35)) 26 | #JAW_POINTS = list(range(0, 17)) 27 | #MOUTH_POINTS = list(range(48, 61)) 28 | #RIGHT_BROW_POINTS = list(range(17, 22)) 29 | #LEFT_BROW_POINTS = list(range(22, 27)) 30 | 31 | 32 | ALIGN_POINTS = (RIGHT_EYE_POINTS + LEFT_EYE_POINTS + NOSE_POINTS) 33 | 34 | detector=dlib.get_frontal_face_detector() 35 | predictor=dlib.shape_predictor(PREDICTOR_PATH) 36 | 37 | def get_landmarks(im,fname): 38 | rects = detector(im, 1) 39 | maxbb=max(rects, key=lambda rect: rect.width() * rect.height()) 40 | return np.matrix([[p.x, p.y] for p in predictor(im, maxbb).parts()]) 41 | 42 | def transformation_from_points(points1, points2): 43 | """ 44 | Return an affine transformation [s * R | T] such that: 45 | sum ||s*R*p1,i + T - p2,i||^2 46 | is minimized. 47 | """ 48 | # Solve the procrustes problem by subtracting centroids, scaling by the 49 | # standard deviation, and then using the SVD to calculate the rotation. See 50 | # the following for more details: 51 | # https://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem 52 | 53 | points1 = points1.astype(np.float64) 54 | points2 = points2.astype(np.float64) 55 | 56 | c1 = np.mean(points1, axis=0) 57 | c2 = np.mean(points2, axis=0) 58 | points1 -= c1 59 | points2 -= c2 60 | 61 | s1 = np.std(points1) 62 | s2 = np.std(points2) 63 | points1 /= s1 64 | points2 /= s2 65 | 66 | U, S, Vt = np.linalg.svd(points1.T * points2) 67 | 68 | R = (U * Vt).T 69 | 70 | return np.vstack([np.hstack(((s2 / s1) * R,c2.T - (s2 / s1) * R * c1.T)),np.matrix([0., 0., 1.])]) 71 | 72 | 73 | def read_im_and_landmarks(fname): 74 | im = cv2.imread(fname, cv2.IMREAD_COLOR) 75 | 76 | try: 77 | s = get_landmarks(im,fname) 78 | return im, s 79 | except: 80 | return im,fname 81 | 82 | def nobox(img): 83 | cv2.imwrite('g_nor_nobox_'+ str(img_index)+'.jpg', img) 84 | 85 | def warp_im(im, M, dshape): 86 | output_im = np.zeros(dshape, dtype=im.dtype) 87 | cv2.warpAffine(im,M[:2],(dshape[1], dshape[0]),dst=output_im,borderMode=cv2.BORDER_TRANSPARENT,flags=cv2.WARP_INVERSE_MAP) 88 | return output_im 89 | 90 | img_ref, landmark_ref = read_im_and_landmarks(REFERENCE_PATH) 91 | img_index = 0 92 | 93 | for f in glob.glob(os.path.join(FOLDER_PATH, "*." 
+ IMAGE_FORMAT)): 94 | print("Processing file: {}".format(f)) 95 | img_index += 1 96 | img, landmark = read_im_and_landmarks(f) 97 | try: 98 | M = transformation_from_points(landmark_ref[ALIGN_POINTS],landmark[ALIGN_POINTS]) 99 | warped_im2 = warp_im(img, M, img_ref.shape) 100 | cv2.imwrite('g_nor_align_'+str(img_index)+'.jpg', warped_im2) 101 | except: 102 | nobox(img) 103 | 104 | -------------------------------------------------------------------------------- /nlp/every2vec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re 3 | import codecs 4 | import json 5 | import numpy as np 6 | import sys 7 | from gensim.models import Word2Vec 8 | from keras.initializations import glorot_uniform 9 | 10 | 11 | ### if utf-8 characters, reload system (failsafe) 12 | reload(sys) 13 | sys.setdefaultencoding('utf8') 14 | ### 15 | 16 | class every2vec(object): 17 | 18 | def __init__(self,model_path): 19 | self.model_path = model_path 20 | self.model = Word2Vec.load(self.model_path) 21 | self.model.init_sims(replace=True) 22 | 23 | ### build corpus 24 | def built_corp(self, filepath): 25 | ''' 26 | Build corpus from filepath 27 | ''' 28 | corpus=[] 29 | for line in codecs.open(filepath,encoding="utf-8"): 30 | line = re.sub(ur'[a-zA-Z0-9\n!?<>\[\]:/~()\\@#$%&*\-,\."\'०१२३४५६७८९॥।]'," \n", line).split(' ') # custom expressions to be removed 31 | for word in line: 32 | if word !=('').encode("utf-8"): 33 | corpus.append(word) 34 | corpus.append(u"\n") # insert \n at end 35 | corpus.insert(0, u"\n") # insert \n at start 36 | return corpus 37 | 38 | 39 | def corp_dict(self, corpus): 40 | ''' 41 | Build corpus dictionaries- 42 | cwrd2index: {word: unique index in corpus} 43 | index2cwrd: {unique index in corpus: word} 44 | ''' 45 | wordset = sorted(set(corpus)) 46 | cwrd2index = dict((c, i) for i, c in enumerate(wordset)) 47 | index2cwrd = dict((i, c) for i, c in enumerate(wordset)) 48 | return cwrd2index, index2cwrd 49 | 50 | 51 | def load_vocab(self, vocab_path): 52 | ''' 53 | Load the word2vec vocabulary (json) and build dictionaries 54 | 55 | word2ind: {word: unique index in word2vec vocab} 56 | ind2word: {unique index in word2vec vocab: word} 57 | ''' 58 | with open(vocab_path, 'r') as f: 59 | data = json.loads(f.read()) 60 | ind2word = dict([(voc, key) for key, voc in data.items()]) 61 | word2ind = data 62 | return ind2word, word2ind 63 | 64 | 65 | def add2ind(self, corpus, word2ind): 66 | ''' 67 | Make dict for words which are not in the word2vec vocab i.e. out-of-vocabulary (OOV) words 68 | ''' 69 | add_wrd = [] 70 | for word in corpus: 71 | if word not in word2ind: 72 | add_wrd.append(word) 73 | 74 | i = len(self.model.vocab) 75 | add_wrd = set(add_wrd) 76 | add_dict = {} 77 | for j in add_wrd: 78 | add_dict[j] = i 79 | i += 1 80 | return add_dict 81 | 82 | 83 | def corp2ind(self, corpus, full_dict): 84 | ''' 85 | Vectorize corpus: map word to index using full_dict 86 | full_dict = word2ind + add_dict 87 | ''' 88 | vec = [] 89 | for word in corpus: 90 | ind = full_dict[word] 91 | vec.append(ind) 92 | return vec 93 | 94 | 95 | def prep_embed(self, full_i2w_dict, ind2word, w2v_dim): 96 | ''' 97 | Prepare an embedding vector for each word in full_dict 98 | 99 | Words which are in the word2vec vocab are replaced by their word vector 100 | OOV words i.e. words that are not in word2vec are replaced by a random weight (rand_weight) 101 | ''' 102 | embed_weight=np.zeros((len(full_i2w_dict),w2v_dim)) 103 | embed_dict={} 104 | for k,v in full_i2w_dict.items(): 105 | if k in
ind2word: 106 | model_weight=np.array(self.model[v]) 107 | embed_weight[k]=model_weight 108 | embed_dict[k]=model_weight 109 | else: 110 | rand_weight=np.array(glorot_uniform((w2v_dim,)).eval()) 111 | embed_weight[k]=rand_weight 112 | embed_dict[k]=rand_weight 113 | return embed_weight, embed_dict 114 | 115 | 116 | def one_hot(self, ind, vocab_size): 117 | ''' 118 | Prepare a position-based one-hot encoding 119 | ''' 120 | empvec = np.zeros((vocab_size), dtype=np.bool) 121 | empvec[ind] = 1 122 | return empvec 123 | 124 | def y2vec(self, cwrd2index, y, vocab_size): 125 | ''' 126 | Map each word in y to a one-hot vector using the corpus dictionary (cwrd2index) 127 | ''' 128 | modvec=np.zeros((len(y),vocab_size)) 129 | j=0 130 | for i in y: 131 | modvec[j]=self.one_hot(cwrd2index[i], vocab_size) 132 | j+=1 133 | return modvec 134 | 135 | 136 | def vec2seq(self, vec, seq_length): 137 | ''' 138 | Reshape prepared vectors according to sequence length (seq_length) 139 | 140 | Output: 3D array of shape [no. of batches, seq_length, feature dim] 141 | ''' 142 | dim= vec.shape[0]/seq_length 143 | vec = np.reshape(vec, (dim, seq_length, vec.shape[1])).astype("int32") 144 | return vec 145 | 146 | 147 | def make_data(self,vec,seq_length,step,corpus): 148 | ''' 149 | Special function for text prediction. Divides the input into data sequences and next-word labels 150 | 151 | Input: X=[[a,b,c],[b,c,d]] 152 | Output: y=[d,e] 153 | ''' 154 | X_train=[] 155 | y_train=[] 156 | for i in range(0, len(vec)-seq_length, step): 157 | X_train.append(vec[i:i+seq_length]) 158 | y_train.append(corpus[i+seq_length]) 159 | return np.array(X_train), y_train 160 | 161 | 162 | def replace_oov(self,corpus,word2ind,oov_token): 163 | ''' 164 | Replace OOV words with the OOV token 165 | ''' 166 | nw_corpus=[] 167 | for word in corpus: 168 | if word not in word2ind: 169 | nw_corpus.append(oov_token) 170 | else: 171 | nw_corpus.append(word) 172 | return nw_corpus 173 | --------------------------------------------------------------------------------
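# Illustrative usage sketch (untested) for nlp/every2vec.py; the model path, file names
# and w2v_dim=100 below are assumptions, not part of the repo.
from nlp.every2vec import every2vec

e2v = every2vec("hi_word2vec.model")                 # path to a trained gensim Word2Vec model (hypothetical)
corpus = e2v.built_corp("corpus.txt")                # tokenised corpus with newline markers
cwrd2index, index2cwrd = e2v.corp_dict(corpus)       # corpus-level dictionaries
ind2word, word2ind = e2v.load_vocab("vocab.json")    # word2vec vocabulary dump (hypothetical path)
add_dict = e2v.add2ind(corpus, word2ind)             # assign new indices to OOV words
full_dict = word2ind.copy()
full_dict.update(add_dict)                           # word2ind + add_dict, as corp2ind expects
vec = e2v.corp2ind(corpus, full_dict)                # corpus as a sequence of word indices
full_i2w = dict((i, w) for w, i in full_dict.items())
embed_weight, embed_dict = e2v.prep_embed(full_i2w, ind2word, 100)   # 100 = word2vec dimension (assumed)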