├── Algorithm
│   ├── Nureal Network
│   │   └── FullyConnected
│   │       ├── __init__.py
│   │       ├── ConvNet
│   │       │   └── layer.py
│   │       ├── optim.py
│   │       ├── layers.py
│   │       └── FC.py
│   ├── Regression
│   │   ├── LR NormalEquation.py
│   │   ├── KNeighborsRegression.py
│   │   ├── LR SquareMean.py
│   │   └── LR gradient.py
│   ├── Classification
│   │   ├── NaiveBayesian.py
│   │   ├── K-Nearest neighbors.py
│   │   ├── LogReg_binary.py
│   │   └── Linear_classifier.py
│   ├── preprocessing
│   │   ├── normalize.py
│   │   └── TF-IDF.py
│   └── Clustering
│       ├── DBSCAN.py
│       └── K-means.py
├── README.md
├── .gitignore
└── LICENSE

/Algorithm/Nureal Network/FullyConnected/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/Algorithm/Regression/LR NormalEquation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | class LinerRegressionNE:
5 | 
6 |     def __init__(self):
7 |         self.theta = None
8 | 
9 |     @staticmethod
10 |     def normal_equation(x, y):
11 |         # or inv(x.T @ x) @ x.T @ y
12 |         theta = np.linalg.pinv(x) @ y
13 |         return theta
14 | 
15 |     def fit(self, x, y):
16 |         theta = self.normal_equation(x, y)
17 |         self.theta = theta
18 | 
19 |     def predict(self, x_test):
20 |         y_predict = np.sum(x_test * self.theta, axis=1)
21 |         return y_predict
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SadLearn: Machine Learning Algorithms with NumPy
2 | 
3 | - A small example of using the MLP algorithm
4 | ```python
5 | from FC import FullyConnectedNet
6 | # MLP with 3 hidden layers
7 | clf = FullyConnectedNet(hidden_dims=[100, 100, 100], num_class=10, num_epoch=10)
8 | clf.train((x_train, y_train), (x_test, y_test))
9 | ```
10 | 
11 | ## Structure:
12 | 
13 | ├── Algorithm
14 | |   |
15 | │   ├── Classification
16 | |   |   ├── K Nearest Neighbors
17 | |   |   ├── Linear classifier
18 | |   |   ├── LogReg binary
19 | |   |   ├── Naive Bayesian
20 | |   |
21 | │   ├── Clustering
22 | |   |   ├── K Means
23 | |   |   ├── DBSCAN
24 | |   |
25 | │   ├── Neural Network
26 | |   |   ├── Fully Connected Network
27 | |   |   |   |
28 | |   |   |   ├── Optimization
29 | |   |   |   ├── Layer
30 | |   |   ├── Convolution Layer
31 | |   |
32 | |   ├── Regression
33 | |   |   ├── Regression SGD
34 | |   |   ├── Regression Normal Equation
35 | |   |   ├── K Neighbors Regression
36 | |   |
37 | |   ├── Preprocessing
38 | |   |   ├── Normalize
39 | |   |...
40 | |
41 | |...
42 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 | 
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /Algorithm/Classification/NaiveBayesian.py: -------------------------------------------------------------------------------- 1 | from scipy.stats import norm 2 | 3 | class NaiveBayesian: 4 | 5 | def __init__(self, x, y, class_names=None): 6 | self.x = x 7 | self.y = y 8 | self.class_names = class_names 9 | self.labels = set(self.y) 10 | 11 | self.std_label = np.zeros((len(self.labels), x.shape[1])) 12 | self.mean_label = self.std_label 13 | self.prior = [self.x.shape[1] / len(self.y)] * len(self.labels) 14 | 15 | 16 | @staticmethod 17 | def l2_norm(x): 18 | a = np.sqrt(np.square(x)) 19 | size_of_vector = np.sqrt(np.sum(a ** 2, axis=1)).reshape(len(x), 1) 20 | return x / size_of_vector 21 | 22 | def fit(self): 23 | for i in self.labels: 24 | label = self.x[np.where(self.y == i)] 25 | self.std_label[i] = np.std(label, axis=0) 26 | self.mean_label[i] = np.mean(label, axis=0) 27 | 28 | return self.prior 29 | 30 | 31 | def predict(self, x): 32 | if len(x.shape)==1: 33 | x = x[np.newaxis, :] 34 | 35 | # Maximum Posterior 36 | evidence = np.prod(self.l2_norm(x)) + 0.001 37 | p = np.empty(0) 38 | for obs in x: 39 | likelihood = norm.pdf((obs - self.mean_label) / self.std_label) 40 | likelihood = np.prod(likelihood, axis=1) 41 | 42 | posterior = self.prior * likelihood / evidence 43 | p = np.append(p, np.argmax(posterior)) 44 | 45 | if isinstance(self.class_names, np.ndarray): 46 | return self.class_names[p.astype(int)] 47 | return p 48 | 49 | 50 | def score(self, x_test, y_test): 51 | y_pred = self.predict(x_test) 52 | score = np.mean(y_pred == y_test) 53 | return score 54 | -------------------------------------------------------------------------------- /Algorithm/Regression/KNeighborsRegression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class KNeighborsRegressor: 5 | """ 6 | KNeighborsRegressor.py 7 | detail: the algorithm for Regression with use Nearest neighbors 8 | Regression based on k-nearest neighbors. 
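The target of a query point is predicted as the mean of the targets of its k closest training samples; note that `ocliden` below sums per-coordinate absolute differences, so the distance it returns is the Manhattan (L1) distance rather than a true Euclidean norm.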
9 | https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm 10 | 11 | Parameters 12 | ---------- 13 | :param x: train data for learning 14 | :param y: correct response of train data 15 | :param tests in predict: test data for validation and predict 16 | :param n_neighbors: number of neighbors 17 | 18 | :return: predict values for test data 19 | 20 | Examples 21 | -------- 22 | >>> clf = KNeighborsRegressor(x, y, 5) 23 | >>> y_pred = clf.predict(tests) 24 | """ 25 | def __init__(self, x, y, n_neighbors): 26 | self.k = n_neighbors 27 | self.x = x 28 | self.y = y 29 | 30 | @staticmethod 31 | def ocliden(x, y): 32 | total = 0 33 | for j in range(len(y)): 34 | total += np.sqrt((x[j] - y[j]) ** 2) 35 | return total 36 | 37 | def predict(self, tests): 38 | 39 | pred = np.empty(0) 40 | for test in tests: 41 | distance = np.empty(0) 42 | # get all distance between test_ & train_s 43 | for j in range(len(self.x)): 44 | distance = np.append(distance, self.ocliden(self.x[j], test)) 45 | 46 | min_dist = [] 47 | for j in range(self.k): 48 | index = np.where(distance == np.min(distance))[0][0] 49 | min_dist.append(self.y[index]) 50 | # delete min 51 | distance = np.delete(distance, [index]) 52 | 53 | pred = np.append(pred, np.mean(min_dist)) 54 | 55 | return pred[:, np.newaxis] 56 | 57 | -------------------------------------------------------------------------------- /Algorithm/Classification/K-Nearest neighbors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | KNearestNeighbors.py 5 | detail: Classifier Algorithm 6 | 7 | author: sajjad ayobi 8 | see others in repository : sadlearn 9 | in URL: https://github.com/sajjjadayobi/sadlearn/ 10 | """ 11 | 12 | 13 | class KNearestNeighbors: 14 | """ 15 | K-Nearest Neighbors: the algorithm for classification with use Nearest neighbors 16 | https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm 17 | 18 | Parameters 19 | ---------- 20 | :param x: train data for learning 21 | :param tests in predict: test data for validation and predict 22 | :param y: correct response of train data 23 | :param n_neighbors: number of neighbors 24 | 25 | :return: predict values for test data 26 | 27 | Examples 28 | -------- 29 | >>> clf = KNearestNeighbors(x, y, 5) 30 | >>> y_pred = clf.predict(tests) 31 | """ 32 | def __init__(self, x, y, n_neighbors): 33 | self.x = x 34 | self.y = y 35 | self.k = n_neighbors 36 | 37 | @staticmethod 38 | def ocliden(x, y): 39 | total = 0 40 | for j in range(len(y)): 41 | total += np.sqrt((x[j] - y[j]) ** 2) 42 | return total 43 | 44 | def predict(self, tests): 45 | 46 | pred = [] 47 | for test in tests: 48 | distance = np.empty(0) 49 | # get all distance between test_ & train_s 50 | for j in range(len(self.x)): 51 | distance = np.append(distance, self.ocliden(self.x[j], test)) 52 | 53 | min_dict = [] 54 | for j in range(self.k): 55 | index = np.where(distance == distance.min())[0][0] 56 | min_dict.append(self.y[index]) 57 | # delete min 58 | distance = np.delete(distance, [index]) 59 | 60 | pred.append(np.median(min_dict).astype(int)) 61 | 62 | return pred 63 | -------------------------------------------------------------------------------- /Algorithm/preprocessing/normalize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | normalization.py 5 | 6 | author: sajjad ayobi 7 | see others in repository : sadlearn 8 | in URL: https://github.com/sajjjadayobi/sadlearn/ 9 | 10 | date : 10/4/2018 11 | """ 12 
| 13 | 14 | def min_max_normalize(x, min=0, max=1): 15 | """ 16 | Returns 17 | ------- 18 | normalization data in favorite range 19 | 20 | Parameters 21 | ---------- 22 | x: x is we data in shape Matrix 23 | 24 | min: minimum of data normalized 25 | max: maximum of data normalized 26 | 27 | Examples 28 | -------- 29 | >>> import numpy as np 30 | >>> data = np.array([[1, 2, 3], 31 | >>> [4, 5, 6], 32 | >>> [7, 8, 9]]) 33 | 34 | >>> norm = min_max_normalize(data, 0, 1) 35 | >>> print(norm) 36 | 37 | >>> [[0. 0.125 0.25 ] 38 | >>> [0.375 0.5 0.625] 39 | >>> [0.75 0.875 1. ]] 40 | """ 41 | 42 | a = max - min 43 | b = np.max(x) - np.min(x) 44 | c = x - np.max(x) 45 | return ((a / b) * c) + max 46 | 47 | 48 | def l1_norm(x): 49 | """ 50 | Returns 51 | ------- 52 | normalization of data in range(0,1) with method manhattan distance 53 | 54 | Parameters 55 | ---------- 56 | x: x is we data in shape Matrix 57 | 58 | note 59 | -------- 60 | most import numpy as np 61 | """ 62 | a = x - np.min(x) 63 | b = np.max(x) - np.min(x) 64 | return a / b 65 | 66 | 67 | def l2_norm(x): 68 | """ 69 | Returns 70 | ------- 71 | normalization of data in range(0,1) with method euclidean distance 72 | 73 | Parameters 74 | ---------- 75 | x: x is we data in shape Matrix 76 | 77 | note 78 | -------- 79 | most import numpy as np 80 | """ 81 | a = np.sqrt(np.square(x)) 82 | size_of_vector = np.sqrt(np.sum(a ** 2, axis=1)).reshape(len(x), 1) 83 | 84 | return x / size_of_vector 85 | 86 | 87 | def StandardScaler(x): 88 | """ 89 | Returns 90 | ------- 91 | Standardize features by removing the mean and scaling to unit variance 92 | 93 | Parameters 94 | ---------- 95 | x: x is we data in shape Matrix 96 | """ 97 | return (x - np.mean(x, axis=0)) / np.std(x, axis=0) -------------------------------------------------------------------------------- /Algorithm/preprocessing/TF-IDF.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from Algorithm.preprocessing.normalize import l2_norm 3 | 4 | """ 5 | TF-IDF.py 6 | detail: extraction numerical and features of text 7 | 8 | author: sajjad ayobi 9 | see others in repository : sadlearn 10 | in URL: https://github.com/sajjjadayobi/sadlearn/ 11 | 12 | date : 10/4/2018 13 | """ 14 | 15 | 16 | class TfidfVectorizer: 17 | """ 18 | Convert a collection of raw documents to a matrix of TF-IDF features. 
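Note: as implemented below, `transform` returns an L2-normalised term-count matrix over the extracted vocabulary; no explicit IDF weighting step is applied.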
19 | 20 | Parameters 21 | ---------- 22 | docs: get documents to form of list or np.ndarray for convert 23 | docs sample : ['in method used for natural language processing','...'] 24 | 25 | Attributes 26 | ---------- 27 | words: all words in the documents 28 | 29 | Notes 30 | ----- 31 | most by import numpy as np 32 | and from Algorithm.preprocessing.normalize import l2_norm 33 | l2_norm use for normalization matrix 34 | 35 | Return 36 | ------ 37 | a matrix than abundance words in texts 38 | 39 | Use 40 | --- 41 | tf = TfidfVectorizer(documents) 42 | matrix = tf.transform() 43 | """ 44 | 45 | def __init__(self, docs): 46 | self.docs = docs 47 | words = self.extract_words(docs) 48 | 49 | self.words = self.remove_surplus_chars(words) 50 | 51 | @staticmethod 52 | def remove_surplus_chars(words): 53 | added_words = ['a', '.', '(', ')', ',', '$', ' ', '?', '!'] 54 | 55 | words = np.unique(np.sort(words)) 56 | for i, word in enumerate(words): 57 | if word in added_words: 58 | words = np.delete(words, i) 59 | return words 60 | 61 | @staticmethod 62 | def extract_words(docs): 63 | words = np.empty(0) 64 | for doc in docs: 65 | words = np.append(words, doc.lower().split(' ')) 66 | return words 67 | 68 | def counts_words(self, doc): 69 | doc = doc.split(' ') 70 | counts = [] 71 | for word in self.words: 72 | count = 0 73 | for i in doc: 74 | if i == word: 75 | count += 1 76 | counts.append(count) 77 | return counts 78 | 79 | def transform(self): 80 | 81 | arr = np.zeros((len(self.docs), len(self.words))) 82 | for i, doc in enumerate(self.docs): 83 | arr[i] = self.counts_words(doc) 84 | 85 | transformed = l2_norm(arr) 86 | return transformed 87 | -------------------------------------------------------------------------------- /Algorithm/Regression/LR SquareMean.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from Algorithm.preprocessing.normalize import StandardScaler 3 | 4 | 5 | class LinerRegression: 6 | """ 7 | Liner Regression: 8 | Version : Normal Equation 9 | Ordinary least squares Linear Regression. 
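Each coefficient is the univariate least-squares slope sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))**2), scaled by the number of features in `fit`, and the intercept is mean(y) - sum(coef) * mean(x) (see `computing_coef` below).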
10 | can computing n dimension 11 | 12 | Parameters 13 | ---------- 14 | standardization : boolean, optional, default False 15 | if this param was true, data will normalization 16 | 17 | x: parameter in function fit 18 | x is data to shape of matrix 19 | 20 | y: parameter in function fit 21 | y or target is correct response for x to shape of vector or matrix 22 | 23 | test: parameter in function predict 24 | test for Predicting correct response 25 | 26 | Return 27 | ------- 28 | predictions : in function predict 29 | returns values of predicted for test data 30 | 31 | Notes 32 | ----- 33 | most by import numpy 34 | if you will predictions just a sample, should doing like this [x[0]] 35 | 36 | Example 37 | ------- 38 | >>> reg = LinerRegression(standardization=True) 39 | >>> reg.fit(x, y) 40 | >>> predictions = reg.predict(x) 41 | """ 42 | 43 | def __init__(self, standardization=False): 44 | self.intercept = None 45 | self.coef = None 46 | self.standardization = standardization 47 | 48 | @staticmethod 49 | def computing_coef(x, y): 50 | x_mean = x - np.mean(x) 51 | y_mean = y - np.mean(y) 52 | x_m_sq = np.sum(np.square(x_mean)) 53 | 54 | m = np.sum(x_mean * y_mean) / x_m_sq 55 | return m 56 | 57 | def fit(self, x, y): 58 | if self.standardization: 59 | # other than import can copy code than preprocessing.normalize 60 | x = StandardScaler(x) 61 | 62 | coef = [] 63 | for i in x.T: 64 | coef.append(self.computing_coef(i, y) / len(x.T)) 65 | intercept = np.mean(y) - np.sum(coef) * np.mean(x) 66 | 67 | self.intercept = intercept 68 | self.coef = coef 69 | 70 | def predict(self, test): 71 | if self.standardization: 72 | test = StandardScaler(test) 73 | 74 | predictions = [] 75 | for x in test: 76 | slop = [] 77 | for i, v in enumerate(x): 78 | slop.append(v * self.coef[i]) 79 | 80 | predictions.append(np.sum(slop) + self.intercept) 81 | 82 | return predictions 83 | -------------------------------------------------------------------------------- /Algorithm/Regression/LR gradient.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | class LinReg_gradient_descent: 6 | 7 | """ 8 | Linear Regression with gradient descent 9 | 10 | Parameter 11 | --------- 12 | n_iter: int 13 | number of iteration Gradient Descent 14 | 15 | learning_rate: float 16 | alpha in Gradient Descent Algorithm 17 | 18 | reg: float 19 | coef of regularization 20 | 21 | report: bool 22 | showing report of each iter in Gradient Descent 23 | 24 | 25 | Examples 26 | -------- 27 | >>> clf = LinReg_gradient_descent() 28 | >>> clf.fit(x, y) 29 | >>> y_pred = clf.predict(x_test) 30 | """ 31 | 32 | def __init__(self, n_iter=1000, learning_rate=0.00001, reg=0.0, report=True): 33 | self.weight = None 34 | self.lr = learning_rate 35 | self.n_iter = n_iter 36 | self.report = report 37 | self.reg = reg 38 | 39 | # x = x0 + x added bias 40 | @staticmethod 41 | def simplification(x): 42 | return np.concatenate((np.ones((x.shape[0], 1)), x), axis=1) 43 | 44 | # Normalize data 45 | @staticmethod 46 | def StandardScaler(x): 47 | return (x - np.mean(x, axis=0)) / np.std(x, axis=0) 48 | 49 | # Optimize Method 50 | @staticmethod 51 | def gradient_descent(x, y_pred, y): 52 | return x.T @ (y_pred - y) 53 | 54 | # Loss Function 55 | def sum_square_error(self, y, y_pred, w): 56 | error = y_pred - y 57 | loss = ((error.T @ error) * 0.5)[0][0] 58 | # added Regularize 59 | return loss + (self.reg * (w.T @ w)[0][0]) 60 | 61 | def fit(self, x, y): 62 | # 
PreProcessing 63 | x = self.simplification(self.StandardScaler(x)) 64 | y = y.reshape(-1, 1) 65 | w = 0.001 * np.random.randn(x.shape[1], 1) 66 | 67 | report_time = self.n_iter / 10 68 | for i in range(self.n_iter): 69 | # Loss 70 | y_pred = x @ w 71 | error = self.sum_square_error(y, y_pred, w) 72 | # Optimize Weight 73 | gd = self.gradient_descent(x, y_pred, y) 74 | w[0] = w[0] - (self.lr * gd[0]) 75 | w[1:] = w[1:] - (self.lr * (gd[1:] + (self.reg * w[1:]))) 76 | 77 | # Report 78 | if self.report and i % report_time == 0: 79 | print('\t error in iter {} = {}'.format(i, error)) 80 | 81 | self.weight = w 82 | 83 | def predict(self, x_test): 84 | x = self.simplification(self.StandardScaler(x_test)) 85 | return x @ self.weight 86 | -------------------------------------------------------------------------------- /Algorithm/Nureal Network/FullyConnected/ConvNet/layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # Conval 5 | def conv2(X, k): 6 | x_row, x_col = X.shape 7 | k_row, k_col = k.shape 8 | ret_row, ret_col = x_row - k_row + 1, x_col - k_col + 1 9 | 10 | ret = np.empty((ret_row, ret_col)) 11 | for y in range(ret_row): 12 | for x in range(ret_col): 13 | sub = X[y: y + k_row, x: x + k_col] 14 | ret[y, x] = np.sum(k * sub) 15 | return ret 16 | 17 | 18 | def conv3(X, k): 19 | x_row, x_col, x_ch = X.shape 20 | k_row, k_col = k.shape 21 | ret_row, ret_col, ret_ch = x_row - k_row + 1, x_col - k_col + 1, x_ch 22 | 23 | ret = np.empty((ret_row, ret_col, ret_ch)) 24 | for c in range(ret_ch): 25 | for y in range(ret_row): 26 | for x in range(ret_col): 27 | sub = X[y: y + k_row, x: x + k_col, c: c+1] 28 | ret[y, x, c] = np.sum(k * sub.reshape(k_row, k_col)) 29 | return ret 30 | 31 | 32 | 33 | # MAX & MEAN Pooling 34 | def pooling_2d(X, mode=np.max, size=2): 35 | x_row, x_col = X.shape 36 | ret_row, ret_col = x_row // size, x_col // size + 1 37 | 38 | ret = np.empty((ret_row, ret_col)) 39 | for i1, y in enumerate(range(0, x_row, size)): 40 | for i2, x in enumerate(range(0, x_col, size)): 41 | sub = X[y: y + size, x: x + size] 42 | ret[i1, i2] = mode(sub) 43 | 44 | return ret 45 | 46 | 47 | def pooling_3d(X, mode=np.max, size=2): 48 | x_row, x_col, x_ch = X.shape 49 | ret_row, ret_col = x_row // size, x_col // size + 1 50 | 51 | ret = np.empty((ret_row, ret_col, x_ch)) 52 | for c in range(x_ch): 53 | for i1, y in enumerate(range(0, x_row, size)): 54 | for i2, x in enumerate(range(0, x_col, size)): 55 | sub = X[y: y + size, x: x + size, c: c + 1] 56 | ret[i1, i2, c] = mode(sub) 57 | 58 | return ret 59 | 60 | 61 | 62 | 63 | # add Padding 64 | def padding_2d(X, k_size=3): 65 | x_row, x_col = X.shape 66 | pad_size = (k_size - 1) // 2 67 | 68 | ret = np.zeros((x_row + pad_size*2, x_col+ pad_size*2)) 69 | ret[pad_size: x_row + pad_size, pad_size: x_col + pad_size] = X[:, :] 70 | 71 | return ret 72 | 73 | 74 | def padding_3d(X, k_size=3): 75 | x_row, x_col, x_ch = X.shape 76 | pad_size = (k_size - 1) // 2 77 | 78 | ret = np.zeros((x_row + pad_size*2, x_col+ pad_size*2, x_ch)) 79 | ret[pad_size: x_row + pad_size, pad_size: x_col + pad_size, :] = X[:, :, :] 80 | 81 | return ret -------------------------------------------------------------------------------- /Algorithm/Clustering/DBSCAN.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | DBSCAN.py 5 | detail: Clustering Algorithm 6 | 7 | author: sajjad ayobi 8 | see others in repository : sadlearn 9 | in URL: 
https://github.com/sajjjadayobi/sadlearn/ 10 | 11 | date : 10/5/2018 12 | """ 13 | 14 | 15 | class DBSCAN: 16 | """ 17 | DBSCAN is a Algorithm for clustering also can find Noise in data 18 | Good for data which contains clusters of similar density 19 | 20 | Parameters 21 | ---------- 22 | x: list or np.ndarrary , most 23 | Data in the from of a matrix 24 | 25 | epsilon: float, optional 26 | maximum distance between tow sample in one class 27 | 28 | min_point: int, optional 29 | minimum number of sample in one class 30 | if neighbor of a sample was less than min_point, this sample is a Noise 31 | 32 | Attributes 33 | ---------- 34 | labels: list of class number 35 | 36 | Note 37 | ---- 38 | -1 in labels means it sample is Noise 39 | 40 | for example 41 | ----------- 42 | >>> import numpy as np 43 | 44 | >>> db = DBSCAN(np.array()) 45 | >>> labels = db.labels 46 | 47 | """ 48 | def __init__(self, x, epsilon=0.5, min_point=5): 49 | self.x = x 50 | self.eps = epsilon 51 | self.m_p = min_point 52 | 53 | def neighbor_points(self, x, k): 54 | neighbors = [] 55 | for i in range(len(x)): 56 | # ouclidean distance 57 | if np.sqrt(np.square(x[k] - x[i]).sum()) < self.eps: 58 | neighbors.append(i) 59 | return neighbors 60 | 61 | def expand_cluster(self, labels, i, class_id): 62 | 63 | neighbors = self.neighbor_points(self.x, i) 64 | if len(neighbors) < self.m_p: 65 | labels[i] = -1 66 | return False 67 | 68 | for i in neighbors: 69 | labels[i] = class_id 70 | while len(neighbors) > 0: 71 | current = neighbors[0] 72 | new_neighbors = self.neighbor_points(self.x, current) 73 | 74 | if len(new_neighbors) >= self.m_p: 75 | for i in range(len(new_neighbors)): 76 | point = new_neighbors[i] 77 | if labels[point] is None: 78 | labels[point] = class_id 79 | neighbors.append(point) 80 | del neighbors[0] 81 | 82 | return True 83 | 84 | @property 85 | def labels(self): 86 | x = self.x 87 | labels = np.array([None] * len(x)) 88 | 89 | class_id = 1 90 | for i in range(len(x)): 91 | if labels[i] is None: 92 | if self.expand_cluster(labels, i, class_id): 93 | class_id += 1 94 | return labels 95 | -------------------------------------------------------------------------------- /Algorithm/Nureal Network/FullyConnected/optim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def SGD(w, dw, lr, *wargs): 5 | # Stochastic Gradient descent 6 | w -= dw * lr 7 | return w, None 8 | 9 | def Rmsprop(x, dx, lr, config): 10 | """ 11 | Uses the RMSProp update rule, which uses a moving average of squared 12 | gradient values to set adaptive per-parameter learning rates. 13 | config format: 14 | - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared 15 | - cache: Moving average of second moments of gradients. 16 | """ 17 | if config is 0: config = {} 18 | config.setdefault('cache', np.zeros_like(x)) 19 | cache = config['cache'] 20 | decay_rate = 0.9 21 | 22 | cache = decay_rate * cache + (1 - decay_rate) * dx**2 23 | next_x = x - lr * dx / (np.sqrt(cache) + 1e-8) 24 | 25 | config['cache'] = cache 26 | return next_x, config 27 | 28 | 29 | 30 | def Momentum(w, dw, lr, config): 31 | """ 32 | Performs stochastic gradient descent with momentum. 33 | config format: 34 | - momentum: Scalar between 0 and 1 giving the momentum value. 
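As written below, the velocity update is v = 0.9 * v - lr * dw followed by w += v; the momentum coefficient is hard-coded to 0.9 rather than read from `config`.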
35 | """ 36 | if config is 0: config = {} 37 | config.setdefault('v', np.zeros_like(w)) 38 | 39 | v = .9 * config['v'] - lr * dw 40 | w += v 41 | 42 | config['v'] = v 43 | return w, config 44 | 45 | 46 | 47 | def Nestrov_momentum(w, dw, lr, config): 48 | """ or nag 49 | Performs stochastic gradient descent with momentum. 50 | config format: 51 | - momentum: Scalar between 0 and 1 giving the momentum value. 52 | """ 53 | if config is 0: config = {} 54 | config.setdefault('v', np.zeros_like(w)) 55 | mu = .9 56 | v_pred = config['v'] 57 | 58 | v = mu * config['v'] - lr * dw 59 | w += -mu * v_pred + (1+mu) * v 60 | config['v'] = v 61 | 62 | return w, config 63 | 64 | 65 | 66 | def Adam(x, dx, lr, config): 67 | """ config format: 68 | - beta1: Decay rate for moving average of first moment of gradient. 69 | - beta2: Decay rate for moving average of second moment of gradient. 70 | - m: Moving average of gradient. 71 | - v: Moving average of squared gradient. 72 | - t: Iteration number. """ 73 | 74 | if config is 0: config={} 75 | config.setdefault('t', 1) 76 | config.setdefault('m', np.zeros_like(x)) 77 | config.setdefault('v', np.zeros_like(x)) 78 | 79 | # read params from dictionary 80 | beta1, beta2 = .9, .999 81 | m, v, t = config['m'], config['v'], config['t'] 82 | 83 | t += 1 84 | # apply adam update rule 85 | m = beta1 * m + (1 - beta1) * dx 86 | v = beta2 * v + (1 - beta2) * dx ** 2 87 | mb = m / (1 - beta1 ** t) 88 | vb = v / (1 - beta2 ** t) 89 | next_x = x - lr * mb / (np.sqrt(vb) + 1e-8) 90 | 91 | # store new params in the dictionary 92 | config['m'], config['v'] = m, v 93 | return next_x, config -------------------------------------------------------------------------------- /Algorithm/Classification/LogReg_binary.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class LogReg_binary: 5 | """ Binary Logistic classifier 6 | This class implements regularized binary logistic regression 7 | and ues Maximum likelihood Estimation for loss function 8 | 9 | Parameter 10 | --------- 11 | n_iter: int 12 | number of iteration Gradient Descent 13 | 14 | learning_rate: float 15 | alpha in Gradient Descent Algorithm 16 | 17 | reg: float 18 | lambda in regularization 19 | 20 | report: bool 21 | showing report of each iter in Gradient Descent 22 | 23 | Return 24 | ------ 25 | predict values for test data 26 | 27 | Examples 28 | -------- 29 | >>> clf = LogReg_binary() 30 | >>> clf.fit(x, y) 31 | >>> y_pred = clf.predict(x_test) 32 | """ 33 | 34 | def __init__(self, n_iter=1000, learning_rate=0.00001, reg=0.0, report=True): 35 | self.W = None 36 | self.lr = learning_rate 37 | self.n_iter = n_iter 38 | self.reg = reg 39 | self.report = report 40 | 41 | @staticmethod 42 | def simplification(x): 43 | return np.concatenate((np.ones((x.shape[0], 1)), x), axis=1) 44 | 45 | @staticmethod 46 | def sigmoid(y): 47 | return 1 / (1 + np.exp(-y)) 48 | 49 | @staticmethod 50 | def StandardScaler(x): 51 | return (x - np.mean(x, axis=0)) / np.std(x, axis=0) 52 | 53 | # Optimize Method 54 | @staticmethod 55 | def gradient_descent(x, y_pred, y): 56 | return x.T @ (y_pred - y) 57 | 58 | # Loss Function 59 | def MLE(self, y, y_pred, w): 60 | 61 | error = -np.sum((y * np.log(y_pred)) + ((1 - y) * np.log(1 - y_pred))) 62 | return error + (self.reg * (w.T @ w)[0][0]) 63 | 64 | def Probability(self, x, theta): 65 | y_predict = x @ theta 66 | return self.sigmoid(y_predict) 67 | 68 | def fit(self, x, y): 69 | # PreProcessing 70 | x = 
self.simplification(self.StandardScaler(x)) 71 | y = y.reshape(-1, 1) 72 | w = 0.001 * np.random.randn(x.shape[1], 1) 73 | 74 | report_time = self.n_iter / 10 75 | for i in range(self.n_iter): 76 | # Loss 77 | y_pred = self.Probability(x, w) 78 | error = self.MLE(y, y_pred, w) 79 | # Optimize Weight 80 | gd = self.gradient_descent(x, y_pred, y) 81 | 82 | w[0] = w[0] - self.lr * gd[0] 83 | w[1:] = w[1:] - self.lr * (gd[1:] + self.reg * w[1:]) 84 | # Report 85 | if self.report and i % report_time == 0: 86 | print('\terror in iter {} = {}'.format(i, error)) 87 | 88 | self.W = w 89 | 90 | def predict(self, x_test): 91 | x = self.simplification(self.StandardScaler(x_test)) 92 | probs = self.Probability(x, self.W) 93 | 94 | y_pred = np.zeros(shape=(x_test.shape[0],), dtype=int) 95 | y_pred[np.flatnonzero(probs > 0.5)] = 1 96 | return y_pred 97 | 98 | def score(self, x, y): 99 | y_pred = self.predict(x) 100 | return np.mean(y_pred == y) 101 | -------------------------------------------------------------------------------- /Algorithm/Clustering/K-means.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | K-means.py 5 | detail: Clustering Algorithm 6 | 7 | author: sajjad ayobi 8 | see others in repository : sadlearn 9 | in URL: https://github.com/sajjjadayobi/sadlearn/ 10 | 11 | date : 10/5/2018 12 | """ 13 | 14 | 15 | class KMeans: 16 | """ 17 | K-means is a Algorithm for clustering 18 | in this algorithm should entering number of clusters 19 | 20 | 21 | Parameters 22 | ---------- 23 | n_cluster : int, optional, default: 2 24 | The number of clusters to form as well as the number of 25 | centroids to generate. 26 | 27 | 28 | max_iter : int, default: 10 29 | Maximum number of iterations of the k-means algorithm for a single run. 30 | 31 | 32 | Attributes 33 | ---------- 34 | x: list or np.ndarray 35 | we data 36 | 37 | clusters : array, [n_clusters, n_features] 38 | Coordinates of cluster centers 39 | 40 | values: array, [class1,class2,...] 41 | list that classifies classes 42 | 43 | inertia : float 44 | Sum of squared distances of samples to their closest cluster center. 
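Each of the `max_iter` rounds in `predict` assigns every sample to its nearest center using Manhattan distance (`distance`) and then moves each center to the mean of its assigned samples (`refresh_clusters`).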
45 | 46 | Examples 47 | -------- 48 | >>> km = KMeans(list(), n_cluster=3) 49 | >>> new_values , centers = km.predict() 50 | """ 51 | 52 | def __init__(self, x, n_cluster, max_iter=10): 53 | self.x = x 54 | self.max_iter = max_iter 55 | self.inertia_ = None 56 | self.clusters = self.random_clusters(x, n_cluster) 57 | 58 | @staticmethod 59 | def random_clusters(x, n_cluster): 60 | np.random.seed(42) 61 | centers = np.array(x[np.random.choice(range(len(x)), size=n_cluster, replace=False)]) 62 | return centers 63 | 64 | @staticmethod 65 | def manhattan(x, y): 66 | total = 0 67 | for j in range(y.shape[0]): 68 | total += np.abs((x[j] - y[j])) 69 | return total 70 | 71 | def refresh_clusters(self, x): 72 | c = np.zeros((len(self.clusters), self.clusters.shape[1])) 73 | for i, k in enumerate(x): 74 | m = np.mean(k, axis=0) 75 | for j in range(len(m)): 76 | c[i][j] = m[j] 77 | return c 78 | 79 | def distance(self): 80 | new = [] 81 | for i in range(len(self.clusters)): 82 | new.append([]) 83 | for k in range(len(self.x)): 84 | distance = [] 85 | for i in self.clusters: 86 | distance.append(self.manhattan(self.x[k], i)) 87 | 88 | new[np.argmin(distance)].append(list(self.x[k])) 89 | 90 | return new 91 | 92 | def inertia(self, x): 93 | distance = [] 94 | for i, v in enumerate(x): 95 | distance.append(np.sum(np.square(v - self.clusters[i]))) 96 | return np.sum(distance) 97 | 98 | def predict(self): 99 | for i in range(self.max_iter): 100 | x = self.distance() 101 | self.clusters = self.refresh_clusters(x) 102 | 103 | self.inertia_ = self.inertia(x) 104 | return x 105 | -------------------------------------------------------------------------------- /Algorithm/Classification/Linear_classifier.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class liner_classifier: 5 | 6 | """ 7 | At the base of all neural networks 8 | Liner Classifier with Gradient descent Algorithm 9 | and find best Weight for test data 10 | note: better use Normalized data 11 | Parameter 12 | --------- 13 | batch_size: int 14 | size of batch in Gradient descent 15 | n_iter: int 16 | number of iteration Gradient Descent 17 | learning_rate: float 18 | alpha in Gradient Descent Algorithm 19 | report: bool 20 | showing report of each iter in Gradient Descent 21 | Return 22 | ------ 23 | predicted label for data 24 | Examples 25 | -------- 26 | >>> clf = liner_classifier() 27 | >>> clf.fit(x, y) 28 | >>> y_pred = clf.predict(x_test) 29 | """ 30 | 31 | def __init__(self, batch_size, n_iter=1000, learning_rate=0.001, report=True, class_names=None): 32 | self.weight = None 33 | self.bias = None 34 | self.batch_size = batch_size 35 | self.class_names = class_names 36 | self.lr = learning_rate 37 | self.n_iter = n_iter 38 | self.report = report 39 | 40 | # Loss function for multi classifier 41 | @staticmethod 42 | def softmax_loss(scores, y): 43 | # forward step: computing data loss 44 | num = scores.shape[0] 45 | scores -= np.max(scores, axis=1, keepdims=True) 46 | exp = np.exp(scores) 47 | probs = exp / np.sum(exp, axis=1, keepdims=True) 48 | loss = -np.log(probs[range(num), y]) 49 | loss = np.mean(loss) 50 | # backward step: computing Derivative with Respect to output 51 | d_out = probs 52 | d_out[np.arange(num), y] -= 1 53 | return loss, d_out 54 | 55 | # computing scores of each class 56 | @staticmethod 57 | def affine_forward(x, w, b): 58 | return x @ w.T + b 59 | 60 | # computing Derivative with Respect to input 61 | @staticmethod 62 | def affine_backward(x, 
d_out): 63 | d_weight = x.T @ d_out 64 | d_bais = np.sum(d_out, axis=0) 65 | return d_weight, d_bais 66 | 67 | def fit(self, x, y): 68 | # random Init weight 69 | w = np.random.randn(len(set(y)), x.shape[1]) * 0.001 70 | b = np.zeros((len(set(y)),)) 71 | 72 | for i in range(self.n_iter + 1): 73 | index = np.random.choice(x.shape[0], self.batch_size, replace=False) 74 | x_batch = x[index] 75 | y_batch = y[index] 76 | 77 | # Loss 78 | scores = self.affine_forward(x_batch, w, b) 79 | loss, dout = self.softmax_loss(scores, y_batch) 80 | # Optimize Weight 81 | dw, db = self.affine_backward(x_batch, dout) 82 | w -= self.lr * dw.T 83 | b -= self.lr * db 84 | 85 | # report 86 | if self.report: 87 | y_pred = np.argmax(scores, axis=1) 88 | score = np.mean(y_pred == y_batch) 89 | scores = self.affine_forward(x, w, b) 90 | y_pred = np.argmax(scores, axis=1) 91 | train = np.mean(y_pred == y) 92 | print('\t iter %4d loss: %2.5f | batch_acc: %0.2f | all_acc: %0.3f' % (i, loss, score, train)) 93 | 94 | self.weight = w 95 | self.bias = b 96 | 97 | def predict(self, x): 98 | if len(x.shape)==1: 99 | x = x[np.newaxis, :] 100 | 101 | scores = self.affine_forward(x, self.weight, self.bias) 102 | y = np.argmax(scores, axis=1) 103 | 104 | if isinstance(self.class_names, np.ndarray): 105 | return self.class_names[y] 106 | return y 107 | -------------------------------------------------------------------------------- /Algorithm/Nureal Network/FullyConnected/layers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def liner_forward(x, w, b): 5 | return x @ w + b 6 | 7 | 8 | def liner_backward(dout, x, w): 9 | dw = x.T @ dout 10 | dx = dout @ w.T 11 | db = np.sum(dout, axis=0) 12 | return dx, dw, db 13 | 14 | 15 | def relu(z): 16 | return np.maximum(0, z) 17 | 18 | 19 | def max_backward(dout, local): 20 | # relu backward 21 | return dout * (local > 0) 22 | 23 | 24 | def dropout_forward(x, kp): 25 | prob = 1 - kp 26 | # for accuracy on x_test : Ansamble learning 27 | # mean of all 2^noeron_size newtwork 28 | mask = (np.random.rand(*x.shape) < prob) / prob 29 | x_dorp = x * mask 30 | return x_dorp, mask 31 | 32 | 33 | def dropout_backward(dout, mask): 34 | return dout * mask 35 | 36 | 37 | def batchnorm_forward(x, gamma, beta, test_param, mode='train'): 38 | running_mean = test_param.setdefault('run_mean', np.zeros(x.shape[1], )) 39 | running_var = test_param.setdefault('run_var', np.zeros(x.shape[1], )) 40 | cache = None 41 | 42 | if mode == 'train': 43 | # Normalize 44 | mu = np.mean(x, axis=0) 45 | xc = x - mu 46 | var = np.mean(xc ** 2, axis=0) 47 | std = np.sqrt(var + 1e-5) 48 | x_norm = xc / std 49 | 50 | # Scale and Shift 51 | out = gamma * x_norm + beta 52 | cache = (x, xc, var, std, x_norm, gamma) 53 | 54 | # update running mean and running average 55 | test_param['run_mean'] = .9 * running_mean + (1 - .9) * mu 56 | test_param['run_var'] = .9 * running_var + (1 - .9) * var 57 | else: 58 | x_norm = (x - test_param['run_mean']) / (np.sqrt(test_param['run_var'] + 1e-5)) 59 | out = gamma * x_norm + beta 60 | 61 | return out, cache 62 | 63 | 64 | def batchnorm_backward(dout, cache): 65 | """ 66 | Backward pass for batch normalization. 67 | Inputs: 68 | - dout: Upstream derivatives, of shape (N, D) 69 | - cache: Variable of intermediates from batchnorm_forward. 
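The gradients are obtained by differentiating out = gamma * x_norm + beta with x_norm = (x - mu) / sqrt(var + eps), reusing the cached (x, xc, var, std, x_norm, gamma) from the forward pass.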
70 | Returns: 71 | - dx: Gradient with respect to inputs x, of shape (N, D) 72 | - dgamma: Gradient with respect to scale parameter gamma, of shape (D,) 73 | - dbeta: Gradient with respect to shift parameter beta, of shape (D,) 74 | """ 75 | x, xc, var, std, xn, gamma = cache 76 | N = x.shape[0] 77 | 78 | dbeta = np.sum(dout, axis=0) 79 | dgamma = np.sum(dout * xn, axis=0) 80 | dxn = dout * gamma 81 | 82 | dxc = dxn / std 83 | dstd = np.sum(-(xc * dxn) / (std * std), axis=0) 84 | dvar = 0.5 * dstd / std 85 | 86 | dxc += (2.0 / N) * xc * dvar 87 | dmu = -np.sum(dxc, axis=0) 88 | dx = dxc + dmu / N 89 | 90 | return dx, dgamma, dbeta 91 | 92 | 93 | def mini_batch(x, y, size=64): 94 | # mini batch gradient 95 | indexs = np.random.choice(x.shape[0], size, replace=False) 96 | return x[indexs], y[indexs] 97 | 98 | 99 | 100 | 101 | def softmax_loss(scores, y, param, reg=0): 102 | # forward step: computing data loss 103 | num = scores.shape[0] 104 | scores -= np.max(scores, axis=1, keepdims=True) 105 | exp = np.exp(scores) 106 | probs = exp / np.sum(exp, axis=1, keepdims=True) 107 | loss = -np.log(probs[range(num), y]) 108 | loss = np.mean(loss) 109 | 110 | if reg != 0: 111 | w_loss = 0 112 | for i in range(3): 113 | w_loss += np.sum(param['w%d' % i] ** 2) 114 | loss += .5 * reg * w_loss 115 | 116 | # backward step: computing Dervative with Respect to output 117 | dscore = probs 118 | dscore[np.arange(num), y] -= 1 119 | dscore /= num 120 | return loss, dscore -------------------------------------------------------------------------------- /Algorithm/Nureal Network/FullyConnected/FC.py: -------------------------------------------------------------------------------- 1 | """ 2 | Fully Connected Networks 3 | detail: Classifier Algorithm 4 | author: sajjad ayobi 5 | see others in repository : sadlearn 6 | in URL: https://github.com/sajjjadayobi/sadlearn/ 7 | """ 8 | 9 | # self module 10 | from .optim import * 11 | from .layers import * 12 | 13 | # for io task 14 | import pickle as pickle 15 | 16 | 17 | 18 | class FullyConnectedNet: 19 | 20 | """ 21 | A fully-connected neural network with an arbitrary number of hidden layers, 22 | ReLU activation, and a softmax loss function. This will also implement 23 | dropout and batch normalization as options. For a network with L layers, 24 | 25 | iteration in hidden layer 26 | affine -> batch norm-> relu -> dropout 27 | output layer 28 | affine -> softmax_loss 29 | """ 30 | 31 | 32 | def __init__(self, hidden_dims=[], input_dim=32*32*3, num_class=10, use_bacthnorm=False, dropout=0, reg=0, checkpoint_name=None, 33 | weight_scale=1e-2, learning_rate_init=1e-5, solver=Adam, lr_decay=1, batch_size=64, verbose=False, num_epoch=10): 34 | 35 | """ 36 | Initialize a new FullyConnectedNet. 37 | Inputs: 38 | - hidden_dims: A list of integers giving the size of each hidden layer. 39 | - input_dim: An integer giving the size of the input. 40 | - num_classes: An integer giving the number of classes to classify. 41 | - dropout: Scalar between 0 and 1 giving dropout strength. If dropout=0 not use dropout 42 | - use_batchnorm: Whether or not the network should use batch normalization. 43 | - reg: Scalar giving L2 regularization strength. 44 | - weight_scale: Scalar giving the standard deviation for random init of the weights. 45 | - solver: a giving the name of an update rule in optim.py 46 | - learning_rate_init: learning rate in Initialize 47 | - optim_config: A dictionary containing hyper-parameters that will be 48 | passed to the chosen update rule. 
Each update rule requires different 49 | hyper-parameters (see optim.py) 50 | - lr_decay: A scalar for learning rate decay; after each epoch the 51 | learning rate is multiplied by this value. 52 | - batch_size: Size of mini-batches used to compute loss and gradient during training. 53 | - verbose: if set to false then no output will be printed during training. 54 | - checkpoint_name: If not None, then save model checkpoints here every epoch. 55 | 56 | Exam: 57 | - 3 hidden layers and in each layer 100 neuron 58 | >>> clf = FullyConnectedNet(hidden_dims=[100, 100, 100], num_class=10) 59 | >>> clf.train((x, y), (x, y)) 60 | 61 | """ 62 | 63 | self.num_dims = 1 + len(hidden_dims) 64 | self.dims = [input_dim] + hidden_dims + [num_class] 65 | self.reg = reg 66 | self.solver = solver 67 | self.lr = learning_rate_init 68 | self.weight_scale = weight_scale 69 | self.batchnorm = use_bacthnorm 70 | self.lr_decay = lr_decay 71 | self.batch_size = batch_size 72 | self.verbose = verbose 73 | self.epoch = num_epoch 74 | self.checkpoint_name = checkpoint_name 75 | self.p_drop = dropout 76 | 77 | # all parameters of FullyConnected model 78 | self.param_, self.optim_config_ = self.create_params() 79 | # number of epochs is trained 80 | self.trained_epoch = 0 81 | # list of loss each iter 82 | self.loss_history = [] 83 | 84 | 85 | 86 | def create_params(self): 87 | param = {} 88 | config = {} 89 | for i in range(len(self.dims) - 1): 90 | param['w%d' % i] = np.random.randn(self.dims[i], self.dims[i + 1]) * self.weight_scale 91 | param['b%d' % i] = np.zeros(self.dims[i + 1]) 92 | config['w%d' % i] = 0 93 | config['b%d' % i] = 0 94 | if i < len(self.dims) - 2 and self.batchnorm: 95 | param['gamma%d' % i] = np.ones(self.dims[i + 1]) 96 | param['beta%d' % i] = np.zeros(self.dims[i + 1]) 97 | param['run_guss%d' % i] = {} 98 | config['gamma%d' % i] = 0 99 | config['beta%d' % i] = 0 100 | return param, config 101 | 102 | 103 | 104 | def forward_step(self, param, h): 105 | # forward step with (dropout, batchnorm) 106 | caches = {'h0': h} 107 | score = 0 108 | 109 | for i in range(self.num_dims): 110 | if i < self.num_dims - 1: 111 | h = liner_forward(h, param['w%d' % i], param['b%d' % i]) 112 | if self.batchnorm: 113 | h, caches['bn%d' % i] = batchnorm_forward(h, param['gamma%d' % i], param['beta%d' % i], param['run_guss%d' % i]) 114 | caches['norm%d' % (i + 1)] = h 115 | h, caches['mask%d' % (i + 1)] = dropout_forward(relu(h), self.p_drop) 116 | caches['h%d' % (i + 1)] = h 117 | else: 118 | score = liner_forward(h, param['w%d' % i], param['b%d' % i]) 119 | 120 | return score, caches 121 | 122 | 123 | 124 | def backward_step(self, dout, param, cache): 125 | # backward step with (dropout, batchnorm) 126 | grads = {} 127 | for i in range(self.num_dims - 1, -1, -1): 128 | if i == self.num_dims - 1: 129 | dh, grads['w%d' % i], grads['b%d' % i] = liner_backward(dout, cache['h%d' % i], param['w%d' % i]) 130 | else: 131 | dmask = dropout_backward(dh, cache['mask%d' % (i + 1)]) 132 | if self.batchnorm: 133 | dmax = max_backward(dmask, cache['norm%d' % (i + 1)]) 134 | dnorm, grads['gamma%d' % i], grads['beta%d' % i] = batchnorm_backward(dmax, cache['bn%d' % i]) 135 | dh, grads['w%d' % i], grads['b%d' % i] = liner_backward(dnorm, cache['h%d' % i], param['w%d' % i]) 136 | else: 137 | dmax = max_backward(dmask, cache['h%d' % (i + 1)]) 138 | dh, grads['w%d' % i], grads['b%d' % i] = liner_backward(dmax, cache['h%d' % i], param['w%d' % i]) 139 | return grads 140 | 141 | 142 | def forward_test(self, param, h): 143 | score = 0 144 
| for i in range(self.num_dims): 145 | if i < self.num_dims - 1: 146 | h = h @ param['w%d' % i] + param['b%d' % i] 147 | if self.batchnorm: 148 | h, _ = batchnorm_forward(h, param['gamma%d' % i], param['beta%d' % i], param['run_guss%d' % i], mode='test') 149 | h = relu(h) 150 | else: 151 | score = liner_forward(h, param['w%d' % i], param['b%d' % i]) 152 | return score 153 | 154 | 155 | 156 | def update_param(self, param, grads, config): 157 | for i in range(self.num_dims): 158 | # added reg 159 | grads['w%d' % i] += self.reg * param['w%d' % i] 160 | # update_param 161 | param['w%d' % i], config['w%d' % i] = self.solver(param['w%d' % i], grads['w%d' % i], self.lr, config['w%d' % i]) 162 | param['b%d' % i], config['b%d' % i] = self.solver(param['b%d' % i], grads['b%d' % i], self.lr, config['b%d' % i]) 163 | if i < self.num_dims - 1 and self.batchnorm: 164 | param['gamma%d'%i], config['gamma%d'%i] = self.solver(param['gamma%d'%i], grads['gamma%d'%i], self.lr, config['gamma%d'%i]) 165 | param['beta%d'%i], config['beta%d'%i] = self.solver(param['beta%d'%i], grads['beta%d'%i], self.lr, config['beta%d'%i]) 166 | 167 | return param, config 168 | 169 | 170 | 171 | 172 | def save_model(self, path): 173 | if self.checkpoint_name is None: return 174 | checkpoint = {'model': self} 175 | 176 | filename = '%s_epoch_%d.pkl' % (path, self.epoch) 177 | print('\t Saving model to "%s"' % path) 178 | with open(filename, 'wb') as f: 179 | pickle.dump(checkpoint, f) 180 | 181 | 182 | @staticmethod 183 | def load_model(filename): 184 | with open(filename, 'rb') as f: 185 | model = pickle.load(f) 186 | return model 187 | 188 | 189 | 190 | def predict(self, x_test): 191 | score = self.forward_test(self.param_, x_test) 192 | return np.argmax(score, axis=1) 193 | 194 | 195 | def score(self, x_test, y_test): 196 | y_pred = self.predict(x_test) 197 | return np.mean(y_pred == y_test) 198 | 199 | 200 | def evaluate(self, x_test, y_test, batch=5): 201 | # evaluate model for after training 202 | scores = [] 203 | size = x_test.shape[0] // batch 204 | for i in range(batch): 205 | x_batch, y_batch = mini_batch(x_test, y_test, size) 206 | y_pred = self.predict(x_batch) 207 | score = np.mean(y_pred == y_batch) 208 | print('\t score in batch %d %.2f'%(i, score)) 209 | scores.append(score) 210 | print(' mean score is %.2f'%np.mean(scores)) 211 | 212 | 213 | def compute_ephoce(self, num_data): 214 | iter_per_epoch = max(num_data // self.batch_size, 1) 215 | num_iter = iter_per_epoch * self.epoch 216 | return num_iter, iter_per_epoch 217 | 218 | 219 | 220 | def train(self, train_data=(), val_data=()): 221 | x_train, y_train = train_data 222 | x_val, y_val = val_data 223 | num_iter, iter_per_epoch = self.compute_ephoce(x_train.shape[0]) 224 | 225 | for i in range(num_iter + 1): 226 | # forward and compute loss 227 | x_batch, y_batch = mini_batch(x_train, y_train, self.batch_size) 228 | scores, cache = self.forward_step(self.param_, x_batch) 229 | loss, dout = softmax_loss(scores, y_batch, self.param_, self.reg) 230 | self.loss_history.append(loss) 231 | 232 | # backward and update param 233 | grads = self.backward_step(dout, self.param_, cache) 234 | self.param_, self.optim_config_= self.update_param(self.param_, grads, self.optim_config_) 235 | 236 | if i%iter_per_epoch == 0 and self.verbose: 237 | self.lr *= self.lr_decay 238 | train_acc = self.score(x_train, y_train) 239 | val_acc = self.score(x_val, y_val) 240 | 241 | print('\t\t epchoe %4d | loss: %.4f | x_train: %.3f | x_val: %.3f' % (self.trained_epoch, loss, train_acc, 
val_acc)) 242 | self.save_model(self.checkpoint_name) 243 | self.trained_epoch += 1 244 | 245 | return 246 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------