├── .gitattributes
├── src
│   ├── solution_1.pdf
│   ├── solution_2.py
│   ├── solution_3.py
│   └── solution_4.py
├── test_data scientist.pdf
├── README.md
└── .gitignore
/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------
/src/solution_1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abhimishra91/coding_test/master/src/solution_1.pdf
--------------------------------------------------------------------------------
/test_data scientist.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abhimishra91/coding_test/master/test_data scientist.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Testing
This repository contains the solutions for the test.

All the solutions are placed in the `src` folder.

The first solution is a `.pdf` file that shows the working for the given problem. All the other solutions are in their respective `.py` files.
--------------------------------------------------------------------------------
/src/solution_2.py:
--------------------------------------------------------------------------------
# This script is the solution for problem 2.
# Problem Statement below:

"""
Count the pairs whose elements differ by k in an integer list.
eg: list = [1, 3, 5] and k = 2
expected: we will have 2 pairs: {(1, 3), (3, 5)}
Note: we also consider the negative numbers. You may only use Python.
"""


def pairs(arr: list, k: int) -> int:
    k = abs(k)  # a difference of -k gives the same pairs as +k
    int_set = set(arr)  # de-duplicate so each pair of values is counted once
    if k == 0:
        return 0  # equal values collapse in the set, so no pair has difference 0
    count = 0
    for x in int_set:
        if x + k in int_set:
            count += 1
        if x - k in int_set:
            count += 1
    # every pair (x, x + k) is found twice, once from each end
    return count // 2


if __name__ == "__main__":
    my_array = list(
        map(
            int, input("Enter the list on which you want to obtain the pairs: ").split()
        )
    )
    diff = int(input("Enter the difference that you want between the elements of a pair: "))
    pair_count = pairs(my_array, diff)
    print(f"The number of pairs that have the difference of {diff} are: {pair_count}")
--------------------------------------------------------------------------------
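A few sanity checks for `pairs`, as a minimal sketch: the first expectation comes from the problem statement, while the negative-number, negative-k, and k = 0 cases are assumptions consistent with the note in the docstring. The import is hypothetical and assumes the repo root is on `sys.path`.

from src.solution_2 import pairs  # hypothetical import path, assumption

assert pairs([1, 3, 5], 2) == 2    # example from the problem statement
assert pairs([-4, -2, 0], 2) == 2  # negative numbers behave the same way
assert pairs([1, 3, 5], -2) == 2   # a negative k counts the same pairs
assert pairs([1, 1, 3], 0) == 0    # equal values do not form a difference-0 pair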
/src/solution_3.py:
--------------------------------------------------------------------------------
# This script is the solution for problem 3.
# Problem Statement below:

"""
Return a list of index groups. Each sublist holds the indices of the records that point to the same person.
Two records describe the same person if they share a name, an email, or a phone number. You may only use Python.
eg:
data = [
    ("username1", "phone_number1", "email1"),
    ("usernameX", "phone_number1", "emailX"),
    ("usernameZ", "phone_numberZ", "email1Z"),
    ("usernameY", "phone_numberY", "emailX"),
]
expected: [[0, 1, 3], [2]]
"""


def same_person(user_list: list):
    output_list = [[0]]
    # one entry per group: a set of names, a set of phones, a set of emails
    aux_set = [[{user_list[0][0]}, {user_list[0][1]}, {user_list[0][2]}]]
    for i in range(1, len(user_list)):
        for j in range(len(aux_set)):
            if (
                user_list[i][0] in aux_set[j][0]
                or user_list[i][1] in aux_set[j][1]
                or user_list[i][2] in aux_set[j][2]
            ):
                aux_set[j][0].add(user_list[i][0])
                aux_set[j][1].add(user_list[i][1])
                aux_set[j][2].add(user_list[i][2])
                output_list[j].append(i)
                break
        else:
            # no existing group matched this record, so it starts a new group
            aux_set.append([{user_list[i][0]}, {user_list[i][1]}, {user_list[i][2]}])
            output_list.append([i])
    print(output_list)
    return output_list


if __name__ == "__main__":
    data = [
        ("username1", "phone_number1", "email1"),
        ("usernameX", "phone_number1", "emailX"),
        ("usernameZ", "phone_numberZ", "email1Z"),
        ("usernameY", "phone_numberY", "emailX"),
    ]
    same_person(data)
--------------------------------------------------------------------------------
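The single pass above attaches each record to the first group it matches, but it never merges two existing groups that a later record turns out to connect. A minimal union-find sketch that also covers that case; the name `same_person_union_find` and its helpers are illustrative additions, not part of the original solution.

def same_person_union_find(user_list: list) -> list:
    """Group record indices that share a name, phone, or email, transitively."""
    parent = list(range(len(user_list)))

    def find(x: int) -> int:
        while parent[x] != x:
            parent[x] = parent[parent[x]]  # path halving keeps the trees shallow
            x = parent[x]
        return x

    def union(a: int, b: int) -> None:
        parent[find(a)] = find(b)

    first_seen = {}  # (field_index, value) -> first record index carrying that value
    for i, record in enumerate(user_list):
        for field_index, value in enumerate(record):
            key = (field_index, value)
            if key in first_seen:
                union(i, first_seen[key])
            else:
                first_seen[key] = i

    groups = {}
    for i in range(len(user_list)):
        groups.setdefault(find(i), []).append(i)
    return list(groups.values())  # gives [[0, 1, 3], [2]] for the example data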
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# IDE configs
.idea/
.vscode/


# Model Files
*.bin
*vectors
--------------------------------------------------------------------------------
/src/solution_4.py:
--------------------------------------------------------------------------------
import numpy as np


def initialize_parameters(n_in, n_out):
    """
    Helper function to initialize small random weights and zero biases
    Args:
        n_in: size of input layer
        n_out: size of output/number of neurons
    Returns:
        params: a dictionary containing W and b
    """

    params = dict()  # initialize empty dictionary of neural net parameters W and b

    params["W"] = (
        np.random.randn(n_out, n_in) * 0.01
    )  # set weights 'W' to small random gaussian
    params["b"] = np.zeros((n_out, 1))  # set bias 'b' to zeros

    return params


class LinearLayer:
    """
    This class implements all functions to be executed by a linear layer
    in a computational graph
    Args:
        input_shape: input shape of Data/Activations
        n_out: number of neurons in layer
    Methods:
        forward(A_prev)
        backward(upstream_grad)
        update_params(learning_rate)
    """

    def __init__(self, input_shape, n_out):
        """
        The constructor of the LinearLayer takes the following parameters
        Args:
            input_shape: input shape of Data/Activations
            n_out: number of neurons in layer
        """

        self.m = input_shape[1]  # number of examples in training data
        # `params` stores weights and bias in a python dictionary
        self.params = initialize_parameters(
            input_shape[0], n_out
        )  # initialize weights and bias
        self.Z = np.zeros(
            (self.params["W"].shape[0], input_shape[1])
        )  # create space for the resultant Z output

    def forward(self, A_prev):
        """
        This function performs the forward propagation using activations from the previous layer
        Args:
            A_prev: Activations/Input Data coming into the layer from the previous layer
        """

        self.A_prev = A_prev  # store the Activations/Training Data coming in
        self.Z = (
            np.dot(self.params["W"], self.A_prev) + self.params["b"]
        )  # compute the linear function

    def backward(self, upstream_grad):
        """
        This function performs the back propagation using upstream gradients
        Args:
            upstream_grad: gradient coming in from the upper layer to couple with the local gradient
        """

        # derivative of Cost w.r.t W
        self.dW = np.dot(upstream_grad, self.A_prev.T)

        # derivative of Cost w.r.t b, sum across rows
        self.db = np.sum(upstream_grad, axis=1, keepdims=True)

        # derivative of Cost w.r.t A_prev
        self.dA_prev = np.dot(self.params["W"].T, upstream_grad)
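    # Shape check for the gradients above (illustrative comment, not in the original file):
    #   upstream_grad: (n_out, m) and A_prev.T: (m, n_in) -> dW: (n_out, n_in), same as W
    #   summing upstream_grad across axis=1 -> db: (n_out, 1), same as b
    #   W.T: (n_in, n_out) and upstream_grad: (n_out, m) -> dA_prev: (n_in, m), same as A_prev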
    def update_params(self, learning_rate=0.1):
        """
        This function performs the gradient descent update
        Args:
            learning_rate: learning rate hyper-param for gradient descent, default 0.1
        """

        self.params["W"] = self.params["W"] - learning_rate * self.dW  # update weights
        self.params["b"] = self.params["b"] - learning_rate * self.db  # update bias(es)


class SigmoidLayer:
    """
    This class implements a sigmoid activation layer
    inline with a computational graph model
    Args:
        shape: shape of input to the layer
    Methods:
        forward(Z)
        backward(upstream_grad)
    """

    def __init__(self, shape):
        """
        The constructor of the sigmoid/logistic activation layer takes in the following arguments
        Args:
            shape: shape of input to the layer
        """
        self.A = np.zeros(shape)  # create space for the resultant activations

    def forward(self, Z):
        """
        This function performs the forward propagation step through the activation function
        Args:
            Z: input from previous (linear) layer
        """
        self.A = 1 / (1 + np.exp(-Z))  # compute activations

    def backward(self, upstream_grad):
        """
        This function performs the back propagation step through the activation function
        Local gradient => derivative of sigmoid => A*(1-A)
        Args:
            upstream_grad: gradient coming into this layer from the layer above
        """
        # couple the upstream gradient with the local gradient; the result is sent back to the Linear layer
        self.dZ = upstream_grad * self.A * (1 - self.A)


def compute_cost(Y, Y_hat):
    """
    This function computes and returns the Cost and its derivative.
    It uses the Squared Error Cost function -> (1/(2*m)) * sum((Y - Y_hat)^2)
    Args:
        Y: labels of data
        Y_hat: Predictions (activations) from the last layer, the output layer
    Returns:
        cost: The Squared Error Cost result
        dY_hat: gradient of Cost w.r.t Y_hat
    """
    m = Y.shape[1]

    cost = (1 / (2 * m)) * np.sum(np.square(Y - Y_hat))
    cost = np.squeeze(cost)  # remove extraneous dimensions to give just a scalar

    dY_hat = -1 / m * (Y - Y_hat)  # derivative of the squared error cost function

    return cost, dY_hat
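# Worked check of the cost gradient above (illustrative, not part of the original file):
# with m = 1, Y = 1, Y_hat = 0.8:
#   cost   = (1 / 2) * (1 - 0.8) ** 2 = 0.02
#   dY_hat = -(1 / 1) * (1 - 0.8)     = -0.2
# nudging Y_hat up by eps = 1e-3 changes the cost by about -0.2 * eps, matching dY_hat.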
if __name__ == "__main__":

    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

    Y = np.array([[0], [1], [1], [0]])

    X_train = X.T
    Y_train = Y.T

    learning_rate = 1
    number_of_epochs = 5000

    np.random.seed(48)  # set seed value so that the results are reproducible
    # (weights will now be initialized to the same pseudo-random numbers, each time)

    # Our network architecture has the shape:
    # (input)--> [Linear->Sigmoid] -> [Linear->Sigmoid] -> [Linear->Sigmoid] -->(output)

    # ------ LAYER-1 ----- define hidden layer that takes in training data
    Z1 = LinearLayer(input_shape=X_train.shape, n_out=5)
    A1 = SigmoidLayer(Z1.Z.shape)

    # ------ LAYER-2 ----- define second hidden layer that takes in values from the first hidden layer
    Z2 = LinearLayer(input_shape=A1.A.shape, n_out=3)
    A2 = SigmoidLayer(Z2.Z.shape)

    # ------ LAYER-3 ----- define output layer that takes in values from the second hidden layer
    Z3 = LinearLayer(input_shape=A2.A.shape, n_out=1)
    A3 = SigmoidLayer(Z3.Z.shape)

    costs = []  # initially empty list; this will store the cost after a certain number of epochs

    # Start training
    for epoch in range(number_of_epochs):

        # ------------------------- forward-prop -------------------------
        Z1.forward(X_train)
        A1.forward(Z1.Z)

        Z2.forward(A1.A)
        A2.forward(Z2.Z)

        Z3.forward(A2.A)
        A3.forward(Z3.Z)

        # ---------------------- Compute Cost ----------------------------
        cost, dA3 = compute_cost(Y=Y_train, Y_hat=A3.A)

        # print and store Costs every 100 iterations.
        if (epoch % 100) == 0:
            print("Cost at epoch#{}: {}".format(epoch, cost))
            costs.append(cost)

        # ------------------------- back-prop ----------------------------
        A3.backward(dA3)
        Z3.backward(A3.dZ)

        A2.backward(Z3.dA_prev)
        Z2.backward(A2.dZ)

        A1.backward(Z2.dA_prev)
        Z1.backward(A1.dZ)

        # ----------------------- Update weights and bias ----------------
        Z3.update_params(learning_rate=learning_rate)
        Z2.update_params(learning_rate=learning_rate)
        Z1.update_params(learning_rate=learning_rate)
--------------------------------------------------------------------------------
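A minimal inference sketch for the trained XOR network above, assuming it is appended inside the same `__main__` block after the training loop; the 0.5 threshold is the usual convention for a sigmoid output, not something the original file fixes.

    # ------------------------- inference -----------------------------
    Z1.forward(X_train)
    A1.forward(Z1.Z)
    Z2.forward(A1.A)
    A2.forward(Z2.Z)
    Z3.forward(A2.A)
    A3.forward(Z3.Z)

    predictions = (A3.A > 0.5).astype(int)  # threshold the sigmoid outputs at 0.5
    print(f"Predictions: {predictions}")  # should match Y_train = [[0 1 1 0]] once trained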