├── .gitattributes
├── src
│   ├── solution_1.pdf
│   ├── solution_2.py
│   ├── solution_3.py
│   └── solution_4.py
├── test_data scientist.pdf
├── README.md
└── .gitignore
/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------
/src/solution_1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abhimishra91/coding_test/master/src/solution_1.pdf
--------------------------------------------------------------------------------
/test_data scientist.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abhimishra91/coding_test/master/test_data scientist.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Testing
This repository contains the solutions for the test.

All the solutions are placed in the `src` folder.

The first solution is a `.pdf` file that shows the working for the given problem. All the other solutions are in their respective `.py` files.
--------------------------------------------------------------------------------
/src/solution_2.py:
--------------------------------------------------------------------------------
# This script is the solution for problem 2.
# Problem Statement below:

"""
Count the pairs whose elements differ by k in an integer list.
eg: list = [1, 3, 5] and k = 2
expected: we will have 2 pairs: {(1, 3), (3, 5)}
Note: we also consider the negative numbers. You may only use Python.
"""


def pairs(arr: list, k: int) -> int:
    k = abs(k)  # a difference of -k gives the same pairs as +k
    int_set = set(arr)  # de-duplicate so each pair of values is counted once
    if k == 0:
        return 0  # equal values collapse in the set, so no pair has difference 0
    count = 0
    for x in int_set:
        if x + k in int_set:
            count += 1
        if x - k in int_set:
            count += 1
    # every pair (x, x + k) is found twice, once from each end
    return count // 2


if __name__ == "__main__":
    my_array = list(
        map(
            int, input("Enter the list on which you want to obtain the pairs: ").split()
        )
    )
    diff = int(input("Enter the difference that you want between the elements of a pair: "))
    pair_count = pairs(my_array, diff)
    print(f"The number of pairs that have the difference of {diff} are: {pair_count}")
--------------------------------------------------------------------------------
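A few sanity checks for `pairs`, as a minimal sketch: the first expectation comes from the problem statement, while the negative-number, negative-k, and k = 0 cases are assumptions consistent with the note in the docstring. The import is hypothetical and assumes the repo root is on `sys.path`.

from src.solution_2 import pairs  # hypothetical import path, assumption

assert pairs([1, 3, 5], 2) == 2    # example from the problem statement
assert pairs([-4, -2, 0], 2) == 2  # negative numbers behave the same way
assert pairs([1, 3, 5], -2) == 2   # a negative k counts the same pairs
assert pairs([1, 1, 3], 0) == 0    # equal values do not form a difference-0 pair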
/src/solution_3.py:
--------------------------------------------------------------------------------
# This script is the solution for problem 3.
# Problem Statement below:

"""
Return a list of index groups. Each sublist holds the indices of the records that point to the same person.
Two records describe the same person if they share a name, an email, or a phone number. You may only use Python.
eg:
data = [
    ("username1", "phone_number1", "email1"),
    ("usernameX", "phone_number1", "emailX"),
    ("usernameZ", "phone_numberZ", "email1Z"),
    ("usernameY", "phone_numberY", "emailX"),
]
expected: [[0, 1, 3], [2]]
"""


def same_person(user_list: list):
    output_list = [[0]]
    # one entry per group: a set of names, a set of phones, a set of emails
    aux_set = [[{user_list[0][0]}, {user_list[0][1]}, {user_list[0][2]}]]
    for i in range(1, len(user_list)):
        for j in range(len(aux_set)):
            if (
                user_list[i][0] in aux_set[j][0]
                or user_list[i][1] in aux_set[j][1]
                or user_list[i][2] in aux_set[j][2]
            ):
                aux_set[j][0].add(user_list[i][0])
                aux_set[j][1].add(user_list[i][1])
                aux_set[j][2].add(user_list[i][2])
                output_list[j].append(i)
                break
        else:
            # no existing group matched this record, so it starts a new group
            aux_set.append([{user_list[i][0]}, {user_list[i][1]}, {user_list[i][2]}])
            output_list.append([i])
    print(output_list)
    return output_list


if __name__ == "__main__":
    data = [
        ("username1", "phone_number1", "email1"),
        ("usernameX", "phone_number1", "emailX"),
        ("usernameZ", "phone_numberZ", "email1Z"),
        ("usernameY", "phone_numberY", "emailX"),
    ]
    same_person(data)
--------------------------------------------------------------------------------
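The single pass above attaches each record to the first group it matches, but it never merges two existing groups that a later record turns out to connect. A minimal union-find sketch that also covers that case; the name `same_person_union_find` and its helpers are illustrative additions, not part of the original solution.

def same_person_union_find(user_list: list) -> list:
    """Group record indices that share a name, phone, or email, transitively."""
    parent = list(range(len(user_list)))

    def find(x: int) -> int:
        while parent[x] != x:
            parent[x] = parent[parent[x]]  # path halving keeps the trees shallow
            x = parent[x]
        return x

    def union(a: int, b: int) -> None:
        parent[find(a)] = find(b)

    first_seen = {}  # (field_index, value) -> first record index carrying that value
    for i, record in enumerate(user_list):
        for field_index, value in enumerate(record):
            key = (field_index, value)
            if key in first_seen:
                union(i, first_seen[key])
            else:
                first_seen[key] = i

    groups = {}
    for i in range(len(user_list)):
        groups.setdefault(find(i), []).append(i)
    return list(groups.values())  # gives [[0, 1, 3], [2]] for the example data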
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# IDE configs
.idea/
.vscode/


# Model Files
*.bin
*vectors
--------------------------------------------------------------------------------
/src/solution_4.py:
--------------------------------------------------------------------------------
import numpy as np


def initialize_parameters(n_in, n_out):
    """
    Helper function to initialize small random weights and zero biases
    Args:
        n_in: size of input layer
        n_out: size of output/number of neurons
    Returns:
        params: a dictionary containing W and b
    """

    params = dict()  # initialize empty dictionary of neural net parameters W and b

    params["W"] = (
        np.random.randn(n_out, n_in) * 0.01
    )  # set weights 'W' to small random gaussian
    params["b"] = np.zeros((n_out, 1))  # set bias 'b' to zeros

    return params


class LinearLayer:
    """
    This class implements all functions to be executed by a linear layer
    in a computational graph
    Args:
        input_shape: input shape of Data/Activations
        n_out: number of neurons in layer
    Methods:
        forward(A_prev)
        backward(upstream_grad)
        update_params(learning_rate)
    """

    def __init__(self, input_shape, n_out):
        """
        The constructor of the LinearLayer takes the following parameters
        Args:
            input_shape: input shape of Data/Activations
            n_out: number of neurons in layer
        """

        self.m = input_shape[1]  # number of examples in training data
        # `params` stores weights and bias in a python dictionary
        self.params = initialize_parameters(
            input_shape[0], n_out
        )  # initialize weights and bias
        self.Z = np.zeros(
            (self.params["W"].shape[0], input_shape[1])
        )  # create space for the resultant Z output

    def forward(self, A_prev):
        """
        This function performs the forward propagation using activations from the previous layer
        Args:
            A_prev: Activations/Input Data coming into the layer from the previous layer
        """

        self.A_prev = A_prev  # store the Activations/Training Data coming in
        self.Z = (
            np.dot(self.params["W"], self.A_prev) + self.params["b"]
        )  # compute the linear function

    def backward(self, upstream_grad):
        """
        This function performs the back propagation using upstream gradients
        Args:
            upstream_grad: gradient coming in from the upper layer to couple with the local gradient
        """

        # derivative of Cost w.r.t W
        self.dW = np.dot(upstream_grad, self.A_prev.T)

        # derivative of Cost w.r.t b, sum across rows
        self.db = np.sum(upstream_grad, axis=1, keepdims=True)

        # derivative of Cost w.r.t A_prev
        self.dA_prev = np.dot(self.params["W"].T, upstream_grad)
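    # Shape check for the gradients above (illustrative comment, not in the original file):
    #   upstream_grad: (n_out, m) and A_prev.T: (m, n_in) -> dW: (n_out, n_in), same as W
    #   summing upstream_grad across axis=1 -> db: (n_out, 1), same as b
    #   W.T: (n_in, n_out) and upstream_grad: (n_out, m) -> dA_prev: (n_in, m), same as A_prev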
    def update_params(self, learning_rate=0.1):
        """
        This function performs the gradient descent update
        Args:
            learning_rate: learning rate hyper-param for gradient descent, default 0.1
        """

        self.params["W"] = self.params["W"] - learning_rate * self.dW  # update weights
        self.params["b"] = self.params["b"] - learning_rate * self.db  # update bias(es)


class SigmoidLayer:
    """
    This class implements a sigmoid activation layer
    inline with a computational graph model
    Args:
        shape: shape of input to the layer
    Methods:
        forward(Z)
        backward(upstream_grad)
    """

    def __init__(self, shape):
        """
        The constructor of the sigmoid/logistic activation layer takes in the following arguments
        Args:
            shape: shape of input to the layer
        """
        self.A = np.zeros(shape)  # create space for the resultant activations

    def forward(self, Z):
        """
        This function performs the forward propagation step through the activation function
        Args:
            Z: input from previous (linear) layer
        """
        self.A = 1 / (1 + np.exp(-Z))  # compute activations

    def backward(self, upstream_grad):
        """
        This function performs the back propagation step through the activation function
        Local gradient => derivative of sigmoid => A*(1-A)
        Args:
            upstream_grad: gradient coming into this layer from the layer above
        """
        # couple the upstream gradient with the local gradient; the result is sent back to the Linear layer
        self.dZ = upstream_grad * self.A * (1 - self.A)


def compute_cost(Y, Y_hat):
    """
    This function computes and returns the Cost and its derivative.
    It uses the Squared Error Cost function -> (1/(2*m)) * sum((Y - Y_hat)^2)
    Args:
        Y: labels of data
        Y_hat: Predictions (activations) from the last layer, the output layer
    Returns:
        cost: The Squared Error Cost result
        dY_hat: gradient of Cost w.r.t Y_hat
    """
    m = Y.shape[1]

    cost = (1 / (2 * m)) * np.sum(np.square(Y - Y_hat))
    cost = np.squeeze(cost)  # remove extraneous dimensions to give just a scalar

    dY_hat = -1 / m * (Y - Y_hat)  # derivative of the squared error cost function

    return cost, dY_hat
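# Worked check of the cost gradient above (illustrative, not part of the original file):
# with m = 1, Y = 1, Y_hat = 0.8:
#   cost   = (1 / 2) * (1 - 0.8) ** 2 = 0.02
#   dY_hat = -(1 / 1) * (1 - 0.8)     = -0.2
# nudging Y_hat up by eps = 1e-3 changes the cost by about -0.2 * eps, matching dY_hat.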
if __name__ == "__main__":

    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

    Y = np.array([[0], [1], [1], [0]])

    X_train = X.T
    Y_train = Y.T

    learning_rate = 1
    number_of_epochs = 5000

    np.random.seed(48)  # set seed value so that the results are reproducible
    # (weights will now be initialized to the same pseudo-random numbers, each time)

    # Our network architecture has the shape:
    # (input)--> [Linear->Sigmoid] -> [Linear->Sigmoid] -> [Linear->Sigmoid] -->(output)

    # ------ LAYER-1 ----- define hidden layer that takes in training data
    Z1 = LinearLayer(input_shape=X_train.shape, n_out=5)
    A1 = SigmoidLayer(Z1.Z.shape)

    # ------ LAYER-2 ----- define second hidden layer that takes in values from the first hidden layer
    Z2 = LinearLayer(input_shape=A1.A.shape, n_out=3)
    A2 = SigmoidLayer(Z2.Z.shape)

    # ------ LAYER-3 ----- define output layer that takes in values from the second hidden layer
    Z3 = LinearLayer(input_shape=A2.A.shape, n_out=1)
    A3 = SigmoidLayer(Z3.Z.shape)

    costs = []  # initially empty list; this will store the cost after a certain number of epochs

    # Start training
    for epoch in range(number_of_epochs):

        # ------------------------- forward-prop -------------------------
        Z1.forward(X_train)
        A1.forward(Z1.Z)

        Z2.forward(A1.A)
        A2.forward(Z2.Z)

        Z3.forward(A2.A)
        A3.forward(Z3.Z)

        # ---------------------- Compute Cost ----------------------------
        cost, dA3 = compute_cost(Y=Y_train, Y_hat=A3.A)

        # print and store Costs every 100 iterations.
        if (epoch % 100) == 0:
            print("Cost at epoch#{}: {}".format(epoch, cost))
            costs.append(cost)

        # ------------------------- back-prop ----------------------------
        A3.backward(dA3)
        Z3.backward(A3.dZ)

        A2.backward(Z3.dA_prev)
        Z2.backward(A2.dZ)

        A1.backward(Z2.dA_prev)
        Z1.backward(A1.dZ)

        # ----------------------- Update weights and bias ----------------
        Z3.update_params(learning_rate=learning_rate)
        Z2.update_params(learning_rate=learning_rate)
        Z1.update_params(learning_rate=learning_rate)
--------------------------------------------------------------------------------
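A minimal inference sketch for the trained XOR network above, assuming it is appended inside the same `__main__` block after the training loop; the 0.5 threshold is the usual convention for a sigmoid output, not something the original file fixes.

    # ------------------------- inference -----------------------------
    Z1.forward(X_train)
    A1.forward(Z1.Z)
    Z2.forward(A1.A)
    A2.forward(Z2.Z)
    Z3.forward(A2.A)
    A3.forward(Z3.Z)

    predictions = (A3.A > 0.5).astype(int)  # threshold the sigmoid outputs at 0.5
    print(f"Predictions: {predictions}")  # should match Y_train = [[0 1 1 0]] once trained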