├── .gitignore ├── README.md ├── autodiff.py ├── autodiff_test.py ├── data.txt └── lr_autodiff.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.dat 3 | *.npy 4 | .DS_Store 5 | .idea 6 | __pycache__ 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | from https://gitee.com/Carl-Xie/AutodiffEngine 2 | 3 | An automatic differentiation engine, based on Tianqi Chen's course project at the University of Washington. -------------------------------------------------------------------------------- /autodiff.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Node(object): 4 | """Node in a computation graph.""" 5 | def __init__(self): 6 | """Constructor; new nodes are created indirectly through an Op object's __call__ method. 7 | 8 | Instance variables 9 | ------------------ 10 | self.inputs: the list of input nodes. 11 | self.op: the associated op object, 12 | e.g. add_op object if this node is created by adding two other nodes. 13 | self.const_attr: the add or multiply constant, 14 | e.g. self.const_attr=5 if this node is created by x+5. 15 | self.name: node name for debugging purposes. 16 | """ 17 | self.inputs = [] 18 | self.op = None 19 | self.const_attr = None 20 | self.name = "" 21 | 22 | def __add__(self, other): 23 | """Adding two nodes returns a new node.""" 24 | if isinstance(other, Node): 25 | new_node = add_op(self, other) 26 | else: 27 | # Add by a constant stores the constant in the new node's const_attr field. 28 | # 'other' argument is a constant 29 | new_node = add_byconst_op(self, other) 30 | return new_node 31 | 32 | def __mul__(self, other): 33 | if isinstance(other, Node): 34 | new_node = mul_op(self, other) 35 | else: 36 | new_node = mul_byconst_op(self, other) 37 | return new_node 38 | 39 | def __truediv__(self, other): 40 | if isinstance(other, Node): 41 | new_node = div_op(self, other) 42 | else: 43 | new_node = div_byconst_op(self, other) 44 | return new_node 45 | 46 | def __rtruediv__(self, other): 47 | if isinstance(other, Node): 48 | new_node = div_op(other, self)  # other / self 49 | else: 50 | new_node = rdiv_byconst_op(self, other) 51 | return new_node 52 | 53 | def __sub__(self, other): 54 | if isinstance(other, Node): 55 | new_node = sub_op(self, other) 56 | else: 57 | new_node = sub_byconst_op(self, other) 58 | return new_node 59 | 60 | def __rsub__(self, other): 61 | if isinstance(other, Node): 62 | new_node = sub_op(other, self)  # other - self 63 | else: 64 | new_node = rsub_byconst_op(self, other) 65 | return new_node 66 | 67 | def __neg__(self): 68 | return neg_op(self) 69 | 70 | # Allow left-hand-side add and multiply. 71 | __radd__ = __add__ 72 | __rmul__ = __mul__ 73 | 74 | def __str__(self): 75 | """Allow print to display node name.""" 76 | return self.name 77 | 78 | def Variable(name): 79 | """User defined variables in an expression. 80 | e.g. x = Variable(name = "x") 81 | """ 82 | placeholder_node = placeholder_op() 83 | placeholder_node.name = name 84 | return placeholder_node 85 | 86 | class Op(object): 87 | """Op represents operations performed on nodes.""" 88 | def __call__(self): 89 | """Create a new node and associate the op object with the node. 90 | 91 | Returns 92 | ------- 93 | The new node object. 94 | """ 95 | new_node = Node() 96 | new_node.op = self 97 | return new_node 98 | 99 | def compute(self, node, input_vals): 100 | """Given values of input nodes, compute the output value. 
101 | 102 | Parameters 103 | ---------- 104 | node: node that performs the compute. 105 | input_vals: values of input nodes. 106 | 107 | Returns 108 | ------- 109 | An output value of the node. 110 | """ 111 | assert False, "Implemented in subclass" 112 | 113 | def gradient(self, node, output_grad): 114 | """Given value of output gradient, compute gradient contributions to each input node. 115 | 116 | Parameters 117 | ---------- 118 | node: node that performs the gradient. 119 | output_grad: value of output gradient summed from children nodes' contributions 120 | 121 | Returns 122 | ------- 123 | A list of gradient contributions to each input node respectively. 124 | """ 125 | assert False, "Implemented in subclass" 126 | 127 | 128 | class NegOp(Op): 129 | 130 | def __call__(self, node): 131 | new_node = Op.__call__(self) 132 | new_node.inputs = [node] 133 | new_node.name = "-%s" % node.name 134 | return new_node 135 | 136 | def compute(self, node, input_vals): 137 | assert len(input_vals) == 1 138 | return -input_vals[0] 139 | 140 | def gradient(self, node, output_grad): 141 | return [-output_grad] 142 | 143 | 144 | class AddOp(Op): 145 | """Op to element-wise add two nodes.""" 146 | def __call__(self, node_A, node_B): 147 | new_node = Op.__call__(self) 148 | new_node.inputs = [node_A, node_B] 149 | new_node.name = "(%s+%s)" % (node_A.name, node_B.name) 150 | return new_node 151 | 152 | def compute(self, node, input_vals): 153 | """Given values of two input nodes, return result of element-wise addition.""" 154 | assert len(input_vals) == 2 155 | return input_vals[0] + input_vals[1] 156 | 157 | def gradient(self, node, output_grad): 158 | """Given gradient of add node, return gradient contributions to each input.""" 159 | return [output_grad, output_grad] 160 | 161 | class SubOp(Op): 162 | 163 | def __call__(self, node_A, node_B): 164 | new_node = Op.__call__(self) 165 | new_node.inputs = [node_A, node_B] 166 | new_node.name = "%s-%s" % (node_A.name, node_B.name) 167 | return new_node 168 | 169 | def compute(self, node, input_vals): 170 | assert len(input_vals) == 2 171 | return input_vals[0] - input_vals[1] 172 | 173 | def gradient(self, node, output_grad): 174 | return [output_grad, -output_grad] 175 | 176 | 177 | class AddByConstOp(Op): 178 | """Op to element-wise add a nodes by a constant.""" 179 | def __call__(self, node_A, const_val): 180 | new_node = Op.__call__(self) 181 | new_node.const_attr = const_val 182 | new_node.inputs = [node_A] 183 | new_node.name = "(%s+%s)" % (node_A.name, str(const_val)) 184 | return new_node 185 | 186 | def compute(self, node, input_vals): 187 | """Given values of input node, return result of element-wise addition.""" 188 | assert len(input_vals) == 1 189 | return input_vals[0] + node.const_attr 190 | 191 | def gradient(self, node, output_grad): 192 | """Given gradient of add node, return gradient contribution to input.""" 193 | return [output_grad] 194 | 195 | 196 | class SubByConstOp(Op): 197 | 198 | def __call__(self, node_A, const_val): 199 | new_node = Op.__call__(self) 200 | new_node.const_attr = const_val 201 | new_node.inputs = [node_A] 202 | new_node.name = "(%s-%s)" % (node_A.name, str(const_val)) 203 | return new_node 204 | 205 | def compute(self, node, input_vals): 206 | assert len(input_vals) == 1 207 | return input_vals[0] - node.const_attr 208 | 209 | def gradient(self, node, output_grad): 210 | return [output_grad] 211 | 212 | 213 | class RSubByConstOp(Op): 214 | 215 | def __call__(self, node_A, const_val): 216 | new_node = 
Op.__call__(self) 217 | new_node.const_attr = const_val 218 | new_node.inputs = [node_A] 219 | new_node.name = "(%s-%s)" % (str(const_val), node_A.name) 220 | return new_node 221 | 222 | def compute(self, node, input_vals): 223 | assert len(input_vals) == 1 224 | return node.const_attr - input_vals[0] 225 | 226 | def gradient(self, node, output_grad): 227 | return [-output_grad] 228 | 229 | 230 | class MulOp(Op): 231 | """Op to element-wise multiply two nodes.""" 232 | def __call__(self, node_A, node_B): 233 | new_node = Op.__call__(self) 234 | new_node.inputs = [node_A, node_B] 235 | new_node.name = "(%s*%s)" % (node_A.name, node_B.name) 236 | return new_node 237 | 238 | def compute(self, node, input_vals): 239 | """Given values of two input nodes, return result of element-wise multiplication.""" 240 | assert len(input_vals) == 2 241 | return input_vals[0] * input_vals[1] 242 | 243 | def gradient(self, node, output_grad): 244 | """Given gradient of multiply node, return gradient contributions to each input.""" 245 | return [node.inputs[1] * output_grad, node.inputs[0] * output_grad] 246 | 247 | 248 | class DivOp(Op): 249 | 250 | def __call__(self, node_A, node_B): 251 | new_node = Op.__call__(self) 252 | new_node.inputs = [node_A, node_B] 253 | new_node.name = "%s/%s" % (node_A.name, node_B.name) 254 | return new_node 255 | 256 | def compute(self, node, input_vals): 257 | assert len(input_vals) == 2 258 | return input_vals[0] / input_vals[1] 259 | 260 | def gradient(self, node, output_grad): 261 | return [output_grad / node.inputs[1], -output_grad * node.inputs[0] / (node.inputs[1] * node.inputs[1])] 262 | 263 | 264 | class DivByConstOp(Op): 265 | 266 | def __call__(self, node_A, const_val): 267 | new_node = Op.__call__(self) 268 | new_node.inputs = [node_A] 269 | new_node.const_attr = const_val 270 | new_node.name = "%s/%s" % (node_A.name, str(const_val)) 271 | return new_node 272 | 273 | def compute(self, node, input_vals): 274 | assert len(input_vals) == 1 275 | return input_vals[0] / node.const_attr 276 | 277 | def gradient(self, node, output_grad): 278 | return [output_grad / node.const_attr] 279 | 280 | 281 | class RDivByConstOp(Op): 282 | 283 | def __call__(self, node_A, const_val): 284 | new_node = Op.__call__(self) 285 | new_node.inputs = [node_A] 286 | new_node.const_attr = const_val 287 | new_node.name = "%s/%s" % (str(const_val), node_A.name) 288 | return new_node 289 | 290 | def compute(self, node, input_vals): 291 | assert len(input_vals) == 1 292 | return node.const_attr / input_vals[0] 293 | 294 | def gradient(self, node, output_grad): 295 | return [-output_grad * node.const_attr / (node.inputs[0] * node.inputs[0])] 296 | 297 | 298 | class MulByConstOp(Op): 299 | """Op to element-wise multiply a nodes by a constant.""" 300 | def __call__(self, node_A, const_val): 301 | new_node = Op.__call__(self) 302 | new_node.const_attr = const_val 303 | new_node.inputs = [node_A] 304 | new_node.name = "(%s*%s)" % (node_A.name, str(const_val)) 305 | return new_node 306 | 307 | def compute(self, node, input_vals): 308 | """Given values of input node, return result of element-wise multiplication.""" 309 | """TODO: Your code here""" 310 | assert len(input_vals) == 1 311 | return input_vals[0] * node.const_attr 312 | 313 | def gradient(self, node, output_grad): 314 | """Given gradient of multiplication node, return gradient contribution to input.""" 315 | """TODO: Your code here""" 316 | return [output_grad * node.const_attr] 317 | 318 | 319 | class MatMulOp(Op): 320 | """Op to matrix 
multiply two nodes.""" 321 | def __call__(self, node_A, node_B, trans_A=False, trans_B=False): 322 | """Create a new node that is the result a matrix multiple of two input nodes. 323 | 324 | Parameters 325 | ---------- 326 | node_A: lhs of matrix multiply 327 | node_B: rhs of matrix multiply 328 | trans_A: whether to transpose node_A 329 | trans_B: whether to transpose node_B 330 | 331 | Returns 332 | ------- 333 | Returns a node that is the result a matrix multiple of two input nodes. 334 | """ 335 | new_node = Op.__call__(self) 336 | new_node.matmul_attr_trans_A = trans_A 337 | new_node.matmul_attr_trans_B = trans_B 338 | new_node.inputs = [node_A, node_B] 339 | new_node.name = "MatMul(%s,%s,%s,%s)" % (node_A.name, node_B.name, str(trans_A), str(trans_B)) 340 | return new_node 341 | 342 | def compute(self, node, input_vals): 343 | """Given values of input nodes, return result of matrix multiplication.""" 344 | mat_A = input_vals[0] 345 | mat_B = input_vals[1] 346 | if node.matmul_attr_trans_A: 347 | mat_A = mat_A.T 348 | if node.matmul_attr_trans_B: 349 | mat_B = mat_B.T 350 | return np.matmul(mat_A, mat_B) 351 | 352 | def gradient(self, node, output_grad): 353 | """Given gradient of multiply node, return gradient contributions to each input. 354 | 355 | Useful formula: if Y=AB, then dA=dY B^T, dB=A^T dY 356 | """ 357 | return [matmul_op(output_grad, node.inputs[1], False, True), 358 | matmul_op(node.inputs[0], output_grad, True, False)] 359 | 360 | 361 | class PlaceholderOp(Op): 362 | """Op to feed value to a nodes.""" 363 | def __call__(self): 364 | """Creates a variable node.""" 365 | new_node = Op.__call__(self) 366 | return new_node 367 | 368 | def compute(self, node, input_vals): 369 | """No compute function since node value is fed directly in Executor.""" 370 | assert False, "placeholder values provided by feed_dict" 371 | 372 | def gradient(self, node, output_grad): 373 | """No gradient function since node has no inputs.""" 374 | return None 375 | 376 | 377 | class ZerosLikeOp(Op): 378 | """Op that represents a constant np.zeros_like.""" 379 | def __call__(self, node_A): 380 | """Creates a node that represents a np.zeros array of same shape as node_A.""" 381 | new_node = Op.__call__(self) 382 | new_node.inputs = [node_A] 383 | new_node.name = "Zeroslike(%s)" % node_A.name 384 | return new_node 385 | 386 | def compute(self, node, input_vals): 387 | """Returns zeros_like of the same shape as input.""" 388 | assert(isinstance(input_vals[0], np.ndarray)) 389 | return np.zeros(input_vals[0].shape) 390 | 391 | def gradient(self, node, output_grad): 392 | return [zeroslike_op(node.inputs[0])] 393 | 394 | 395 | class OnesLikeOp(Op): 396 | """Op that represents a constant np.ones_like.""" 397 | def __call__(self, node_A): 398 | """Creates a node that represents a np.ones array of same shape as node_A.""" 399 | new_node = Op.__call__(self) 400 | new_node.inputs = [node_A] 401 | new_node.name = "Oneslike(%s)" % node_A.name 402 | return new_node 403 | 404 | def compute(self, node, input_vals): 405 | """Returns ones_like of the same shape as input.""" 406 | assert(isinstance(input_vals[0], np.ndarray)) 407 | return np.ones(input_vals[0].shape) 408 | 409 | def gradient(self, node, output_grad): 410 | return [zeroslike_op(node.inputs[0])] 411 | 412 | 413 | class LogOp(Op): 414 | 415 | def __call__(self, node): 416 | new_node = Op.__call__(self) 417 | new_node.inputs = [node] 418 | new_node.name = "log(%s)" % node.name 419 | return new_node 420 | 421 | def compute(self, node, input_vals): 422 | 
assert len(input_vals) == 1 423 | return np.log(input_vals[0]) 424 | 425 | def gradient(self, node, output_grad): 426 | return [output_grad / node.inputs[0]] 427 | 428 | 429 | class ExpOp(Op): 430 | 431 | def __call__(self, node): 432 | new_node = Op.__call__(self) 433 | new_node.inputs = [node] 434 | new_node.name = "exp(%s)" % node.name 435 | return new_node 436 | 437 | def compute(self, node, input_vals): 438 | assert len(input_vals) == 1 439 | return np.exp(input_vals[0]) 440 | 441 | def gradient(self, node, output_grad): 442 | return [output_grad * exp_op(node.inputs[0])] 443 | 444 | 445 | class ReduceSumOp(Op): 446 | 447 | def __call__(self, node): 448 | new_node = Op.__call__(self) 449 | new_node.inputs = [node] 450 | new_node.name = "reduce_sum(%s)" % node.name 451 | return new_node 452 | 453 | def compute(self, node, input_vals): 454 | assert isinstance(input_vals[0], np.ndarray) 455 | return np.sum(input_vals[0]) 456 | 457 | def gradient(self, node, output_grad): 458 | return [output_grad * oneslike_op(node.inputs[0])] 459 | 460 | # Create global singletons of operators. 461 | add_op = AddOp() 462 | mul_op = MulOp() 463 | div_op = DivOp() 464 | sub_op = SubOp() 465 | neg_op = NegOp() 466 | add_byconst_op = AddByConstOp() 467 | rsub_byconst_op = RSubByConstOp() 468 | sub_byconst_op = SubByConstOp() 469 | mul_byconst_op = MulByConstOp() 470 | div_byconst_op = DivByConstOp() 471 | rdiv_byconst_op = RDivByConstOp() 472 | matmul_op = MatMulOp() 473 | placeholder_op = PlaceholderOp() 474 | oneslike_op = OnesLikeOp() 475 | zeroslike_op = ZerosLikeOp() 476 | log_op = LogOp() 477 | exp_op = ExpOp() 478 | reduce_sum = ReduceSumOp() 479 | 480 | 481 | def exp(val): 482 | if isinstance(val, Node): 483 | return exp_op(val) 484 | return np.exp(val) 485 | 486 | 487 | def log(val): 488 | if isinstance(val, Node): 489 | return log_op(val) 490 | return np.log(val) 491 | 492 | 493 | class Executor: 494 | """Executor computes values for a given subset of nodes in a computation graph.""" 495 | def __init__(self, eval_node_list): 496 | """ 497 | Parameters 498 | ---------- 499 | eval_node_list: list of nodes whose values need to be computed. 500 | """ 501 | self.eval_node_list = eval_node_list 502 | 503 | 504 | def run(self, feed_dict): 505 | """Computes values of nodes in eval_node_list given computation graph. 506 | Parameters 507 | ---------- 508 | feed_dict: dict mapping variable (placeholder) nodes to the values supplied by the user. 509 | 510 | Returns 511 | ------- 512 | A list of values for nodes in eval_node_list. 513 | """ 514 | node_to_val_map = dict(feed_dict) 515 | # Traverse graph in topological sort order and compute values for all nodes. 516 | 517 | topo_order = find_topo_sort(self.eval_node_list) 518 | for node in topo_order: 519 | if isinstance(node.op, PlaceholderOp): 520 | continue 521 | vals = [node_to_val_map[n] for n in node.inputs] 522 | compute_val = node.op.compute(node, vals) 523 | node_to_val_map[node] = compute_val if isinstance(compute_val, np.ndarray) else np.array(compute_val) 524 | 525 | # Collect node values. 526 | node_val_results = [node_to_val_map[node] for node in self.eval_node_list] 527 | return node_val_results 528 | 529 | 530 | def gradients(output_node, node_list): 531 | """Take gradient of output node with respect to each node in node_list. 532 | 533 | Parameters 534 | ---------- 535 | output_node: output node that we are taking derivative of. 536 | node_list: list of nodes that we are taking derivative wrt. 
537 | 538 | Returns 539 | ------- 540 | A list of gradient nodes, one for each node in node_list respectively; evaluate them with an Executor to obtain the actual gradient values. 541 | 542 | """ 543 | 544 | # a map from node to a list of gradient contributions from each output node 545 | node_to_output_grads_list = {} 546 | # Special note on initializing gradient of output_node as oneslike_op(output_node): 547 | # We are really taking a derivative of the scalar reduce_sum(output_node) 548 | # instead of the vector output_node. But this is the common case for loss functions. 549 | node_to_output_grads_list[output_node] = [oneslike_op(output_node)] 550 | # a map from node to the gradient of that node 551 | node_to_output_grad = {} 552 | # Traverse graph in reverse topological order given the output_node that we are taking gradient wrt. 553 | reverse_topo_order = reversed(find_topo_sort([output_node])) 554 | 555 | for node in reverse_topo_order: 556 | grad = sum_node_list(node_to_output_grads_list[node]) 557 | node_to_output_grad[node] = grad 558 | grads = node.op.gradient(node, grad)  # gradient contributions to each input, computed once per node 559 | for i in range(len(node.inputs)): 560 | ch = node.inputs[i] 561 | grads_list = node_to_output_grads_list.get(ch, []) 562 | grads_list.append(grads[i]) 563 | node_to_output_grads_list[ch] = grads_list 564 | 565 | # Collect results for gradients requested. 566 | grad_node_list = [node_to_output_grad[node] for node in node_list] 567 | return grad_node_list 568 | 569 | 570 | 571 | ############################## 572 | 573 | ####### Helper Methods ####### 574 | 575 | ############################## 576 | 577 | 578 | def find_topo_sort(node_list): 579 | """Given a list of nodes, return a topologically sorted list of nodes ending in them. 580 | 581 | A simple algorithm is to do a post-order DFS traversal on the given nodes, 582 | going backwards based on input edges. Since a node is added to the ordering 583 | after all its predecessors are traversed due to post-order DFS, we get a topological 584 | sort. 
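For example, for y = (x1*x2)+x1 built from Variables x1 and x2, find_topo_sort([y]) yields [x1, x2, (x1*x2), ((x1*x2)+x1)]: every node appears after all of its inputs (the expression here is only illustrative).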
585 | 586 | """ 587 | visited = set() 588 | topo_order = [] 589 | for node in node_list: 590 | topo_sort_dfs(node, visited, topo_order) 591 | return topo_order 592 | 593 | 594 | def topo_sort_dfs(node, visited, topo_order): 595 | """Post-order DFS""" 596 | if node in visited: 597 | return 598 | visited.add(node) 599 | for n in node.inputs: 600 | topo_sort_dfs(n, visited, topo_order) 601 | topo_order.append(node) 602 | 603 | 604 | def sum_node_list(node_list): 605 | """Custom sum function in order to avoid create redundant nodes in Python sum implementation.""" 606 | from operator import add 607 | from functools import reduce 608 | return reduce(add, node_list) 609 | 610 | 611 | -------------------------------------------------------------------------------- /autodiff_test.py: -------------------------------------------------------------------------------- 1 | import autodiff as ad 2 | import numpy as np 3 | 4 | 5 | def test_identity(): 6 | x2 = ad.Variable(name="x2") 7 | y = x2 8 | 9 | grad_x2, = ad.gradients(y, [x2]) 10 | 11 | executor = ad.Executor([y, grad_x2]) 12 | x2_val = 2 * np.ones(3) 13 | y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val}) 14 | 15 | assert isinstance(y, ad.Node) 16 | assert np.array_equal(y_val, x2_val) 17 | assert np.array_equal(grad_x2_val, np.ones_like(x2_val)) 18 | 19 | 20 | def test_add_by_const(): 21 | x2 = ad.Variable(name = "x2") 22 | y = 5 + x2 23 | 24 | grad_x2, = ad.gradients(y, [x2]) 25 | 26 | executor = ad.Executor([y, grad_x2]) 27 | x2_val = 2 * np.ones(3) 28 | y_val, grad_x2_val= executor.run(feed_dict = {x2 : x2_val}) 29 | 30 | assert isinstance(y, ad.Node) 31 | assert np.array_equal(y_val, x2_val + 5) 32 | assert np.array_equal(grad_x2_val, np.ones_like(x2_val)) 33 | 34 | 35 | def test_sub_by_const(): 36 | x2 = ad.Variable(name='x2') 37 | y = 3 - x2 38 | grad_x2, = ad.gradients(y, [x2]) 39 | executor = ad.Executor([y, grad_x2]) 40 | x2_val = 2 * np.ones(3) 41 | y_val, grad_x2_val= executor.run(feed_dict = {x2 : x2_val}) 42 | 43 | assert isinstance(y, ad.Node) 44 | assert np.array_equal(y_val, 3 - x2_val) 45 | assert np.array_equal(grad_x2_val, -np.ones_like(x2_val)) 46 | 47 | 48 | def test_neg(): 49 | x1 = ad.Variable(name='x1') 50 | x2 = ad.Variable(name='x2') 51 | 52 | y = -x2 + x1 53 | 54 | grad_x1, grad_x2 = ad.gradients(y, [x1, x2]) 55 | executor = ad.Executor([y, grad_x1, grad_x2]) 56 | x2_val = 2 * np.ones(3) 57 | x1_val = 3 * np.ones(3) 58 | y_val, grad_x1_val, grad_x2_val = executor.run(feed_dict = {x1: x1_val, x2 : x2_val}) 59 | 60 | assert isinstance(y, ad.Node) 61 | assert np.array_equal(y_val, -x2_val + x1_val) 62 | assert np.array_equal(grad_x2_val, -np.ones_like(x2_val)) 63 | assert np.array_equal(grad_x1_val, np.ones_like(x1_val)) 64 | 65 | 66 | def test_mul_by_const(): 67 | x2 = ad.Variable(name = "x2") 68 | y = 5 * x2 69 | 70 | grad_x2, = ad.gradients(y, [x2]) 71 | 72 | executor = ad.Executor([y, grad_x2]) 73 | x2_val = 2 * np.ones(3) 74 | y_val, grad_x2_val= executor.run(feed_dict = {x2 : x2_val}) 75 | 76 | assert isinstance(y, ad.Node) 77 | assert np.array_equal(y_val, x2_val * 5) 78 | assert np.array_equal(grad_x2_val, np.ones_like(x2_val) * 5) 79 | 80 | 81 | def test_div_two_vars(): 82 | x1 = ad.Variable(name = 'x1') 83 | x2 = ad.Variable(name = 'x2') 84 | 85 | y = x1 / x2 86 | 87 | grad_x1, grad_x2 = ad.gradients(y, [x1, x2]) 88 | 89 | executor = ad.Executor([y, grad_x1, grad_x2]) 90 | x1_val = 2 * np.ones(3) 91 | x2_val = 5 * np.ones(3) 92 | y_val, grad_x1_val, grad_x2_val= executor.run(feed_dict = {x1: x1_val, x2 : 
x2_val}) 93 | 94 | assert isinstance(y, ad.Node) 95 | assert np.array_equal(y_val, x1_val / x2_val) 96 | assert np.array_equal(grad_x1_val, np.ones_like(x1_val) / x2_val) 97 | assert np.array_equal(grad_x2_val, -x1_val / (x2_val * x2_val)) 98 | 99 | 100 | def test_div_by_const(): 101 | x2 = ad.Variable(name = "x2") 102 | y = 5 / x2 103 | 104 | grad_x2, = ad.gradients(y, [x2]) 105 | 106 | executor = ad.Executor([y, grad_x2]) 107 | x2_val = 2 * np.ones(3) 108 | y_val, grad_x2_val= executor.run(feed_dict = {x2 : x2_val}) 109 | 110 | assert isinstance(y, ad.Node) 111 | assert np.array_equal(y_val, 5 / x2_val) 112 | print(grad_x2_val) 113 | print(-5 / (x2_val * x2_val)) 114 | assert np.array_equal(grad_x2_val, -5 / (x2_val * x2_val)) 115 | 116 | 117 | def test_add_two_vars(): 118 | x2 = ad.Variable(name = "x2") 119 | x3 = ad.Variable(name = "x3") 120 | y = x2 + x3 121 | 122 | grad_x2, grad_x3 = ad.gradients(y, [x2, x3]) 123 | 124 | executor = ad.Executor([y, grad_x2, grad_x3]) 125 | x2_val = 2 * np.ones(3) 126 | x3_val = 3 * np.ones(3) 127 | y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict = {x2: x2_val, x3: x3_val}) 128 | 129 | assert isinstance(y, ad.Node) 130 | assert np.array_equal(y_val, x2_val + x3_val) 131 | assert np.array_equal(grad_x2_val, np.ones_like(x2_val)) 132 | assert np.array_equal(grad_x3_val, np.ones_like(x3_val)) 133 | 134 | 135 | def test_mul_two_vars(): 136 | x2 = ad.Variable(name = "x2") 137 | x3 = ad.Variable(name = "x3") 138 | y = x2 * x3 139 | 140 | grad_x2, grad_x3 = ad.gradients(y, [x2, x3]) 141 | 142 | executor = ad.Executor([y, grad_x2, grad_x3]) 143 | x2_val = 2 * np.ones(3) 144 | x3_val = 3 * np.ones(3) 145 | y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict = {x2: x2_val, x3: x3_val}) 146 | 147 | assert isinstance(y, ad.Node) 148 | assert np.array_equal(y_val, x2_val * x3_val) 149 | assert np.array_equal(grad_x2_val, x3_val) 150 | assert np.array_equal(grad_x3_val, x2_val) 151 | 152 | 153 | def test_add_mul_mix_1(): 154 | x1 = ad.Variable(name = "x1") 155 | x2 = ad.Variable(name = "x2") 156 | x3 = ad.Variable(name = "x3") 157 | y = x1 + x2 * x3 * x1 158 | 159 | grad_x1, grad_x2, grad_x3 = ad.gradients(y, [x1, x2, x3]) 160 | 161 | executor = ad.Executor([y, grad_x1, grad_x2, grad_x3]) 162 | x1_val = 1 * np.ones(3) 163 | x2_val = 2 * np.ones(3) 164 | x3_val = 3 * np.ones(3) 165 | y_val, grad_x1_val, grad_x2_val, grad_x3_val = executor.run(feed_dict = {x1 : x1_val, x2: x2_val, x3 : x3_val}) 166 | 167 | assert isinstance(y, ad.Node) 168 | assert np.array_equal(y_val, x1_val + x2_val * x3_val) 169 | assert np.array_equal(grad_x1_val, np.ones_like(x1_val) + x2_val * x3_val) 170 | assert np.array_equal(grad_x2_val, x3_val * x1_val) 171 | assert np.array_equal(grad_x3_val, x2_val * x1_val) 172 | 173 | 174 | def test_add_mul_mix_2(): 175 | x1 = ad.Variable(name = "x1") 176 | x2 = ad.Variable(name = "x2") 177 | x3 = ad.Variable(name = "x3") 178 | x4 = ad.Variable(name = "x4") 179 | y = x1 + x2 * x3 * x4 180 | 181 | grad_x1, grad_x2, grad_x3, grad_x4 = ad.gradients(y, [x1, x2, x3, x4]) 182 | 183 | executor = ad.Executor([y, grad_x1, grad_x2, grad_x3, grad_x4]) 184 | x1_val = 1 * np.ones(3) 185 | x2_val = 2 * np.ones(3) 186 | x3_val = 3 * np.ones(3) 187 | x4_val = 4 * np.ones(3) 188 | y_val, grad_x1_val, grad_x2_val, grad_x3_val, grad_x4_val = executor.run(feed_dict = {x1 : x1_val, x2: x2_val, x3 : x3_val, x4 : x4_val}) 189 | 190 | assert isinstance(y, ad.Node) 191 | assert np.array_equal(y_val, x1_val + x2_val * x3_val * x4_val) 192 | assert 
np.array_equal(grad_x1_val, np.ones_like(x1_val)) 193 | assert np.array_equal(grad_x2_val, x3_val * x4_val) 194 | assert np.array_equal(grad_x3_val, x2_val * x4_val) 195 | assert np.array_equal(grad_x4_val, x2_val * x3_val) 196 | 197 | 198 | def test_add_mul_mix_3(): 199 | x2 = ad.Variable(name = "x2") 200 | x3 = ad.Variable(name = "x3") 201 | z = x2 * x2 + x2 + x3 + 3 202 | y = z * z + x3 203 | 204 | grad_x2, grad_x3 = ad.gradients(y, [x2, x3]) 205 | 206 | executor = ad.Executor([y, grad_x2, grad_x3]) 207 | x2_val = 2 * np.ones(3) 208 | x3_val = 3 * np.ones(3) 209 | y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict = {x2: x2_val, x3: x3_val}) 210 | 211 | z_val = x2_val * x2_val + x2_val + x3_val + 3 212 | expected_yval = z_val * z_val + x3_val 213 | expected_grad_x2_val = 2 * (x2_val * x2_val + x2_val + x3_val + 3) * (2 * x2_val + 1) 214 | expected_grad_x3_val = 2 * (x2_val * x2_val + x2_val + x3_val + 3) + 1 215 | assert isinstance(y, ad.Node) 216 | assert np.array_equal(y_val, expected_yval) 217 | assert np.array_equal(grad_x2_val, expected_grad_x2_val) 218 | assert np.array_equal(grad_x3_val, expected_grad_x3_val) 219 | 220 | 221 | def test_grad_of_grad(): 222 | x2 = ad.Variable(name = "x2") 223 | x3 = ad.Variable(name = "x3") 224 | y = x2 * x2 + x2 * x3 225 | 226 | grad_x2, grad_x3 = ad.gradients(y, [x2, x3]) 227 | grad_x2_x2, grad_x2_x3 = ad.gradients(grad_x2, [x2, x3]) 228 | 229 | executor = ad.Executor([y, grad_x2, grad_x3, grad_x2_x2, grad_x2_x3]) 230 | x2_val = 2 * np.ones(3) 231 | x3_val = 3 * np.ones(3) 232 | y_val, grad_x2_val, grad_x3_val, grad_x2_x2_val, grad_x2_x3_val = executor.run(feed_dict = {x2: x2_val, x3: x3_val}) 233 | 234 | expected_yval = x2_val * x2_val + x2_val * x3_val 235 | expected_grad_x2_val = 2 * x2_val + x3_val 236 | expected_grad_x3_val = x2_val 237 | expected_grad_x2_x2_val = 2 * np.ones_like(x2_val) 238 | expected_grad_x2_x3_val = 1 * np.ones_like(x2_val) 239 | 240 | assert isinstance(y, ad.Node) 241 | assert np.array_equal(y_val, expected_yval) 242 | assert np.array_equal(grad_x2_val, expected_grad_x2_val) 243 | assert np.array_equal(grad_x3_val, expected_grad_x3_val) 244 | assert np.array_equal(grad_x2_x2_val, expected_grad_x2_x2_val) 245 | assert np.array_equal(grad_x2_x3_val, expected_grad_x2_x3_val) 246 | 247 | 248 | def test_matmul_two_vars(): 249 | x2 = ad.Variable(name = "x2") 250 | x3 = ad.Variable(name = "x3") 251 | y = ad.matmul_op(x2, x3) 252 | 253 | grad_x2, grad_x3 = ad.gradients(y, [x2, x3]) 254 | 255 | executor = ad.Executor([y, grad_x2, grad_x3]) 256 | x2_val = np.array([[1, 2], [3, 4], [5, 6]]) # 3x2 257 | x3_val = np.array([[7, 8, 9], [10, 11, 12]]) # 2x3 258 | 259 | y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict = {x2: x2_val, x3: x3_val}) 260 | 261 | expected_yval = np.matmul(x2_val, x3_val) 262 | expected_grad_x2_val = np.matmul(np.ones_like(expected_yval), np.transpose(x3_val)) 263 | expected_grad_x3_val = np.matmul(np.transpose(x2_val), np.ones_like(expected_yval)) 264 | 265 | assert isinstance(y, ad.Node) 266 | assert np.array_equal(y_val, expected_yval) 267 | assert np.array_equal(grad_x2_val, expected_grad_x2_val) 268 | assert np.array_equal(grad_x3_val, expected_grad_x3_val) 269 | 270 | 271 | def test_log_op(): 272 | x1 = ad.Variable(name = "x1") 273 | y = ad.log(x1) 274 | 275 | grad_x1, = ad.gradients(y, [x1]) 276 | 277 | executor = ad.Executor([y, grad_x1]) 278 | x1_val = 2 * np.ones(3) 279 | y_val, grad_x1_val= executor.run(feed_dict = {x1 : x1_val}) 280 | 281 | assert isinstance(y, ad.Node) 282 | assert 
np.array_equal(y_val, np.log(x1_val)) 283 | assert np.array_equal(grad_x1_val, 1 / x1_val) 284 | 285 | 286 | def test_log_two_vars(): 287 | x1 = ad.Variable(name = "x1") 288 | x2 = ad.Variable(name = "x2") 289 | y = ad.log(x1 * x2) 290 | 291 | grad_x1, grad_x2 = ad.gradients(y, [x1, x2]) 292 | 293 | executor = ad.Executor([y, grad_x1, grad_x2]) 294 | x1_val = 2 * np.ones(3) 295 | x2_val = 4 * np.ones(3) 296 | y_val, grad_x1_val, grad_x2_val = executor.run(feed_dict = {x1 : x1_val, x2: x2_val}) 297 | 298 | assert isinstance(y, ad.Node) 299 | assert np.array_equal(y_val, np.log(x1_val * x2_val)) 300 | assert np.array_equal(grad_x1_val, x2_val / (x1_val * x2_val)) 301 | assert np.array_equal(grad_x2_val, x1_val / (x1_val * x2_val)) 302 | 303 | 304 | def test_exp_op(): 305 | x1 = ad.Variable(name = "x1") 306 | y = ad.exp(x1) 307 | 308 | grad_x1, = ad.gradients(y, [x1]) 309 | 310 | executor = ad.Executor([y, grad_x1]) 311 | x1_val = 2 * np.ones(3) 312 | y_val, grad_x1_val= executor.run(feed_dict = {x1 : x1_val}) 313 | 314 | assert isinstance(y, ad.Node) 315 | assert np.array_equal(y_val, np.exp(x1_val)) 316 | assert np.array_equal(grad_x1_val, np.exp(x1_val)) 317 | 318 | 319 | def test_exp_mix_op(): 320 | x1 = ad.Variable(name="x1") 321 | x2 = ad.Variable(name="x2") 322 | y = ad.exp(ad.log(x1 * x2) + 1) 323 | 324 | grad_x1, grad_x2 = ad.gradients(y, [x1, x2]) 325 | 326 | executor = ad.Executor([y, grad_x1, grad_x2]) 327 | x1_val = 2 * np.ones(3) 328 | x2_val = 4 * np.ones(3) 329 | y_val, grad_x1_val, grad_x2_val = executor.run(feed_dict = {x1 : x1_val, x2: x2_val}) 330 | 331 | assert isinstance(y, ad.Node) 332 | assert np.array_equal(y_val, np.exp(np.log(x1_val * x2_val) + 1)) 333 | assert np.array_equal(grad_x1_val, y_val * x2_val / (x1_val * x2_val)) 334 | assert np.array_equal(grad_x2_val, y_val * x1_val / (x1_val * x2_val)) 335 | 336 | 337 | def test_reduce_sum(): 338 | x1 = ad.Variable(name = "x1") 339 | y = ad.reduce_sum(x1) 340 | 341 | grad_x1, = ad.gradients(y, [x1]) 342 | 343 | executor = ad.Executor([y, grad_x1]) 344 | x1_val = 2 * np.ones(3) 345 | y_val, grad_x1_val= executor.run(feed_dict = {x1 : x1_val}) 346 | 347 | assert isinstance(y, ad.Node) 348 | assert np.array_equal(y_val, np.sum(x1_val)) 349 | assert np.array_equal(grad_x1_val, np.ones_like(x1_val)) 350 | 351 | 352 | def test_reduce_sum_mix(): 353 | x1 = ad.Variable(name = "x1") 354 | y = ad.exp(ad.reduce_sum(x1)) 355 | 356 | grad_x1, = ad.gradients(y, [x1]) 357 | 358 | executor = ad.Executor([y, grad_x1]) 359 | x1_val = 2 * np.ones(3) 360 | y_val, grad_x1_val= executor.run(feed_dict = {x1 : x1_val}) 361 | expected_y_val = np.exp(np.sum(x1_val)) 362 | assert isinstance(y, ad.Node) 363 | assert np.array_equal(y_val, expected_y_val) 364 | assert np.array_equal(grad_x1_val, expected_y_val * np.ones_like(x1_val)) 365 | 366 | y2 = ad.log(ad.reduce_sum(x1)) 367 | grad_x2, = ad.gradients(y2, [x1]) 368 | executor2 = ad.Executor([y2, grad_x2]) 369 | y2_val, grad_x2_val = executor2.run(feed_dict={x1: x1_val}) 370 | expected_y2_val = np.log(np.sum(x1_val)) 371 | assert isinstance(y2, ad.Node) 372 | assert np.array_equal(y2_val, expected_y2_val) 373 | assert np.array_equal(grad_x2_val, (1/np.sum(x1_val)) * np.ones_like(x1_val)) 374 | 375 | 376 | def test_mix_all(): 377 | x1 = ad.Variable(name="x1") 378 | y = 1/(1+ad.exp(-ad.reduce_sum(x1))) 379 | 380 | grad_x1, = ad.gradients(y, [x1]) 381 | 382 | executor = ad.Executor([y, grad_x1]) 383 | x1_val = 2 * np.ones(3) 384 | y_val, grad_x1_val= executor.run(feed_dict = {x1 : x1_val}) 385 | 
expected_y_val = 1/(1+np.exp(-np.sum(x1_val))) 386 | expected_y_grad = expected_y_val * (1 - expected_y_val) * np.ones_like(x1_val) 387 | 388 | print(expected_y_grad) 389 | print(grad_x1_val) 390 | assert isinstance(y, ad.Node) 391 | assert np.array_equal(y_val, expected_y_val) 392 | assert np.sum(np.abs(grad_x1_val - expected_y_grad)) < 1E-10 393 | 394 | 395 | def test_logistic(): 396 | x1 = ad.Variable(name="x1") 397 | w = ad.Variable(name='w') 398 | y = 1/(1+ad.exp(-ad.reduce_sum(w * x1))) 399 | 400 | grad_w, = ad.gradients(y, [w]) 401 | 402 | executor = ad.Executor([y, grad_w]) 403 | x1_val = 3 * np.ones(3) 404 | w_val = 3 * np.zeros(3) 405 | y_val, grad_w_val = executor.run(feed_dict={x1: x1_val, w: w_val}) 406 | expected_y_val = 1/(1 + np.exp(-np.sum(w_val * x1_val))) 407 | expected_y_grad = expected_y_val * (1 - expected_y_val) * x1_val 408 | 409 | print(expected_y_grad) 410 | print(grad_w_val) 411 | assert isinstance(y, ad.Node) 412 | assert np.array_equal(y_val, expected_y_val) 413 | assert np.sum(np.abs(grad_w_val - expected_y_grad)) < 1E-7 414 | 415 | 416 | def test_log_logistic(): 417 | x1 = ad.Variable(name="x1") 418 | w = ad.Variable(name='w') 419 | y = ad.log(1/(1+ad.exp(-ad.reduce_sum(w * x1)))) 420 | 421 | grad_w, = ad.gradients(y, [w]) 422 | 423 | executor = ad.Executor([y, grad_w]) 424 | x1_val = 3 * np.ones(3) 425 | w_val = 3 * np.zeros(3) 426 | y_val, grad_w_val = executor.run(feed_dict={x1: x1_val, w: w_val}) 427 | logistic = 1/(1+np.exp(-np.sum(w_val * x1_val))) 428 | expected_y_val = np.log(logistic) 429 | expected_y_grad = (1 - logistic) * x1_val 430 | 431 | print(expected_y_grad) 432 | print(grad_w_val) 433 | assert isinstance(y, ad.Node) 434 | assert np.array_equal(y_val, expected_y_val) 435 | assert np.sum(np.abs(grad_w_val - expected_y_grad)) < 1E-7 436 | 437 | 438 | def test_logistic_loss(): 439 | x = ad.Variable(name='x') 440 | w = ad.Variable(name='w') 441 | y = ad.Variable(name='y') 442 | 443 | h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x))) 444 | L = y * ad.log(h) + (1 - y) * ad.log(1 - h) 445 | w_grad, = ad.gradients(L, [w]) 446 | executor = ad.Executor([L, w_grad]) 447 | 448 | y_val = 0 449 | x_val = np.array([2, 3, 4]) 450 | w_val = np.random.random(3) 451 | 452 | L_val, w_grad_val = executor.run(feed_dict={x: x_val, y: y_val, w: w_val}) 453 | 454 | logistic = 1 / (1 + np.exp(-np.sum(w_val * x_val))) 455 | expected_L_val = y_val * np.log(logistic) + (1 - y_val) * np.log(1 - logistic) 456 | expected_w_grad = (y_val - logistic) * x_val 457 | 458 | print(L_val) 459 | print(expected_L_val) 460 | print(expected_w_grad) 461 | print(w_grad_val) 462 | 463 | assert expected_L_val == L_val 464 | assert np.sum(np.abs(expected_w_grad - w_grad_val)) < 1E-9 465 | -------------------------------------------------------------------------------- /data.txt: -------------------------------------------------------------------------------- 1 | -0.017612 14.053064 0 2 | -1.395634 4.662541 1 3 | -0.752157 6.538620 0 4 | -1.322371 7.152853 0 5 | 0.423363 11.054677 0 6 | 0.406704 7.067335 1 7 | 0.667394 12.741452 0 8 | -2.460150 6.866805 1 9 | 0.569411 9.548755 0 10 | -0.026632 10.427743 0 11 | 0.850433 6.920334 1 12 | 1.347183 13.175500 0 13 | 1.176813 3.167020 1 14 | -1.781871 9.097953 0 15 | -0.566606 5.749003 1 16 | 0.931635 1.589505 1 17 | -0.024205 6.151823 1 18 | -0.036453 2.690988 1 19 | -0.196949 0.444165 1 20 | 1.014459 5.754399 1 21 | 1.985298 3.230619 1 22 | -1.693453 -0.557540 1 23 | -0.576525 11.778922 0 24 | -0.346811 -1.678730 1 25 | -2.124484 2.672471 1 26 | 
1.217916 9.597015 0 27 | -0.733928 9.098687 0 28 | -3.642001 -1.618087 1 29 | 0.315985 3.523953 1 30 | 1.416614 9.619232 0 31 | -0.386323 3.989286 1 32 | 0.556921 8.294984 1 33 | 1.224863 11.587360 0 34 | -1.347803 -2.406051 1 35 | 1.196604 4.951851 1 36 | 0.275221 9.543647 0 37 | 0.470575 9.332488 0 38 | -1.889567 9.542662 0 39 | -1.527893 12.150579 0 40 | -1.185247 11.309318 0 41 | -0.445678 3.297303 1 42 | 1.042222 6.105155 1 43 | -0.618787 10.320986 0 44 | 1.152083 0.548467 1 45 | 0.828534 2.676045 1 46 | -1.237728 10.549033 0 47 | -0.683565 -2.166125 1 48 | 0.229456 5.921938 1 49 | -0.959885 11.555336 0 50 | 0.492911 10.993324 0 51 | 0.184992 8.721488 0 52 | -0.355715 10.325976 0 53 | -0.397822 8.058397 0 54 | 0.824839 13.730343 0 55 | 1.507278 5.027866 1 56 | 0.099671 6.835839 1 57 | -0.344008 10.717485 0 58 | 1.785928 7.718645 1 59 | -0.918801 11.560217 0 60 | -0.364009 4.747300 1 61 | -0.841722 4.119083 1 62 | 0.490426 1.960539 1 63 | -0.007194 9.075792 0 64 | 0.356107 12.447863 0 65 | 0.342578 12.281162 0 66 | -0.810823 -1.466018 1 67 | 2.530777 6.476801 1 68 | 1.296683 11.607559 0 69 | 0.475487 12.040035 0 70 | -0.783277 11.009725 0 71 | 0.074798 11.023650 0 72 | -1.337472 0.468339 1 73 | -0.102781 13.763651 0 74 | -0.147324 2.874846 1 75 | 0.518389 9.887035 0 76 | 1.015399 7.571882 0 77 | -1.658086 -0.027255 1 78 | 1.319944 2.171228 1 79 | 2.056216 5.019981 1 80 | -0.851633 4.375691 1 81 | -1.510047 6.061992 0 82 | -1.076637 -3.181888 1 83 | 1.821096 10.283990 0 84 | 3.010150 8.401766 1 85 | -1.099458 1.688274 1 86 | -0.834872 -1.733869 1 87 | -0.846637 3.849075 1 88 | 1.400102 12.628781 0 89 | 1.752842 5.468166 1 90 | 0.078557 0.059736 1 91 | 0.089392 -0.715300 1 92 | 1.825662 12.693808 0 93 | 0.197445 9.744638 0 94 | 0.126117 0.922311 1 95 | -0.679797 1.220530 1 96 | 0.677983 2.556666 1 97 | 0.761349 10.693862 0 98 | -2.168791 0.143632 1 99 | 1.388610 9.341997 0 100 | 0.317029 14.739025 0 -------------------------------------------------------------------------------- /lr_autodiff.py: -------------------------------------------------------------------------------- 1 | import autodiff as ad 2 | import numpy as np 3 | 4 | 5 | def logistic_prob(_w): 6 | def wrapper(_x): 7 | return 1 / (1 + np.exp(-np.sum(_x * _w))) 8 | return wrapper 9 | 10 | 11 | def test_accuracy(_w, _X, _Y): 12 | prob = logistic_prob(_w) 13 | correct = 0 14 | total = len(_Y) 15 | for i in range(len(_Y)): 16 | x = _X[i] 17 | y = _Y[i] 18 | p = prob(x) 19 | if p >= 0.5 and y == 1.0: 20 | correct += 1 21 | elif p < 0.5 and y == 0.0: 22 | correct += 1 23 | print("总数:%d, 预测正确:%d" % (total, correct)) 24 | 25 | 26 | def plot(N, X_val, Y_val, w, with_boundary=False): 27 | import matplotlib.pyplot as plt 28 | for i in range(N): 29 | __x = X_val[i] 30 | if Y_val[i] == 1: 31 | plt.plot(__x[1], __x[2], marker='x') 32 | else: 33 | plt.plot(__x[1], __x[2], marker='o') 34 | if with_boundary: 35 | min_x1 = min(X_val[:, 1]) 36 | max_x1 = max(X_val[:, 1]) 37 | min_x2 = float(-w[0] - w[1] * min_x1) / w[2] 38 | max_x2 = float(-w[0] - w[1] * max_x1) / w[2] 39 | plt.plot([min_x1, max_x1], [min_x2, max_x2], '-r') 40 | 41 | plt.show() 42 | 43 | 44 | def gen_2d_data(n): 45 | x_data = np.random.random([n, 2]) 46 | y_data = np.ones(n) 47 | for i in range(n): 48 | d = x_data[i] 49 | if d[0] + d[1] < 1: 50 | y_data[i] = 0 51 | x_data_with_bias = np.ones([n, 3]) 52 | x_data_with_bias[:, 1:] = x_data 53 | return x_data_with_bias, y_data 54 | 55 | 56 | def auto_diff_lr(): 57 | x = ad.Variable(name='x') 58 | w = ad.Variable(name='w') 59 | y = 
ad.Variable(name='y') 60 | 61 | # Note: in some cases the formulation below has large numerical error, 62 | # which is why real systems usually provide fused higher-level operators to reduce the numerical error. 63 | 64 | h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x))) 65 | L = y * ad.log(h) + (1 - y) * ad.log(1 - h) 66 | w_grad, = ad.gradients(L, [w]) 67 | executor = ad.Executor([L, w_grad]) 68 | 69 | N = 100 70 | X_val, Y_val = gen_2d_data(N) 71 | w_val = np.ones(3) 72 | 73 | plot(N, X_val, Y_val, w_val) 74 | 75 | test_accuracy(w_val, X_val, Y_val) 76 | alpha = 0.01 77 | max_iters = 300 78 | for iteration in range(max_iters): 79 | acc_L_val = 0 80 | for i in range(N): 81 | x_val = X_val[i] 82 | y_val = np.array(Y_val[i]) 83 | L_val, w_grad_val = executor.run(feed_dict={w: w_val, x: x_val, y: y_val}) 84 | w_val += alpha * w_grad_val  # gradient ascent on the log-likelihood 85 | acc_L_val += L_val 86 | print("iter = %d, log-likelihood = %s, w = %s" % (iteration, acc_L_val, w_val)) 87 | test_accuracy(w_val, X_val, Y_val) 88 | plot(N, X_val, Y_val, w_val, True) 89 | 90 | 91 | if __name__ == '__main__': 92 | auto_diff_lr() 93 | 94 | 95 | 96 | 97 | --------------------------------------------------------------------------------
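For quick reference, here is a minimal usage sketch of the engine, mirroring the patterns in autodiff_test.py (the variable names and input values are illustrative): build a symbolic expression, request gradient nodes with ad.gradients, then evaluate values and gradients together in a single Executor.run call.

import numpy as np
import autodiff as ad

# Build the symbolic expression y = x1 * x2 + x1; no numerical work happens yet.
x1 = ad.Variable(name="x1")
x2 = ad.Variable(name="x2")
y = x1 * x2 + x1

# ad.gradients returns new graph nodes representing dy/dx1 and dy/dx2.
grad_x1, grad_x2 = ad.gradients(y, [x1, x2])

# Feed concrete arrays for the placeholder variables and evaluate everything at once.
executor = ad.Executor([y, grad_x1, grad_x2])
x1_val = np.array([1.0, 2.0, 3.0])
x2_val = np.array([4.0, 5.0, 6.0])
y_val, grad_x1_val, grad_x2_val = executor.run(feed_dict={x1: x1_val, x2: x2_val})

print(y_val)        # x1_val * x2_val + x1_val
print(grad_x1_val)  # x2_val + 1
print(grad_x2_val)  # x1_val

Because the returned gradients are themselves graph nodes, higher-order derivatives come from the same mechanism (see test_grad_of_grad in autodiff_test.py).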