├── .gitignore
├── README.md
├── docs
│   ├── graph.html
│   ├── main.html
│   ├── ops.html
│   ├── public
│   │   ├── fonts
│   │   │   ├── aller-bold.eot
│   │   │   ├── aller-bold.ttf
│   │   │   ├── aller-bold.woff
│   │   │   ├── aller-light.eot
│   │   │   ├── aller-light.ttf
│   │   │   ├── aller-light.woff
│   │   │   ├── roboto-black.eot
│   │   │   ├── roboto-black.ttf
│   │   │   └── roboto-black.woff
│   │   └── stylesheets
│   │       └── normalize.css
│   ├── pycco.css
│   ├── session.html
│   ├── tensor.html
│   └── tf_test.html
├── graph.py
├── main.py
├── ops.py
├── session.py
├── tensor.py
├── tests
│   ├── __init__.py
│   ├── test_gradients.py
│   └── test_ops.py
└── tf_test.py

/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .env/
3 | *.pyc
4 | .ipynb_checkpoints/
5 | data/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Implementing (parts of) TensorFlow (almost) from Scratch
2 | ## A Walkthrough of Symbolic Differentiation
3 | 
4 | This [literate programming](https://en.wikipedia.org/wiki/Literate_programming)
5 | exercise will construct a simple 2-layer feed-forward neural network to compute
6 | the [exclusive or](https://en.wikipedia.org/wiki/Exclusive_or), using [symbolic
7 | differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) to
8 | compute the gradients automatically. In total, about 500 lines of code,
9 | including comments. The only functional dependency is numpy. I highly recommend
10 | reading Chris Olah's [Calculus on Computational Graphs:
11 | Backpropagation](http://colah.github.io/posts/2015-08-Backprop/) for more
12 | background on what this code is doing.
13 | 
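Before the generated documentation and the sources, a quick orientation. The snippet below is an illustrative sketch only, not a file from the repository: it assumes the repo root is on the Python path so that `graph.py` and `session.py` (both included further down) are importable, and it shows the workflow the README describes: build a graph, ask it for symbolic gradients, then evaluate tensors in a session.

```python
# Illustrative sketch only (not part of the repository).
# Assumes graph.py, tensor.py, ops.py and session.py from this repo are importable.
from graph import Graph
from session import Session

graph = Graph()
x = graph.tensor(3.0)             # a leaf tensor with an initial value
y = x * x                         # operator overloading adds a MulOp node to the graph
dy_dx, = graph.gradients(y, [x])  # the symbolic gradient is just another tensor

sess = Session(graph)
print(sess.run([y, dy_dx]))       # [9.0, 6.0]: y = x**2 and dy/dx = 2*x at x = 3
```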
--------------------------------------------------------------------------------
/docs/graph.html:
--------------------------------------------------------------------------------
[pycco-generated HTML rendering of graph.py; markup omitted -- its prose and code duplicate /graph.py below]
--------------------------------------------------------------------------------
/docs/main.html:
--------------------------------------------------------------------------------
[pycco-generated HTML rendering of main.py; markup omitted -- its prose and code duplicate /main.py below]
--------------------------------------------------------------------------------
/docs/ops.html:
--------------------------------------------------------------------------------
[pycco-generated HTML rendering of ops.py; markup omitted -- its prose and code duplicate /ops.py below]
--------------------------------------------------------------------------------
/docs/public/fonts/aller-bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jimfleming/differentiation/b7976decf8af4e5b13d315422ba5fafe902463c8/docs/public/fonts/aller-bold.eot
--------------------------------------------------------------------------------
/docs/public/fonts/aller-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jimfleming/differentiation/b7976decf8af4e5b13d315422ba5fafe902463c8/docs/public/fonts/aller-bold.ttf
--------------------------------------------------------------------------------
/docs/public/fonts/aller-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jimfleming/differentiation/b7976decf8af4e5b13d315422ba5fafe902463c8/docs/public/fonts/aller-bold.woff
--------------------------------------------------------------------------------
/docs/public/fonts/aller-light.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jimfleming/differentiation/b7976decf8af4e5b13d315422ba5fafe902463c8/docs/public/fonts/aller-light.eot
--------------------------------------------------------------------------------
/docs/public/fonts/aller-light.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jimfleming/differentiation/b7976decf8af4e5b13d315422ba5fafe902463c8/docs/public/fonts/aller-light.ttf
--------------------------------------------------------------------------------
/docs/public/fonts/aller-light.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jimfleming/differentiation/b7976decf8af4e5b13d315422ba5fafe902463c8/docs/public/fonts/aller-light.woff
--------------------------------------------------------------------------------
/docs/public/fonts/roboto-black.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jimfleming/differentiation/b7976decf8af4e5b13d315422ba5fafe902463c8/docs/public/fonts/roboto-black.eot
--------------------------------------------------------------------------------
/docs/public/fonts/roboto-black.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jimfleming/differentiation/b7976decf8af4e5b13d315422ba5fafe902463c8/docs/public/fonts/roboto-black.ttf
--------------------------------------------------------------------------------
/docs/public/fonts/roboto-black.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jimfleming/differentiation/b7976decf8af4e5b13d315422ba5fafe902463c8/docs/public/fonts/roboto-black.woff
--------------------------------------------------------------------------------
/docs/public/stylesheets/normalize.css:
--------------------------------------------------------------------------------
[normalize.css v2.0.1 (MIT License, git.io/normalize) -- vendored third-party stylesheet for the generated docs; contents omitted]
--------------------------------------------------------------------------------
/docs/pycco.css:
--------------------------------------------------------------------------------
[pycco's default stylesheet (layout, typography, and Pygments syntax-highlighting rules) for the generated docs; contents omitted]
--------------------------------------------------------------------------------
/docs/session.html:
--------------------------------------------------------------------------------
[pycco-generated HTML rendering of session.py; markup omitted -- its prose and code duplicate /session.py below]
--------------------------------------------------------------------------------
/docs/tensor.html:
--------------------------------------------------------------------------------
[pycco-generated HTML rendering of tensor.py; markup omitted -- its prose and code duplicate the tensor.py source]
--------------------------------------------------------------------------------
/docs/tf_test.html:
--------------------------------------------------------------------------------
[pycco-generated HTML rendering of tf_test.py; markup omitted -- its prose and code duplicate the tf_test.py source]
--------------------------------------------------------------------------------
/graph.py:
--------------------------------------------------------------------------------
1 | """
2 | [main.py](main.html) |
3 | [graph.py](graph.html) |
4 | [tensor.py](tensor.html) |
5 | [ops.py](ops.html) |
6 | [session.py](session.html)
7 | 
8 | [Previous: Main](main.html) | [Next: Tensors](tensor.html)
9 | """
10 | 
11 | from __future__ import absolute_import
12 | from __future__ import print_function
13 | from __future__ import division
14 | 
15 | from tensor import Tensor
16 | from ops import AddOp, SubOp, MulOp, DivOp, \
17 |                 DotOp, TransposeOp, SquareOp, NegOp, \
18 |                 MeanOp, SigmoidOp, AssignOp, GroupOp
19 | 
20 | class Graph(object):
21 |     """
22 |     `Graph` represents a computation to be evaluated by a `Session`. With the
23 |     exception of `Graph#tensor`, `Graph#convert`, and `Graph#gradients`, most
24 |     methods simply create an operation and return the output tensor of the
25 |     operation.
26 |     """
27 | 
28 |     def tensor(self, initial_value=None, op=None):
29 |         """
30 |         The `tensor` method defines a new tensor with the given initial value
31 |         and operation.
32 |         """
33 |         return Tensor(initial_value=initial_value, graph=self, op=op)
34 | 
35 |     def convert(self, value):
36 |         """
37 |         The `convert` method returns the given value if it is a `Tensor`,
38 |         otherwise converts it to one.
39 |         """
40 |         if isinstance(value, Tensor):
41 |             return value
42 |         return self.tensor(initial_value=value)
43 | 
44 |     def gradients(self, y, xs):
45 |         """
46 |         The `gradients` method performs backpropagation using [reverse accumulation](https://en.wikipedia.org/wiki/Automatic_differentiation#Reverse_accumulation) and the [chain rule](https://en.wikipedia.org/wiki/Chain_rule#Higher_dimensions).
47 | 
48 |         It traverses the graph from `y` to each `x` in `xs`, accumulating
49 |         gradients, and returning the partial gradients for each `xs`. We use a
50 |         queue to keep track of the next tensor for which to compute the
51 |         gradient and keep a dictionary of the gradients computed thus far.
52 |         Iteration starts from the target output `y` with an output gradient
53 |         of 1.
54 |         """
55 | 
56 |         queue = []
57 |         queue.append((y, 1))
58 | 
59 |         grads = {}
60 |         while len(queue) > 0:
61 |             y, grad_y = queue.pop(0)
62 |             grad_y = self.convert(grad_y)
63 | 
64 |             gradients = y.op.gradient(grad_y)
65 |             assert len(gradients) == len(y.op.inputs)
66 | 
67 |             for tensor, gradient in zip(y.op.inputs, gradients):
68 |                 if tensor in grads:
69 |                     grads[tensor] += gradient
70 |                 else:
71 |                     grads[tensor] = gradient
72 | 
73 |                 if tensor.op:
74 |                     queue.append((tensor, gradient))
75 | 
76 |         return [grads[x] for x in xs]
77 | 
78 |     # ## Operation Methods
79 |     # Each operation method defines a new operation with the provided input
80 |     # tensors and returns the operation's output.
81 | 
82 |     def add(self, a, b):
83 |         op = AddOp([a, b], graph=self)
84 |         return op.output
85 | 
86 |     def sub(self, a, b):
87 |         op = SubOp([a, b], graph=self)
88 |         return op.output
89 | 
90 |     def mul(self, a, b):
91 |         op = MulOp([a, b], graph=self)
92 |         return op.output
93 | 
94 |     def div(self, a, b):
95 |         op = DivOp([a, b], graph=self)
96 |         return op.output
97 | 
98 |     def neg(self, x):
99 |         op = NegOp([x], graph=self)
100 |         return op.output
101 | 
102 |     def square(self, x):
103 |         op = SquareOp([x], graph=self)
104 |         return op.output
105 | 
106 |     def sigmoid(self, x):
107 |         op = SigmoidOp([x], graph=self)
108 |         return op.output
109 | 
110 |     def dot(self, a, b):
111 |         op = DotOp([a, b], graph=self)
112 |         return op.output
113 | 
114 |     def transpose(self, x):
115 |         op = TransposeOp([x], graph=self)
116 |         return op.output
117 | 
118 |     def mean(self, x):
119 |         op = MeanOp([x], graph=self)
120 |         return op.output
121 | 
122 |     def assign(self, a, b):
123 |         op = AssignOp([a, b], graph=self)
124 |         return op.output
125 | 
126 |     def group(self, inputs):
127 |         op = GroupOp(inputs, graph=self)
128 |         return op.output
129 | 
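The `gradients` docstring above describes accumulating partial gradients in a dictionary as the graph is traversed. A hedged illustration of that accumulation (a sketch, not a file from the repository): when a tensor feeds more than one operation, its partials are summed.

```python
# Illustrative sketch only (not part of the repository): gradient accumulation
# when the same tensor is used along two paths. For z = x*x + x, dz/dx = 2*x + 1.
from graph import Graph
from session import Session

graph = Graph()
x = graph.tensor(2.0)
z = x * x + x                     # x reaches z through a MulOp and through the AddOp
dz_dx, = graph.gradients(z, [x])  # the AddOp partial (1) and MulOp partials (x, x) are summed

print(Session(graph).run([dz_dx]))  # [5.0] == 2*2 + 1
```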
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | """
2 | # Implementing (parts of) TensorFlow (almost) from Scratch
3 | ## A Walkthrough of Symbolic Differentiation
4 | ### Jim Fleming ([@jimmfleming](https://twitter.com/jimmfleming))
5 | 
6 | [main.py](main.html) |
7 | [graph.py](graph.html) |
8 | [tensor.py](tensor.html) |
9 | [ops.py](ops.html) |
10 | [session.py](session.html)
11 | 
12 | [Next: The Graph](graph.html)
13 | 
14 | This [literate programming](https://en.wikipedia.org/wiki/Literate_programming)
15 | exercise will construct a simple 2-layer feed-forward neural network to compute
16 | the [exclusive or](https://en.wikipedia.org/wiki/Exclusive_or), using [symbolic
17 | differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) to
18 | compute the gradients automatically. In total, about 500 lines of code,
19 | including comments. The only functional dependency is numpy. I highly recommend
20 | reading Chris Olah's [Calculus on Computational Graphs:
21 | Backpropagation](http://colah.github.io/posts/2015-08-Backprop/) for more
22 | background on what this code is doing.
23 | 
24 | The XOR task is convenient for a number of reasons: it's very fast to compute;
25 | it is not linearly separable thus requiring at least two layers and making the
26 | gradient calculation more interesting; it doesn't require more complicated
27 | matrix-matrix features such as broadcasting.
28 | 
29 | > (I'm also working on a more involved example for MNIST but as soon as I added
30 | support for matrix shapes and broadcasting the code ballooned by 5x and it was
31 | no longer a simple example.)
32 | 
33 | Let's start by going over the architecture. We're going to use four main
34 | components:
35 | 
36 | - [`Graph`](graph.html), composed of `Tensor` nodes and `Op` nodes that
37 | together represent the computation we want to differentiate.
38 | - [`Tensor`](tensor.html) represents a value in the graph. Each tensor keeps a
39 | reference to the operation that produced it, if any.
40 | - [`BaseOp`](ops.html) represents a computation to perform and its
41 | differentiable components. Operations hold references to their input
42 | tensors and an output tensor.
43 | - [`Session`](session.html) is used to evaluate tensors in the graph.
44 | 
45 | **Note** the return from a graph operation is actually a tensor, representing
46 | the output of the operation.
47 | """
48 | 
49 | from __future__ import absolute_import
50 | from __future__ import print_function
51 | from __future__ import division
52 | 
53 | import numpy as np
54 | np.random.seed(67)
55 | 
56 | from tqdm import trange
57 | 
58 | from graph import Graph
59 | from session import Session
60 | 
61 | def main():
62 |     """
63 |     The main method performs some setup then trains the model, displaying the
64 |     current loss along the way.
65 |     """
66 | 
67 |     # Define a new graph
68 |     graph = Graph()
69 | 
70 |     # Initialize the training data (XOR truth table)
71 |     X = graph.tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))
72 |     y = graph.tensor(np.array([[0, 1, 1, 0]]))
73 | 
74 |     # Initialize the model's parameters (weights for each layer)
75 |     weights0 = graph.tensor(np.random.normal(size=(2, 4)))
76 |     weights1 = graph.tensor(np.random.normal(size=(4, 1)))
77 | 
78 |     # Define the model's activations
79 |     activations0 = graph.sigmoid(graph.dot(X, weights0))
80 |     activations1 = graph.sigmoid(graph.dot(activations0, weights1))
81 | 
82 |     # Define operation for computing the loss
83 |     # ([mean squared error](https://en.wikipedia.org/wiki/Mean_squared_error))
84 |     loss_op = graph.mean(graph.square(graph.transpose(y) - activations1))
85 | 
86 |     # Define operations for the gradients w.r.t. the loss and an update
87 |     # operation to apply the gradients to the model's parameters.
88 |     parameters = [weights0, weights1]
89 |     gradients = graph.gradients(loss_op, parameters)
90 | 
91 |     update_op = graph.group([
92 |         graph.assign(param, param - grad) \
93 |             for param, grad in zip(parameters, gradients)
94 |     ])
95 | 
96 |     # Begin training... We iterate for a number of epochs, calling the session
97 |     # run method each time to compute the update operation and the current
98 |     # loss. The progress bar's description is updated to display the loss.
99 |     sess = Session(graph)
100 |     with trange(10000) as pbar_epoch:
101 |         for _ in pbar_epoch:
102 |             _, loss = sess.run([update_op, loss_op])
103 |             pbar_epoch.set_description('loss: {:.8f}'.format(loss))
104 | 
105 | if __name__ == '__main__':
106 |     main()
107 | 
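A hedged follow-up to `main()` above (not part of the file): after the training loop, the updated weights live in `sess.state`, so the same session can evaluate the network's predictions. The names below are the locals defined inside `main()`.

```python
# Illustrative sketch only (not in main.py); assumes the locals from main()
# are still in scope after the training loop has finished.
predictions, = sess.run([activations1])
print(predictions.round(3))  # expected to approach [[0], [1], [1], [0]], the XOR targets
```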
27 | """ 28 | 29 | def __init__(self, inputs, graph): 30 | self.inputs = [graph.convert(input_) for input_ in inputs] 31 | self.output = graph.tensor(op=self) 32 | self.graph = graph 33 | 34 | def compute(self, sess, *args): 35 | """ 36 | The `compute` method receives as input the _evaluated_ input tensors 37 | and returns the result of performing its operation on the inputs. 38 | """ 39 | raise NotImplementedError() 40 | 41 | def gradient(self, grad): 42 | """ 43 | The `gradient` method computes the partial derivative w.r.t. each input 44 | to the operation. (Most of the derivatives come from 45 | [Wikipedia](https://en.wikipedia.org/wiki/Differentiation_rules).) 46 | """ 47 | raise NotImplementedError() 48 | 49 | class AddOp(BaseOp): 50 | """ 51 | `AddOp` adds a tensor to another tensor. Uses the 52 | [sum rule](https://en.wikipedia.org/wiki/Sum_rule_in_differentiation) to 53 | compute the partial derivatives. 54 | """ 55 | 56 | def compute(self, sess, a, b): 57 | return a + b 58 | 59 | def gradient(self, grad): 60 | return [grad, grad] 61 | 62 | class SubOp(BaseOp): 63 | """ 64 | `SubOp` subtracts a tensor from another tensor. Also uses the 65 | [sum rule](https://en.wikipedia.org/wiki/Sum_rule_in_differentiation) to 66 | compute the partial derivatives. 67 | """ 68 | 69 | def compute(self, sess, a, b): 70 | return a - b 71 | 72 | def gradient(self, grad): 73 | return [grad, -grad] 74 | 75 | class MulOp(BaseOp): 76 | """ 77 | `MulOp` multiplies a tensor by another tensor. Uses the 78 | [product rule](https://en.wikipedia.org/wiki/Product_rule) to compute the 79 | partial derivatives. 80 | """ 81 | 82 | def compute(self, sess, a, b): 83 | return a * b 84 | 85 | def gradient(self, grad): 86 | a, b = self.inputs 87 | return [grad * b, grad * a] 88 | 89 | class DivOp(BaseOp): 90 | """ 91 | `DivOp` divides a tensor by another tensor. Uses the 92 | [quotient rule](https://en.wikipedia.org/wiki/Quotient_rule) to compute the 93 | partial derivatives. 94 | """ 95 | 96 | def compute(self, sess, a, b): 97 | return a / b 98 | 99 | def gradient(self, grad): 100 | a, b = self.inputs 101 | return [grad / b, grad * (-a / self.graph.square(b))] 102 | 103 | class NegOp(BaseOp): 104 | """ 105 | `NegOp` negates a tensor. 106 | """ 107 | 108 | def compute(self, sess, x): 109 | return -x 110 | 111 | def gradient(self, grad): 112 | return [-grad] 113 | 114 | class DotOp(BaseOp): 115 | """ 116 | `DotOp` computes the dot product between two tensors. Uses the 117 | [product rule](https://en.wikipedia.org/wiki/Product_rule) to compute the 118 | partial derivatives. Note that here we need to transpose the terms and 119 | perform a dot product, assuming matrices rather than scalars. 120 | """ 121 | 122 | def compute(self, sess, a, b): 123 | return np.dot(a, b) 124 | 125 | def gradient(self, grad): 126 | a, b = self.inputs 127 | return [ 128 | self.graph.dot(grad, self.graph.transpose(b)), 129 | self.graph.dot(self.graph.transpose(a), grad), 130 | ] 131 | 132 | class SquareOp(BaseOp): 133 | """ 134 | `SquareOp` squares a tensor. 135 | """ 136 | 137 | def compute(self, sess, x): 138 | return np.square(x) 139 | 140 | def gradient(self, grad): 141 | x = self.inputs[0] 142 | return [grad * (2 * x)] 143 | 144 | class TransposeOp(BaseOp): 145 | """ 146 | `TransposeOp` transposes a tensor. 
147 | """ 148 | 149 | def compute(self, sess, x): 150 | return np.transpose(x) 151 | 152 | def gradient(self, grad): 153 | return [self.graph.transpose(grad)] 154 | 155 | class SigmoidOp(BaseOp): 156 | """ 157 | `SigmoidOp` implements the 158 | [sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function) and its 159 | derivative. Notice that the derivative uses the output of the operation 160 | which saves recomputation. 161 | """ 162 | 163 | def compute(self, sess, x): 164 | return 1 / (1 + np.exp(-x)) 165 | 166 | def gradient(self, grad): 167 | y = self.output 168 | return [grad * (y * (1 - y))] 169 | 170 | class MeanOp(BaseOp): 171 | """ 172 | `MeanOp` computes the mean of a tensor. **Note** the gradient here is 173 | intentionally incorrect because computing it requires knowing the shape of 174 | the input and output tensors. Fortunately, gradients are fairly malleable 175 | in optimization. 176 | """ 177 | 178 | def compute(self, sess, x): 179 | return np.mean(x) 180 | 181 | def gradient(self, grad): 182 | return [grad] 183 | 184 | class GroupOp(BaseOp): 185 | """ 186 | `GroupOp` exploits the fact that each input to the operation is 187 | automatically evaluated before computing the operation's output, allowing 188 | us to group together the evaluation of multiple operations. Its input 189 | gradients come from simply broadcasting the output gradient. 190 | """ 191 | 192 | def compute(self, sess, *args): 193 | return None 194 | 195 | def gradient(self, grad): 196 | return [grad] * len(self.inputs) 197 | 198 | class AssignOp(BaseOp): 199 | """ 200 | `AssignOp` updates the session's current state for a tensor. It is not 201 | differentiable in this implementation. 202 | """ 203 | 204 | def compute(self, sess, a, b): 205 | assert a.shape == b.shape, \ 206 | 'shapes must match to assign: {} != {}' \ 207 | .format(a.shape, b.shape) 208 | sess.state[self.inputs[0]] = b 209 | return b 210 | -------------------------------------------------------------------------------- /session.py: -------------------------------------------------------------------------------- 1 | """ 2 | [main.py](main.html) | 3 | [graph.py](graph.html) | 4 | [tensor.py](tensor.html) | 5 | [ops.py](ops.html) | 6 | [session.py](session.html) 7 | 8 | [Previous: Operations](ops.html) 9 | """ 10 | 11 | from __future__ import absolute_import 12 | from __future__ import print_function 13 | from __future__ import division 14 | 15 | import numpy as np 16 | 17 | class Session(object): 18 | """ 19 | `Session` performs computation on a graph. 20 | """ 21 | 22 | def __init__(self, graph): 23 | """ 24 | Initialize a session with a graph and a state dictionary to hold 25 | tensor values. 26 | """ 27 | self.graph = graph 28 | self.state = {} 29 | 30 | def run_op(self, op, context): 31 | """ 32 | `run_op` takes as input an operation to run and a context to fetch 33 | pre-evaluated tensors. 34 | """ 35 | args = [self.eval_tensor(tensor, context) for tensor in op.inputs] 36 | return op.compute(self, *args) 37 | 38 | def eval_tensor(self, tensor, context): 39 | """ 40 | `eval_tensor` takes as input a tensor to evaluate and a context to 41 | fetch pre-evaluated tensors. If the tensor is not already in the context 42 | there are three possibilities for evaluating the tensor: 43 | 44 | - The tensor has an operation and is therefore the result of the 45 | operation that must be computed. 46 | - The tensor has an active state from another session run that can be 47 | fetched. 
48 | - The tensor has an initial value from its instantiation that can be 49 | fetched and added to the state. 50 | """ 51 | if tensor not in context: 52 | if tensor.op is not None: 53 | context[tensor] = self.run_op(tensor.op, context) 54 | elif tensor in self.state and self.state[tensor] is not None: 55 | context[tensor] = self.state[tensor] 56 | elif tensor not in self.state and tensor.initial_value is not None: 57 | context[tensor] = self.state[tensor] = tensor.initial_value 58 | 59 | return context[tensor] 60 | 61 | def run(self, tensors, feed_dict=None): 62 | """ 63 | `run` takes a list of tensors to evaluate and a feed dictionary that 64 | can be used to override tensors. 65 | """ 66 | context = {} 67 | 68 | if feed_dict: 69 | context.update(feed_dict) 70 | 71 | return [self.eval_tensor(tensor, context) for tensor in tensors] 72 | -------------------------------------------------------------------------------- /tensor.py: -------------------------------------------------------------------------------- 1 | """ 2 | [main.py](main.html) | 3 | [graph.py](graph.html) | 4 | [tensor.py](tensor.html) | 5 | [ops.py](ops.html) | 6 | [session.py](session.html) 7 | 8 | [Previous: The Graph](graph.html) | [Next: Operations](ops.html) 9 | """ 10 | 11 | from __future__ import absolute_import 12 | from __future__ import print_function 13 | from __future__ import division 14 | 15 | import numpy as np 16 | 17 | class Tensor(object): 18 | """ 19 | `Tensor` represents a _value_ in the graph. It's just a data container with 20 | methods for operator overloading (each of which delegates to the graph). It 21 | includes: 22 | 23 | - The initial value of the tensor. 24 | - The operation which produced the tensor, if applicable. 25 | - A reference to the graph this tensor belongs to. 
26 | """ 27 | 28 | def __init__(self, initial_value, op, graph): 29 | self.initial_value = initial_value 30 | self.graph = graph 31 | self.op = op 32 | 33 | # ## [Operator Overloading](https://docs.python.org/2/reference/datamodel.html?highlight=__radd__#emulating-numeric-types) 34 | def __add__(self, other): 35 | return self.graph.add(self, other) 36 | 37 | def __sub__(self, other): 38 | return self.graph.sub(self, other) 39 | 40 | def __mul__(self, other): 41 | return self.graph.mul(self, other) 42 | 43 | def __truediv__(self, other): 44 | return self.graph.div(self, other) 45 | 46 | def __neg__(self): 47 | return self.graph.neg(self) 48 | 49 | # ## [Reverse Operator Overloading](https://docs.python.org/2/reference/datamodel.html?highlight=__radd__#object.__radd__) 50 | def __radd__(self, other): 51 | return self.graph.add(other, self) 52 | 53 | def __rsub__(self, other): 54 | return self.graph.sub(other, self) 55 | 56 | def __rmul__(self, other): 57 | return self.graph.mul(other, self) 58 | 59 | def __rtruediv__(self, other): 60 | return self.graph.div(other, self) 61 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jimfleming/differentiation/b7976decf8af4e5b13d315422ba5fafe902463c8/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_gradients.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import unittest 6 | import numpy as np 7 | 8 | from graph import Graph 9 | from session import Session 10 | 11 | class GradientsTestCase(unittest.TestCase): 12 | 13 | def test_add_grad(self): 14 | graph = Graph() 15 | 16 | a = graph.tensor() 17 | b = graph.tensor() 18 | c = a + b 19 | 20 | grad_a, grad_b = graph.gradients(c, [a, b]) 21 | 22 | sess = Session(graph) 23 | grad_a_, grad_b_ = sess.run([grad_a, grad_b], feed_dict={a: 2, b: 1}) 24 | 25 | self.assertEqual(grad_a_, 1) 26 | self.assertEqual(grad_b_, 1) 27 | 28 | def test_sub_grad(self): 29 | graph = Graph() 30 | 31 | a = graph.tensor() 32 | b = graph.tensor() 33 | c = a - b 34 | 35 | grad_a, grad_b = graph.gradients(c, [a, b]) 36 | 37 | sess = Session(graph) 38 | grad_a_, grad_b_ = sess.run([grad_a, grad_b], feed_dict={a: 2, b: 1}) 39 | 40 | self.assertEqual(grad_a_, 1) 41 | self.assertEqual(grad_b_, -1) 42 | 43 | def test_mul_grad(self): 44 | graph = Graph() 45 | 46 | a = graph.tensor() 47 | b = graph.tensor() 48 | c = a * b 49 | 50 | grad_a, grad_b = graph.gradients(c, [a, b]) 51 | 52 | sess = Session(graph) 53 | grad_a_, grad_b_ = sess.run([grad_a, grad_b], feed_dict={a: 2, b: 3}) 54 | 55 | self.assertEqual(grad_a_, 3) 56 | self.assertEqual(grad_b_, 2) 57 | 58 | def test_div_grad(self): 59 | graph = Graph() 60 | 61 | a = graph.tensor() 62 | b = graph.tensor() 63 | c = a / b 64 | 65 | grad_a, grad_b = graph.gradients(c, [a, b]) 66 | 67 | sess = Session(graph) 68 | grad_a_, grad_b_ = sess.run([grad_a, grad_b], feed_dict={a: 2, b: 3}) 69 | 70 | self.assertAlmostEqual(grad_a_, 0.3333333) 71 | self.assertAlmostEqual(grad_b_, -0.2222222) 72 | 73 | def test_square_grad(self): 74 | graph = Graph() 75 | 76 | a = graph.tensor() 77 | b = graph.square(a) 78 | 79 | grad, = graph.gradients(b, [a]) 80 | 81 | sess = Session(graph) 82 | grad_, = sess.run([grad], 
feed_dict={a: 6}) 83 | 84 | self.assertEqual(grad_, 12) 85 | 86 | def test_sigmoid_grad(self): 87 | graph = Graph() 88 | 89 | a = graph.tensor() 90 | b = graph.sigmoid(a) 91 | 92 | grad, = graph.gradients(b, [a]) 93 | 94 | sess = Session(graph) 95 | grad_, = sess.run([grad], feed_dict={a: 1}) 96 | 97 | self.assertAlmostEqual(grad_, 0.19661193) 98 | 99 | def test_neg_grad(self): 100 | graph = Graph() 101 | 102 | a = graph.tensor() 103 | b = -a 104 | 105 | grad, = graph.gradients(b, [a]) 106 | 107 | sess = Session(graph) 108 | grad_, = sess.run([grad], feed_dict={a: 1}) 109 | 110 | self.assertEqual(grad_, -1) 111 | 112 | def test_dot_grad(self): 113 | graph = Graph() 114 | 115 | a = graph.tensor(np.array([0, 1, 2, 3]).reshape((1, -1))) 116 | b = graph.tensor(np.array([0, 1, 2, 3]).reshape((-1, 1))) 117 | c = graph.dot(a, b) 118 | 119 | grad_a, grad_b, = graph.gradients(c, [a, b]) 120 | 121 | sess = Session(graph) 122 | grad_a_, grad_b_ = sess.run([grad_a, grad_b]) 123 | 124 | self.assertTrue(np.array_equal(grad_a_, np.array([[0, 1, 2, 3]]))) 125 | self.assertTrue(np.array_equal(grad_b_, np.array([[0], [1], [2], [3]]))) 126 | 127 | def test_transpose_grad(self): 128 | graph = Graph() 129 | 130 | a = graph.tensor(np.array([[0, 1, 2, 3]])) 131 | b = graph.transpose(a) 132 | 133 | grad, = graph.gradients(b, [a]) 134 | 135 | sess = Session(graph) 136 | grad_, = sess.run([grad]) 137 | 138 | self.assertEqual(grad_, 1) 139 | 140 | def test_mean_grad(self): 141 | graph = Graph() 142 | 143 | a = graph.tensor(np.array([[0, 2, 4, 6]])) 144 | b = graph.mean(a) 145 | 146 | grad, = graph.gradients(b, [a]) 147 | 148 | sess = Session(graph) 149 | grad_, = sess.run([grad]) 150 | 151 | # XXX: This is intentionally incorrect. 152 | self.assertEqual(grad_, 1) 153 | 154 | def test_expression_grad(self): 155 | graph = Graph() 156 | 157 | a = graph.tensor() 158 | b = graph.tensor() 159 | 160 | c = a + b 161 | d = b + 1 162 | e = c * d 163 | 164 | de_da, de_db = graph.gradients(e, [a, b]) 165 | 166 | sess = Session(graph) 167 | 168 | a_, b_, c_, d_, e_, de_da_, de_db_ = sess.run([a, b, c, d, e, de_da, de_db], feed_dict={a: 2, b: 1}) 169 | 170 | self.assertEqual(a_, 2) 171 | self.assertEqual(b_, 1) 172 | self.assertEqual(c_, 3) 173 | self.assertEqual(d_, 2) 174 | self.assertEqual(e_, 6) 175 | self.assertEqual(de_da_, 2) 176 | self.assertEqual(de_db_, 5) 177 | 178 | if __name__ == '__main__': 179 | unittest.main() 180 | -------------------------------------------------------------------------------- /tests/test_ops.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import unittest 6 | import numpy as np 7 | 8 | from graph import Graph 9 | from session import Session 10 | 11 | class OpsTestCase(unittest.TestCase): 12 | 13 | def test_add(self): 14 | graph = Graph() 15 | 16 | a = graph.tensor() 17 | b = graph.tensor() 18 | c = a + b 19 | 20 | sess = Session(graph) 21 | 22 | a_, b_, c_ = sess.run([a, b, c], feed_dict={a: 2, b: 1}) 23 | 24 | self.assertEqual(a_, 2) 25 | self.assertEqual(b_, 1) 26 | self.assertEqual(c_, 3) 27 | 28 | def test_sub(self): 29 | graph = Graph() 30 | 31 | a = graph.tensor() 32 | b = graph.tensor() 33 | c = a - b 34 | 35 | sess = Session(graph) 36 | 37 | a_, b_, c_ = sess.run([a, b, c], feed_dict={a: 2, b: 3}) 38 | 39 | self.assertEqual(a_, 2) 40 | self.assertEqual(b_, 3) 41 | self.assertEqual(c_, -1) 42 | 43 | def test_mul(self): 44 | 
graph = Graph() 45 | 46 | a = graph.tensor() 47 | b = graph.tensor() 48 | c = a * b 49 | 50 | sess = Session(graph) 51 | 52 | a_, b_, c_ = sess.run([a, b, c], feed_dict={a: 2, b: 3}) 53 | 54 | self.assertEqual(a_, 2) 55 | self.assertEqual(b_, 3) 56 | self.assertEqual(c_, 6) 57 | 58 | def test_div(self): 59 | graph = Graph() 60 | 61 | a = graph.tensor() 62 | b = graph.tensor() 63 | c = a / b 64 | 65 | sess = Session(graph) 66 | 67 | a_, b_, c_ = sess.run([a, b, c], feed_dict={a: 6, b: 2}) 68 | 69 | self.assertEqual(a_, 6) 70 | self.assertEqual(b_, 2) 71 | self.assertEqual(c_, 3) 72 | 73 | def test_square(self): 74 | graph = Graph() 75 | 76 | a = graph.tensor() 77 | b = graph.square(a) 78 | 79 | sess = Session(graph) 80 | 81 | a_, b_ = sess.run([a, b], feed_dict={a: 3}) 82 | 83 | self.assertEqual(a_, 3) 84 | self.assertEqual(b_, 9) 85 | 86 | def test_neg(self): 87 | graph = Graph() 88 | 89 | a = graph.tensor() 90 | b = graph.neg(a) 91 | 92 | sess = Session(graph) 93 | 94 | a_, b_ = sess.run([a, b], feed_dict={a: 1}) 95 | 96 | self.assertEqual(a_, 1) 97 | self.assertEqual(b_, -1) 98 | 99 | def test_sigmoid(self): 100 | graph = Graph() 101 | 102 | a = graph.tensor() 103 | b = graph.sigmoid(a) 104 | 105 | sess = Session(graph) 106 | 107 | a_, b_ = sess.run([a, b], feed_dict={a: 1}) 108 | 109 | self.assertEqual(a_, 1) 110 | self.assertAlmostEqual(b_, 0.731058579) 111 | 112 | def test_dot(self): 113 | graph = Graph() 114 | 115 | a = graph.tensor(np.array([0, 1, 2, 3]).reshape((1, -1))) 116 | b = graph.tensor(np.array([0, 1, 2, 3]).reshape((-1, 1))) 117 | c = graph.dot(a, b) 118 | 119 | sess = Session(graph) 120 | 121 | c_, = sess.run([c]) 122 | 123 | self.assertTrue(np.array_equal(c_, [[14]])) 124 | 125 | def test_transpose(self): 126 | graph = Graph() 127 | 128 | a = graph.tensor(np.array([[0, 1, 2, 3]])) 129 | b = graph.transpose(a) 130 | 131 | sess = Session(graph) 132 | 133 | b_, = sess.run([b]) 134 | 135 | self.assertTrue(np.array_equal(b_, np.array([[0], [1], [2], [3]]))) 136 | 137 | def test_mean(self): 138 | graph = Graph() 139 | 140 | a = graph.tensor(np.array([[0, 2, 4, 6]])) 141 | b = graph.mean(a) 142 | 143 | sess = Session(graph) 144 | 145 | b_, = sess.run([b]) 146 | 147 | self.assertEqual(b_, 3) 148 | 149 | def test_assign(self): 150 | graph = Graph() 151 | 152 | a = graph.tensor(1) 153 | increment_op = graph.assign(a, a + 1) 154 | 155 | sess = Session(graph) 156 | 157 | a0, = sess.run([a]) 158 | sess.run([increment_op]) 159 | a1, = sess.run([a]) 160 | 161 | self.assertEqual(a0, 1) 162 | self.assertEqual(a1, 2) 163 | 164 | if __name__ == '__main__': 165 | unittest.main() 166 | -------------------------------------------------------------------------------- /tf_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | np.random.seed(67) 7 | 8 | import tensorflow as tf 9 | 10 | from tqdm import trange 11 | 12 | def main(): 13 | X = tf.constant([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=tf.float32) 14 | y = tf.constant([[0, 1, 1, 0]], dtype=tf.float32) 15 | 16 | weights0 = tf.Variable(np.random.normal(size=(2, 4)), dtype=tf.float32) 17 | weights1 = tf.Variable(np.random.normal(size=(4, 1)), dtype=tf.float32) 18 | 19 | activations0 = tf.sigmoid(tf.matmul(X, weights0)) 20 | activations1 = tf.sigmoid(tf.matmul(activations0, weights1)) 21 | 22 | loss_op = tf.reduce_mean(tf.square(tf.transpose(y) - activations1)) 
23 | 24 | parameters = [weights0, weights1] 25 | gradients = tf.gradients(loss_op, parameters) 26 | 27 | update_op = tf.group(*[ 28 | tf.assign(param, param - grad) \ 29 | for param, grad in zip(parameters, gradients) 30 | ]) 31 | 32 | tf.set_random_seed(67) 33 | 34 | with tf.Session() as sess: 35 | sess.run(tf.global_variables_initializer()) 36 | with trange(10000) as pbar_epoch: 37 | for _ in pbar_epoch: 38 | _, loss = sess.run([update_op, loss_op]) 39 | pbar_epoch.set_description('loss: {:.8f}'.format(loss)) 40 | 41 | if __name__ == '__main__': 42 | main() 43 | --------------------------------------------------------------------------------
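For reference, the exact gradient that `MeanOp` intentionally skips would spread the upstream gradient evenly over the input's elements (the derivative of `mean(x)` w.r.t. each element is `1/N`). Below is a minimal sketch in plain numpy, outside the graph machinery above; the helper name `mean_gradient` and the scalar `grad` argument are illustrative assumptions, not part of the repo.

import numpy as np

def mean_gradient(x, grad=1.0):
    # The derivative of mean(x) w.r.t. each element is 1/N, so the exact
    # gradient divides the upstream gradient evenly across the input's shape.
    x = np.asarray(x, dtype=float)
    return np.full_like(x, grad / x.size)

# For the input used in test_mean_grad, each element contributes 1/4:
print(mean_gradient(np.array([[0, 2, 4, 6]])))  # [[0.25 0.25 0.25 0.25]]

With shape tracking, `MeanOp.gradient` could return an array like this instead of `[grad]`, and `test_mean_grad` would then expect `0.25` per element rather than `1`.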