├── .gitignore
├── README.md
├── docs
│   ├── graph.html
│   ├── main.html
│   ├── ops.html
│   ├── public
│   │   ├── fonts
│   │   │   ├── aller-bold.eot
│   │   │   ├── aller-bold.ttf
│   │   │   ├── aller-bold.woff
│   │   │   ├── aller-light.eot
│   │   │   ├── aller-light.ttf
│   │   │   ├── aller-light.woff
│   │   │   ├── roboto-black.eot
│   │   │   ├── roboto-black.ttf
│   │   │   └── roboto-black.woff
│   │   └── stylesheets
│   │       └── normalize.css
│   ├── pycco.css
│   ├── session.html
│   ├── tensor.html
│   └── tf_test.html
├── graph.py
├── main.py
├── ops.py
├── session.py
├── tensor.py
├── tests
│   ├── __init__.py
│   ├── test_gradients.py
│   └── test_ops.py
└── tf_test.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
.env/
*.pyc
.ipynb_checkpoints/
data/

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Implementing (parts of) TensorFlow (almost) from Scratch
## A Walkthrough of Symbolic Differentiation

This [literate programming](https://en.wikipedia.org/wiki/Literate_programming)
exercise will construct a simple 2-layer feed-forward neural network to compute
the [exclusive or](https://en.wikipedia.org/wiki/Exclusive_or), using [symbolic
differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) to
compute the gradients automatically. In total, about 500 lines of code,
including comments. The only functional dependency is numpy. I highly recommend
reading Chris Olah's [Calculus on Computational Graphs:
Backpropagation](http://colah.github.io/posts/2015-08-Backprop/) for more
background on what this code is doing.

--------------------------------------------------------------------------------
/docs/graph.html:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

from tensor import Tensor
from ops import AddOp, SubOp, MulOp, DivOp, \
               DotOp, TransposeOp, SquareOp, NegOp, \
               MeanOp, SigmoidOp, AssignOp, GroupOp
`Graph` represents a computation to be evaluated by a `Session`. With the
exception of `Graph#tensor`, `Graph#convert`, and `Graph#gradients`, most
methods simply create an operation and return the output tensor of the
operation.

class Graph(object):
The `tensor` method defines a new tensor with the given initial value
and operation.

    def tensor(self, initial_value=None, op=None):
        return Tensor(initial_value=initial_value, graph=self, op=op)
The `convert` method returns the given value if it is already a `Tensor`,
otherwise it converts the value to one.

    def convert(self, value):
        if isinstance(value, Tensor):
            return value
        return self.tensor(initial_value=value)
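`convert` is what lets the operation classes accept plain Python numbers and
numpy arrays alongside tensors. A minimal sketch of the behavior (illustrative
only, using the `Graph` defined above):

    import numpy as np
    from graph import Graph

    graph = Graph()
    a = graph.tensor(initial_value=np.array(2.0))
    assert graph.convert(a) is a          # already a Tensor: returned unchanged
    b = graph.convert(3.0)                # raw value: wrapped in a new Tensor
    assert b.initial_value == 3.0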
The `gradients` method performs backpropagation using reverse accumulation and
the chain rule. It traverses the graph from `y` to each `x` in `xs`,
accumulating gradients, and returning the partial gradients for each of the
`xs`. We use a queue to keep track of the next tensor for which to compute the
gradient and keep a dictionary of the gradients computed thus far. Iteration
starts from the target output `y` with an output gradient of 1.

    def gradients(self, y, xs):
        queue = []
        queue.append((y, 1))

        grads = {}
        while len(queue) > 0:
            y, grad_y = queue.pop(0)
            grad_y = self.convert(grad_y)

            gradients = y.op.gradient(grad_y)
            assert len(gradients) == len(y.op.inputs)

            for tensor, gradient in zip(y.op.inputs, gradients):
                if tensor in grads:
                    grads[tensor] += gradient
                else:
                    grads[tensor] = gradient

                if tensor.op:
                    queue.append((tensor, gradient))

        return [grads[x] for x in xs]
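To make the traversal concrete, here is a small end-to-end sketch (illustrative
only, not part of the library) that differentiates y = x^2 + 3x at x = 2, where
the exact derivative is 2x + 3 = 7:

    import numpy as np
    from graph import Graph
    from session import Session

    graph = Graph()
    x = graph.tensor(np.array(2.0))
    y = graph.square(x) + 3 * x        # y = x^2 + 3x, built from SquareOp, MulOp, AddOp

    dy_dx, = graph.gradients(y, [x])   # symbolic: the gradient is itself a tensor in the graph
    sess = Session(graph)
    y_val, dy_dx_val = sess.run([y, dy_dx])
    assert np.isclose(y_val, 10.0) and np.isclose(dy_dx_val, 7.0)

Note that the returned gradient is just another tensor; nothing is evaluated
until a `Session` runs it.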
Each operation method defines a new operation with the provided input tensors
and returns the operation's output.

    def add(self, a, b):
        op = AddOp([a, b], graph=self)
        return op.output

    def sub(self, a, b):
        op = SubOp([a, b], graph=self)
        return op.output

    def mul(self, a, b):
        op = MulOp([a, b], graph=self)
        return op.output

    def div(self, a, b):
        op = DivOp([a, b], graph=self)
        return op.output

    def neg(self, x):
        op = NegOp([x], graph=self)
        return op.output

    def square(self, x):
        op = SquareOp([x], graph=self)
        return op.output

    def sigmoid(self, x):
        op = SigmoidOp([x], graph=self)
        return op.output

    def dot(self, a, b):
        op = DotOp([a, b], graph=self)
        return op.output

    def transpose(self, x):
        op = TransposeOp([x], graph=self)
        return op.output

    def mean(self, x):
        op = MeanOp([x], graph=self)
        return op.output

    def assign(self, a, b):
        op = AssignOp([a, b], graph=self)
        return op.output

    def group(self, inputs):
        op = GroupOp(inputs, graph=self)
        return op.output

--------------------------------------------------------------------------------
/docs/main.html:
--------------------------------------------------------------------------------
This literate programming exercise will construct a simple 2-layer
feed-forward neural network to compute the exclusive or, using symbolic
differentiation to compute the gradients automatically. In total, about 500
lines of code, including comments. The only functional dependency is numpy. I
highly recommend reading Chris Olah's Calculus on Computational Graphs:
Backpropagation for more background on what this code is doing.

The XOR task is convenient for a number of reasons: it's very fast to compute;
it is not linearly separable, thus requiring at least two layers and making the
gradient calculation more interesting; and it doesn't require more complicated
matrix-matrix features such as broadcasting.

(I'm also working on a more involved example for MNIST, but as soon as I added
support for matrix shapes and broadcasting the code ballooned by 5x and it was
no longer a simple example.)
Let's start by going over the architecture. We're going to use four main
components:

- `Graph`, composed of `Tensor` nodes and `Op` nodes that together represent
  the computation we want to differentiate.
- `Tensor` represents a value in the graph. Tensors keep a reference to the
  operation that produced them, if any.
- `BaseOp` represents a computation to perform and its differentiable
  components. Operations hold references to their input tensors and an output
  tensor.
- `Session` is used to evaluate tensors in the graph.

Note the return from a graph operation is actually a tensor, representing the
output of the operation.
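Before diving into the code, here is a minimal sketch (illustrative only) of
how the four components fit together for a trivial computation:

    import numpy as np
    from graph import Graph
    from session import Session

    graph = Graph()                      # holds the tensors and ops
    a = graph.tensor(np.array(2.0))      # a Tensor with an initial value
    b = graph.tensor(np.array(5.0))
    c = graph.add(a, b)                  # an AddOp is created; `c` is its output tensor
    assert c.op is not None              # the tensor remembers the op that produced it

    sess = Session(graph)                # a Session evaluates tensors
    result, = sess.run([c])
    assert np.isclose(result, 7.0)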
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import numpy as np
np.random.seed(67)

from tqdm import trange

from graph import Graph
from session import Session
93 |def main():
Define a new graph
117 | graph = Graph()
Initialize the training data (XOR truth table)
129 | X = graph.tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))
132 | y = graph.tensor(np.array([[0, 1, 1, 0]]))
Initialize the model's parameters (weights for each layer)
142 | weights0 = graph.tensor(np.random.normal(size=(2, 4)))
145 | weights1 = graph.tensor(np.random.normal(size=(4, 1)))
Define the model's activations
155 | activations0 = graph.sigmoid(graph.dot(X, weights0))
158 | activations1 = graph.sigmoid(graph.dot(activations0, weights1))
Define operation for computing the loss 168 | (mean squared error)
169 | loss_op = graph.mean(graph.square(graph.transpose(y) - activations1))
Define operations for the gradients w.r.t. the loss and an update 181 | operation to apply the gradients to the model's parameters.
182 | parameters = [weights0, weights1]
185 | gradients = graph.gradients(loss_op, parameters)
186 |
187 | update_op = graph.group([
188 | graph.assign(param, param - grad) \
189 | for param, grad in zip(parameters, gradients)
190 | ])
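Note that `param - grad` applies the raw gradient, i.e. an implicit learning
rate of 1.0, which happens to be enough for this tiny problem. A variant with
an explicit step size might look like the following sketch (the
`learning_rate` name is only for illustration):

    learning_rate = 0.1
    update_op = graph.group([
        graph.assign(param, param - learning_rate * grad)
        for param, grad in zip(parameters, gradients)
    ])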
Begin training... We iterate for a number of epochs, calling the session run
method each time to compute the update operation and the current loss. The
progress bar's description is updated to display the loss.

    sess = Session(graph)

    with trange(10000) as pbar_epoch:
        for _ in pbar_epoch:
            _, loss = sess.run([update_op, loss_op])
            pbar_epoch.set_description('loss: {:.8f}'.format(loss))

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/docs/ops.html:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import numpy as np
`BaseOp` represents an operation that performs computation on tensors.
Every operation consists of the following:

- A list of `inputs`, each converted to ensure they're all tensors.
- An `output` tensor to represent the result of the operation (whose value may
  be `None`).
- A reference to the `graph`, so that each operation can create further
  operations when constructing its gradients.

class BaseOp(object):

    def __init__(self, inputs, graph):
        self.inputs = [graph.convert(input_) for input_ in inputs]
        self.output = graph.tensor(op=self)
        self.graph = graph
The `compute` method receives as input the evaluated input tensors and returns
the result of performing its operation on the inputs.

    def compute(self, sess, *args):
        raise NotImplementedError()
The `gradient` method computes the partial derivative w.r.t. each input to the
operation. (Most of the derivatives come from Wikipedia.)

    def gradient(self, grad):
        raise NotImplementedError()
`AddOp` adds a tensor to another tensor. Uses the sum rule to compute the
partial derivatives.

class AddOp(BaseOp):

    def compute(self, sess, a, b):
        return a + b

    def gradient(self, grad):
        return [grad, grad]
`SubOp` subtracts a tensor from another tensor. Also uses the sum rule to
compute the partial derivatives.

class SubOp(BaseOp):

    def compute(self, sess, a, b):
        return a - b

    def gradient(self, grad):
        return [grad, -grad]
`MulOp` multiplies a tensor by another tensor. Uses the product rule to compute
the partial derivatives.

class MulOp(BaseOp):

    def compute(self, sess, a, b):
        return a * b

    def gradient(self, grad):
        a, b = self.inputs
        return [grad * b, grad * a]
`DivOp` divides a tensor by another tensor. Uses the quotient rule to compute
the partial derivatives.

class DivOp(BaseOp):

    def compute(self, sess, a, b):
        return a / b

    def gradient(self, grad):
        a, b = self.inputs
        return [grad / b, grad * (-a / self.graph.square(b))]
`NegOp` negates a tensor.

class NegOp(BaseOp):

    def compute(self, sess, x):
        return -x

    def gradient(self, grad):
        return [-grad]
`DotOp` computes the dot product between two tensors. Uses the product rule to
compute the partial derivatives. Note that here we need to transpose the terms
and perform a dot product, assuming matrices rather than scalars.

class DotOp(BaseOp):

    def compute(self, sess, a, b):
        return np.dot(a, b)

    def gradient(self, grad):
        a, b = self.inputs
        return [
            self.graph.dot(grad, self.graph.transpose(b)),
            self.graph.dot(self.graph.transpose(a), grad),
        ]
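Those two expressions are the standard matrix-calculus results: for
C = np.dot(A, B), the input gradients are np.dot(G, B.T) and np.dot(A.T, G),
where G is the upstream gradient w.r.t. C. A standalone numpy sanity check
(illustrative only, not part of the library), using the loss sum(np.dot(A, B))
so that G is all ones:

    import numpy as np

    A = np.random.randn(3, 4)
    B = np.random.randn(4, 2)
    G = np.ones((3, 2))                        # upstream gradient of sum(np.dot(A, B))

    grad_A = np.dot(G, B.T)                    # what DotOp returns for input `a`
    grad_B = np.dot(A.T, G)                    # what DotOp returns for input `b`

    # Finite-difference check on the (0, 0) entry of A.
    eps = 1e-6
    A_eps = A.copy()
    A_eps[0, 0] += eps
    numeric = (np.sum(np.dot(A_eps, B)) - np.sum(np.dot(A, B))) / eps
    assert np.allclose(numeric, grad_A[0, 0], atol=1e-4)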
`SquareOp` squares a tensor.

class SquareOp(BaseOp):

    def compute(self, sess, x):
        return np.square(x)

    def gradient(self, grad):
        x = self.inputs[0]
        return [grad * (2 * x)]
`TransposeOp` transposes a tensor.

class TransposeOp(BaseOp):

    def compute(self, sess, x):
        return np.transpose(x)

    def gradient(self, grad):
        return [self.graph.transpose(grad)]
`SigmoidOp` implements the sigmoid function and its derivative. Notice that the
derivative uses the output of the operation, which saves recomputation.

class SigmoidOp(BaseOp):

    def compute(self, sess, x):
        return 1 / (1 + np.exp(-x))

    def gradient(self, grad):
        y = self.output
        return [grad * (y * (1 - y))]
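The identity used here, sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)), is what
lets the gradient reuse the already-computed output instead of recomputing the
exponential. A quick standalone numpy check of that identity (not part of the
library):

    import numpy as np

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    x = np.linspace(-5, 5, 11)
    y = sigmoid(x)

    eps = 1e-6
    numeric = (sigmoid(x + eps) - sigmoid(x - eps)) / (2 * eps)  # central difference
    assert np.allclose(numeric, y * (1 - y), atol=1e-6)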
`MeanOp` computes the mean of a tensor. Note the gradient here is intentionally
incorrect because computing it requires knowing the shape of the input and
output tensors. Fortunately, gradients are fairly malleable in optimization.

class MeanOp(BaseOp):

    def compute(self, sess, x):
        return np.mean(x)

    def gradient(self, grad):
        return [grad]
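For reference, the exact gradient of a mean spreads the upstream gradient
evenly over the input: each element contributes 1/N to the output, so each
element receives grad/N. A standalone numpy check of that fact (illustrative
only, not how the library computes it):

    import numpy as np

    x = np.random.randn(4, 1)
    upstream = 1.0                                     # gradient of the loss w.r.t. mean(x)
    exact_grad = upstream * np.ones_like(x) / x.size   # each element gets 1/N

    eps = 1e-6
    numeric = np.zeros_like(x)
    for i in range(x.shape[0]):
        bumped = x.copy()
        bumped[i, 0] += eps
        numeric[i, 0] = (np.mean(bumped) - np.mean(x)) / eps

    assert np.allclose(numeric, exact_grad, atol=1e-4)

In the XOR example the mean is taken over four scalar errors, so the missing
1/4 factor only rescales the effective step size.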
`GroupOp` exploits the fact that each input to the operation is automatically
evaluated before computing the operation's output, allowing us to group
together the evaluation of multiple operations. Its input gradients come from
simply broadcasting the output gradient.

class GroupOp(BaseOp):

    def compute(self, sess, *args):
        return None

    def gradient(self, grad):
        return [grad] * len(self.inputs)
`AssignOp` updates the session's current state for a tensor. It is not
differentiable in this implementation.

class AssignOp(BaseOp):

    def compute(self, sess, a, b):
        assert a.shape == b.shape, \
            'shapes must match to assign: {} != {}' \
            .format(a.shape, b.shape)
        sess.state[self.inputs[0]] = b
        return b

--------------------------------------------------------------------------------
/docs/session.html:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import numpy as np
`Session` performs computation on a graph.

class Session(object):

Initializing a session with a graph and a state dictionary to hold tensor
values.

    def __init__(self, graph):
        self.graph = graph
        self.state = {}
`run_op` takes as input an operation to run and a context used to fetch
pre-evaluated tensors.

    def run_op(self, op, context):
        args = [self.eval_tensor(tensor, context) for tensor in op.inputs]
        return op.compute(self, *args)
`eval_tensor` takes as input a tensor to evaluate and a context to fetch
pre-evaluated tensors. If the tensor is not already in the context there are
three possibilities for evaluating the tensor:

- The tensor was produced by an operation: run the operation and cache its
  result in the context.
- The tensor already has a value in the session state: use that value.
- The tensor has an initial value: store it in the session state and use it.

    def eval_tensor(self, tensor, context):
        if tensor not in context:
            if tensor.op is not None:
                context[tensor] = self.run_op(tensor.op, context)
            elif tensor in self.state and self.state[tensor] is not None:
                context[tensor] = self.state[tensor]
            elif tensor not in self.state and tensor.initial_value is not None:
                context[tensor] = self.state[tensor] = tensor.initial_value

        return context[tensor]
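Because results are cached in `context`, a tensor that is used in several
places is only evaluated once per `run` call. A small standalone sketch of
that behavior (illustrative only):

    import numpy as np
    from graph import Graph
    from session import Session

    graph = Graph()
    x = graph.tensor(np.array(3.0))
    shared = graph.square(x)              # used twice below
    y = graph.add(shared, shared)

    sess = Session(graph)
    result, = sess.run([y])
    assert np.isclose(result, 18.0)       # `shared` was computed once and reused from the context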
`run` takes a list of tensors to evaluate and a feed dictionary that can be
used to override tensors.

    def run(self, tensors, feed_dict=None):
        context = {}

        if feed_dict:
            context.update(feed_dict)

        return [self.eval_tensor(tensor, context) for tensor in tensors]
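Overriding a tensor through `feed_dict` simply pre-populates the context, so
anything downstream of it is recomputed from the fed value. For example
(illustrative only, not part of the library):

    import numpy as np
    from graph import Graph
    from session import Session

    graph = Graph()
    x = graph.tensor(np.array(2.0))
    y = graph.square(x)

    sess = Session(graph)
    assert np.isclose(sess.run([y])[0], 4.0)
    assert np.isclose(sess.run([y], feed_dict={x: np.array(5.0)})[0], 25.0)  # x overridden for this call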

--------------------------------------------------------------------------------
/docs/tensor.html:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import numpy as np
`Tensor` represents a value in the graph. It's just a data container with
methods for operator overloading (each of which delegates to the graph). It
includes:

- An `initial_value` for the tensor, if any.
- The `op` that produced the tensor, if any.
- A reference to the `graph` the tensor belongs to.

class Tensor(object):

    def __init__(self, initial_value, op, graph):
        self.initial_value = initial_value
        self.graph = graph
        self.op = op
    def __add__(self, other):
        return self.graph.add(self, other)

    def __sub__(self, other):
        return self.graph.sub(self, other)

    def __mul__(self, other):
        return self.graph.mul(self, other)

    def __truediv__(self, other):
        return self.graph.div(self, other)

    def __neg__(self):
        return self.graph.neg(self)

    def __radd__(self, other):
        return self.graph.add(other, self)

    def __rsub__(self, other):
        return self.graph.sub(other, self)

    def __rmul__(self, other):
        return self.graph.mul(other, self)

    def __rtruediv__(self, other):
        return self.graph.div(other, self)
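These overloads are why expressions like `graph.transpose(y) - activations1`
in main.py read naturally: the arithmetic operators build new ops on the
tensor's graph and return their output tensors. A quick standalone sketch
(illustrative only):

    import numpy as np
    from graph import Graph
    from session import Session

    graph = Graph()
    x = graph.tensor(np.array(4.0))
    y = 1 - x / 2              # __truediv__ and __rsub__ build DivOp and SubOp nodes

    sess = Session(graph)
    result, = sess.run([y])
    assert np.isclose(result, -1.0)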

--------------------------------------------------------------------------------
/docs/tf_test.html:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import numpy as np
np.random.seed(67)

import tensorflow as tf

from tqdm import trange
def main():
    # XOR truth table inputs and targets, mirroring main.py.
    X = tf.constant([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=tf.float32)
    y = tf.constant([[0, 1, 1, 0]], dtype=tf.float32)

    # Weights for the two layers, drawn from the same seeded normal as main.py.
    weights0 = tf.Variable(np.random.normal(size=(2, 4)), dtype=tf.float32)
    weights1 = tf.Variable(np.random.normal(size=(4, 1)), dtype=tf.float32)

    # Two sigmoid layers.
    activations0 = tf.sigmoid(tf.matmul(X, weights0))
    activations1 = tf.sigmoid(tf.matmul(activations0, weights1))

    # Mean squared error loss.
    loss_op = tf.reduce_mean(tf.square(tf.transpose(y) - activations1))

    # Gradients and a grouped parameter update, as in main.py.
    parameters = [weights0, weights1]
    gradients = tf.gradients(loss_op, parameters)

    update_op = tf.group(*[
        tf.assign(param, param - grad) \
        for param, grad in zip(parameters, gradients)
    ])

    tf.set_random_seed(67)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        with trange(10000) as pbar_epoch:
            for _ in pbar_epoch:
                _, loss = sess.run([update_op, loss_op])
                pbar_epoch.set_description('loss: {:.8f}'.format(loss))

if __name__ == '__main__':
    main()