├── .gitignore ├── README.md ├── autodiff.py ├── autodiff_test.py ├── data.txt └── lr_autodiff.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.dat 3 | *.npy 4 | .DS_Store 5 | .idea 6 | __pycache__ 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | from https://gitee.com/Carl-Xie/AutodiffEngine 2 | 3 | An automatic differentiation engine, based on Tianqi Chen's course project at the University of Washington. -------------------------------------------------------------------------------- /autodiff.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Node(object): 4 | """Node in a computation graph.""" 5 | def __init__(self): 6 | """Constructor; new nodes are created indirectly through an Op object's __call__ method. 7 | 8 | Instance variables 9 | ------------------ 10 | self.inputs: the list of input nodes. 11 | self.op: the associated op object, 12 | e.g. add_op object if this node is created by adding two other nodes. 13 | self.const_attr: the add or multiply constant, 14 | e.g. self.const_attr=5 if this node is created by x+5. 15 | self.name: node name for debugging purposes. 16 | """ 17 | self.inputs = [] 18 | self.op = None 19 | self.const_attr = None 20 | self.name = "" 21 | 22 | def __add__(self, other): 23 | """Adding two nodes returns a new node.""" 24 | if isinstance(other, Node): 25 | new_node = add_op(self, other) 26 | else: 27 | # Add by a constant stores the constant in the new node's const_attr field. 28 | # 'other' argument is a constant 29 | new_node = add_byconst_op(self, other) 30 | return new_node 31 | 32 | def __mul__(self, other): 33 | if isinstance(other, Node): 34 | new_node = mul_op(self, other) 35 | else: 36 | new_node = mul_byconst_op(self, other) 37 | return new_node 38 | 39 | def __truediv__(self, other): 40 | if isinstance(other, Node): 41 | new_node = div_op(self, other) 42 | else: 43 | new_node = div_byconst_op(self, other) 44 | return new_node 45 | 46 | def __rtruediv__(self, other): 47 | if isinstance(other, Node): 48 | new_node = div_op(other, self)  # other / self 49 | else: 50 | new_node = rdiv_byconst_op(self, other) 51 | return new_node 52 | 53 | def __sub__(self, other): 54 | if isinstance(other, Node): 55 | new_node = sub_op(self, other) 56 | else: 57 | new_node = sub_byconst_op(self, other) 58 | return new_node 59 | 60 | def __rsub__(self, other): 61 | if isinstance(other, Node): 62 | new_node = sub_op(other, self)  # other - self 63 | else: 64 | new_node = rsub_byconst_op(self, other) 65 | return new_node 66 | 67 | def __neg__(self): 68 | return neg_op(self) 69 | 70 | # Allow left-hand-side add and multiply. 71 | __radd__ = __add__ 72 | __rmul__ = __mul__ 73 | 74 | def __str__(self): 75 | """Allow print to display node name.""" 76 | return self.name 77 | 78 | def Variable(name): 79 | """User defined variables in an expression. 80 | e.g. x = Variable(name = "x") 81 | """ 82 | placeholder_node = placeholder_op() 83 | placeholder_node.name = name 84 | return placeholder_node 85 | 86 | class Op(object): 87 | """Op represents operations performed on nodes.""" 88 | def __call__(self): 89 | """Create a new node and associate the op object with the node. 90 | 91 | Returns 92 | ------- 93 | The new node object. 94 | """ 95 | new_node = Node() 96 | new_node.op = self 97 | return new_node 98 | 99 | def compute(self, node, input_vals): 100 | """Given values of input nodes, compute the output value. 
101 | 102 | Parameters 103 | ---------- 104 | node: node that performs the compute. 105 | input_vals: values of input nodes. 106 | 107 | Returns 108 | ------- 109 | An output value of the node. 110 | """ 111 | assert False, "Implemented in subclass" 112 | 113 | def gradient(self, node, output_grad): 114 | """Given value of output gradient, compute gradient contributions to each input node. 115 | 116 | Parameters 117 | ---------- 118 | node: node that performs the gradient. 119 | output_grad: value of output gradient summed from children nodes' contributions 120 | 121 | Returns 122 | ------- 123 | A list of gradient contributions to each input node respectively. 124 | """ 125 | assert False, "Implemented in subclass" 126 | 127 | 128 | class NegOp(Op): 129 | 130 | def __call__(self, node): 131 | new_node = Op.__call__(self) 132 | new_node.inputs = [node] 133 | new_node.name = "-%s" % node.name 134 | return new_node 135 | 136 | def compute(self, node, input_vals): 137 | assert len(input_vals) == 1 138 | return -input_vals[0] 139 | 140 | def gradient(self, node, output_grad): 141 | return [-output_grad] 142 | 143 | 144 | class AddOp(Op): 145 | """Op to element-wise add two nodes.""" 146 | def __call__(self, node_A, node_B): 147 | new_node = Op.__call__(self) 148 | new_node.inputs = [node_A, node_B] 149 | new_node.name = "(%s+%s)" % (node_A.name, node_B.name) 150 | return new_node 151 | 152 | def compute(self, node, input_vals): 153 | """Given values of two input nodes, return result of element-wise addition.""" 154 | assert len(input_vals) == 2 155 | return input_vals[0] + input_vals[1] 156 | 157 | def gradient(self, node, output_grad): 158 | """Given gradient of add node, return gradient contributions to each input.""" 159 | return [output_grad, output_grad] 160 | 161 | class SubOp(Op): 162 | 163 | def __call__(self, node_A, node_B): 164 | new_node = Op.__call__(self) 165 | new_node.inputs = [node_A, node_B] 166 | new_node.name = "%s-%s" % (node_A.name, node_B.name) 167 | return new_node 168 | 169 | def compute(self, node, input_vals): 170 | assert len(input_vals) == 2 171 | return input_vals[0] - input_vals[1] 172 | 173 | def gradient(self, node, output_grad): 174 | return [output_grad, -output_grad] 175 | 176 | 177 | class AddByConstOp(Op): 178 | """Op to element-wise add a nodes by a constant.""" 179 | def __call__(self, node_A, const_val): 180 | new_node = Op.__call__(self) 181 | new_node.const_attr = const_val 182 | new_node.inputs = [node_A] 183 | new_node.name = "(%s+%s)" % (node_A.name, str(const_val)) 184 | return new_node 185 | 186 | def compute(self, node, input_vals): 187 | """Given values of input node, return result of element-wise addition.""" 188 | assert len(input_vals) == 1 189 | return input_vals[0] + node.const_attr 190 | 191 | def gradient(self, node, output_grad): 192 | """Given gradient of add node, return gradient contribution to input.""" 193 | return [output_grad] 194 | 195 | 196 | class SubByConstOp(Op): 197 | 198 | def __call__(self, node_A, const_val): 199 | new_node = Op.__call__(self) 200 | new_node.const_attr = const_val 201 | new_node.inputs = [node_A] 202 | new_node.name = "(%s-%s)" % (node_A.name, str(const_val)) 203 | return new_node 204 | 205 | def compute(self, node, input_vals): 206 | assert len(input_vals) == 1 207 | return input_vals[0] - node.const_attr 208 | 209 | def gradient(self, node, output_grad): 210 | return [output_grad] 211 | 212 | 213 | class RSubByConstOp(Op): 214 | 215 | def __call__(self, node_A, const_val): 216 | new_node = 
Op.__call__(self) 217 | new_node.const_attr = const_val 218 | new_node.inputs = [node_A] 219 | new_node.name = "(%s-%s)" % (str(const_val), node_A.name) 220 | return new_node 221 | 222 | def compute(self, node, input_vals): 223 | assert len(input_vals) == 1 224 | return node.const_attr - input_vals[0] 225 | 226 | def gradient(self, node, output_grad): 227 | return [-output_grad] 228 | 229 | 230 | class MulOp(Op): 231 | """Op to element-wise multiply two nodes.""" 232 | def __call__(self, node_A, node_B): 233 | new_node = Op.__call__(self) 234 | new_node.inputs = [node_A, node_B] 235 | new_node.name = "(%s*%s)" % (node_A.name, node_B.name) 236 | return new_node 237 | 238 | def compute(self, node, input_vals): 239 | """Given values of two input nodes, return result of element-wise multiplication.""" 240 | assert len(input_vals) == 2 241 | return input_vals[0] * input_vals[1] 242 | 243 | def gradient(self, node, output_grad): 244 | """Given gradient of multiply node, return gradient contributions to each input.""" 245 | return [node.inputs[1] * output_grad, node.inputs[0] * output_grad] 246 | 247 | 248 | class DivOp(Op): 249 | 250 | def __call__(self, node_A, node_B): 251 | new_node = Op.__call__(self) 252 | new_node.inputs = [node_A, node_B] 253 | new_node.name = "%s/%s" % (node_A.name, node_B.name) 254 | return new_node 255 | 256 | def compute(self, node, input_vals): 257 | assert len(input_vals) == 2 258 | return input_vals[0] / input_vals[1] 259 | 260 | def gradient(self, node, output_grad): 261 | return [output_grad / node.inputs[1], -output_grad * node.inputs[0] / (node.inputs[1] * node.inputs[1])] 262 | 263 | 264 | class DivByConstOp(Op): 265 | 266 | def __call__(self, node_A, const_val): 267 | new_node = Op.__call__(self) 268 | new_node.inputs = [node_A] 269 | new_node.const_attr = const_val 270 | new_node.name = "%s/%s" % (node_A.name, str(const_val)) 271 | return new_node 272 | 273 | def compute(self, node, input_vals): 274 | assert len(input_vals) == 1 275 | return input_vals[0] / node.const_attr 276 | 277 | def gradient(self, node, output_grad): 278 | return [output_grad / node.const_attr] 279 | 280 | 281 | class RDivByConstOp(Op): 282 | 283 | def __call__(self, node_A, const_val): 284 | new_node = Op.__call__(self) 285 | new_node.inputs = [node_A] 286 | new_node.const_attr = const_val 287 | new_node.name = "%s/%s" % (str(const_val), node_A.name) 288 | return new_node 289 | 290 | def compute(self, node, input_vals): 291 | assert len(input_vals) == 1 292 | return node.const_attr / input_vals[0] 293 | 294 | def gradient(self, node, output_grad): 295 | return [-output_grad * node.const_attr / (node.inputs[0] * node.inputs[0])] 296 | 297 | 298 | class MulByConstOp(Op): 299 | """Op to element-wise multiply a nodes by a constant.""" 300 | def __call__(self, node_A, const_val): 301 | new_node = Op.__call__(self) 302 | new_node.const_attr = const_val 303 | new_node.inputs = [node_A] 304 | new_node.name = "(%s*%s)" % (node_A.name, str(const_val)) 305 | return new_node 306 | 307 | def compute(self, node, input_vals): 308 | """Given values of input node, return result of element-wise multiplication.""" 309 | """TODO: Your code here""" 310 | assert len(input_vals) == 1 311 | return input_vals[0] * node.const_attr 312 | 313 | def gradient(self, node, output_grad): 314 | """Given gradient of multiplication node, return gradient contribution to input.""" 315 | """TODO: Your code here""" 316 | return [output_grad * node.const_attr] 317 | 318 | 319 | class MatMulOp(Op): 320 | """Op to matrix 
multiply two nodes.""" 321 | def __call__(self, node_A, node_B, trans_A=False, trans_B=False): 322 | """Create a new node that is the result a matrix multiple of two input nodes. 323 | 324 | Parameters 325 | ---------- 326 | node_A: lhs of matrix multiply 327 | node_B: rhs of matrix multiply 328 | trans_A: whether to transpose node_A 329 | trans_B: whether to transpose node_B 330 | 331 | Returns 332 | ------- 333 | Returns a node that is the result a matrix multiple of two input nodes. 334 | """ 335 | new_node = Op.__call__(self) 336 | new_node.matmul_attr_trans_A = trans_A 337 | new_node.matmul_attr_trans_B = trans_B 338 | new_node.inputs = [node_A, node_B] 339 | new_node.name = "MatMul(%s,%s,%s,%s)" % (node_A.name, node_B.name, str(trans_A), str(trans_B)) 340 | return new_node 341 | 342 | def compute(self, node, input_vals): 343 | """Given values of input nodes, return result of matrix multiplication.""" 344 | mat_A = input_vals[0] 345 | mat_B = input_vals[1] 346 | if node.matmul_attr_trans_A: 347 | mat_A = mat_A.T 348 | if node.matmul_attr_trans_B: 349 | mat_B = mat_B.T 350 | return np.matmul(mat_A, mat_B) 351 | 352 | def gradient(self, node, output_grad): 353 | """Given gradient of multiply node, return gradient contributions to each input. 354 | 355 | Useful formula: if Y=AB, then dA=dY B^T, dB=A^T dY 356 | """ 357 | return [matmul_op(output_grad, node.inputs[1], False, True), 358 | matmul_op(node.inputs[0], output_grad, True, False)] 359 | 360 | 361 | class PlaceholderOp(Op): 362 | """Op to feed value to a nodes.""" 363 | def __call__(self): 364 | """Creates a variable node.""" 365 | new_node = Op.__call__(self) 366 | return new_node 367 | 368 | def compute(self, node, input_vals): 369 | """No compute function since node value is fed directly in Executor.""" 370 | assert False, "placeholder values provided by feed_dict" 371 | 372 | def gradient(self, node, output_grad): 373 | """No gradient function since node has no inputs.""" 374 | return None 375 | 376 | 377 | class ZerosLikeOp(Op): 378 | """Op that represents a constant np.zeros_like.""" 379 | def __call__(self, node_A): 380 | """Creates a node that represents a np.zeros array of same shape as node_A.""" 381 | new_node = Op.__call__(self) 382 | new_node.inputs = [node_A] 383 | new_node.name = "Zeroslike(%s)" % node_A.name 384 | return new_node 385 | 386 | def compute(self, node, input_vals): 387 | """Returns zeros_like of the same shape as input.""" 388 | assert(isinstance(input_vals[0], np.ndarray)) 389 | return np.zeros(input_vals[0].shape) 390 | 391 | def gradient(self, node, output_grad): 392 | return [zeroslike_op(node.inputs[0])] 393 | 394 | 395 | class OnesLikeOp(Op): 396 | """Op that represents a constant np.ones_like.""" 397 | def __call__(self, node_A): 398 | """Creates a node that represents a np.ones array of same shape as node_A.""" 399 | new_node = Op.__call__(self) 400 | new_node.inputs = [node_A] 401 | new_node.name = "Oneslike(%s)" % node_A.name 402 | return new_node 403 | 404 | def compute(self, node, input_vals): 405 | """Returns ones_like of the same shape as input.""" 406 | assert(isinstance(input_vals[0], np.ndarray)) 407 | return np.ones(input_vals[0].shape) 408 | 409 | def gradient(self, node, output_grad): 410 | return [zeroslike_op(node.inputs[0])] 411 | 412 | 413 | class LogOp(Op): 414 | 415 | def __call__(self, node): 416 | new_node = Op.__call__(self) 417 | new_node.inputs = [node] 418 | new_node.name = "log(%s)" % node.name 419 | return new_node 420 | 421 | def compute(self, node, input_vals): 422 | 
assert len(input_vals) == 1 423 | return np.log(input_vals[0]) 424 | 425 | def gradient(self, node, output_grad): 426 | return [output_grad / node.inputs[0]] 427 | 428 | 429 | class ExpOp(Op): 430 | 431 | def __call__(self, node): 432 | new_node = Op.__call__(self) 433 | new_node.inputs = [node] 434 | new_node.name = "exp(%s)" % node.name 435 | return new_node 436 | 437 | def compute(self, node, input_vals): 438 | assert len(input_vals) == 1 439 | return np.exp(input_vals[0]) 440 | 441 | def gradient(self, node, output_grad): 442 | return [output_grad * exp_op(node.inputs[0])] 443 | 444 | 445 | class ReduceSumOp(Op): 446 | 447 | def __call__(self, node): 448 | new_node = Op.__call__(self) 449 | new_node.inputs = [node] 450 | new_node.name = "reduce_sum(%s)" % node.name 451 | return new_node 452 | 453 | def compute(self, node, input_vals): 454 | assert isinstance(input_vals[0], np.ndarray) 455 | return np.sum(input_vals[0]) 456 | 457 | def gradient(self, node, output_grad): 458 | return [output_grad * oneslike_op(node.inputs[0])] 459 | 460 | # Create global singletons of operators. 461 | add_op = AddOp() 462 | mul_op = MulOp() 463 | div_op = DivOp() 464 | sub_op = SubOp() 465 | neg_op = NegOp() 466 | add_byconst_op = AddByConstOp() 467 | rsub_byconst_op = RSubByConstOp() 468 | sub_byconst_op = SubByConstOp() 469 | mul_byconst_op = MulByConstOp() 470 | div_byconst_op = DivByConstOp() 471 | rdiv_byconst_op = RDivByConstOp() 472 | matmul_op = MatMulOp() 473 | placeholder_op = PlaceholderOp() 474 | oneslike_op = OnesLikeOp() 475 | zeroslike_op = ZerosLikeOp() 476 | log_op = LogOp() 477 | exp_op = ExpOp() 478 | reduce_sum = ReduceSumOp() 479 | 480 | 481 | def exp(val): 482 | if isinstance(val, Node): 483 | return exp_op(val) 484 | return np.exp(val) 485 | 486 | 487 | def log(val): 488 | if isinstance(val, Node): 489 | return log_op(val) 490 | return np.log(val) 491 | 492 | 493 | class Executor: 494 | """Executor computes values for a given subset of nodes in a computation graph.""" 495 | def __init__(self, eval_node_list): 496 | """ 497 | Parameters 498 | ---------- 499 | eval_node_list: list of nodes whose values need to be computed. 500 | """ 501 | self.eval_node_list = eval_node_list 502 | 503 | 504 | def run(self, feed_dict): 505 | """Computes values of nodes in eval_node_list given computation graph. 506 | Parameters 507 | ---------- 508 | feed_dict: dict mapping variable (placeholder) nodes to the values supplied by the user. 509 | 510 | Returns 511 | ------- 512 | A list of values for nodes in eval_node_list. 513 | """ 514 | node_to_val_map = dict(feed_dict) 515 | # Traverse graph in topological sort order and compute values for all nodes. 516 | 517 | topo_order = find_topo_sort(self.eval_node_list) 518 | for node in topo_order: 519 | if isinstance(node.op, PlaceholderOp): 520 | continue 521 | vals = [node_to_val_map[n] for n in node.inputs] 522 | compute_val = node.op.compute(node, vals) 523 | node_to_val_map[node] = compute_val if isinstance(compute_val, np.ndarray) else np.array(compute_val) 524 | 525 | # Collect node values. 526 | node_val_results = [node_to_val_map[node] for node in self.eval_node_list] 527 | return node_val_results 528 | 529 | 530 | def gradients(output_node, node_list): 531 | """Take gradient of output node with respect to each node in node_list. 532 | 533 | Parameters 534 | ---------- 535 | output_node: output node that we are taking derivative of. 536 | node_list: list of nodes that we are taking derivative wrt. 
537 | 538 | Returns 539 | ------- 540 | A list of gradient nodes, one for each node in node_list respectively; evaluate them with an Executor to obtain the actual gradient values. 541 | 542 | """ 543 | 544 | # a map from node to a list of gradient contributions from each output node 545 | node_to_output_grads_list = {} 546 | # Special note on initializing gradient of output_node as oneslike_op(output_node): 547 | # We are really taking a derivative of the scalar reduce_sum(output_node) 548 | # instead of the vector output_node. But this is the common case for loss functions. 549 | node_to_output_grads_list[output_node] = [oneslike_op(output_node)] 550 | # a map from node to the gradient of that node 551 | node_to_output_grad = {} 552 | # Traverse graph in reverse topological order given the output_node that we are taking gradient wrt. 553 | reverse_topo_order = reversed(find_topo_sort([output_node])) 554 | 555 | for node in reverse_topo_order: 556 | grad = sum_node_list(node_to_output_grads_list[node]) 557 | node_to_output_grad[node] = grad 558 | grads = node.op.gradient(node, grad)  # gradient contributions to each input, computed once per node 559 | for i in range(len(node.inputs)): 560 | ch = node.inputs[i] 561 | grads_list = node_to_output_grads_list.get(ch, []) 562 | grads_list.append(grads[i]) 563 | node_to_output_grads_list[ch] = grads_list 564 | 565 | # Collect results for gradients requested. 566 | grad_node_list = [node_to_output_grad[node] for node in node_list] 567 | return grad_node_list 568 | 569 | 570 | 571 | ############################## 572 | 573 | ####### Helper Methods ####### 574 | 575 | ############################## 576 | 577 | 578 | def find_topo_sort(node_list): 579 | """Given a list of nodes, return a topologically sorted list of nodes ending in them. 580 | 581 | A simple algorithm is to do a post-order DFS traversal on the given nodes, 582 | going backwards based on input edges. Since a node is added to the ordering 583 | after all its predecessors are traversed due to post-order DFS, we get a topological 584 | sort. 
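For example, for y = (x1*x2)+x1 built from Variables x1 and x2, find_topo_sort([y]) yields [x1, x2, (x1*x2), ((x1*x2)+x1)]: every node appears after all of its inputs (the expression here is only illustrative).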
585 | 586 | """ 587 | visited = set() 588 | topo_order = [] 589 | for node in node_list: 590 | topo_sort_dfs(node, visited, topo_order) 591 | return topo_order 592 | 593 | 594 | def topo_sort_dfs(node, visited, topo_order): 595 | """Post-order DFS""" 596 | if node in visited: 597 | return 598 | visited.add(node) 599 | for n in node.inputs: 600 | topo_sort_dfs(n, visited, topo_order) 601 | topo_order.append(node) 602 | 603 | 604 | def sum_node_list(node_list): 605 | """Custom sum function in order to avoid create redundant nodes in Python sum implementation.""" 606 | from operator import add 607 | from functools import reduce 608 | return reduce(add, node_list) 609 | 610 | 611 | -------------------------------------------------------------------------------- /autodiff_test.py: -------------------------------------------------------------------------------- 1 | import autodiff as ad 2 | import numpy as np 3 | 4 | 5 | def test_identity(): 6 | x2 = ad.Variable(name="x2") 7 | y = x2 8 | 9 | grad_x2, = ad.gradients(y, [x2]) 10 | 11 | executor = ad.Executor([y, grad_x2]) 12 | x2_val = 2 * np.ones(3) 13 | y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val}) 14 | 15 | assert isinstance(y, ad.Node) 16 | assert np.array_equal(y_val, x2_val) 17 | assert np.array_equal(grad_x2_val, np.ones_like(x2_val)) 18 | 19 | 20 | def test_add_by_const(): 21 | x2 = ad.Variable(name = "x2") 22 | y = 5 + x2 23 | 24 | grad_x2, = ad.gradients(y, [x2]) 25 | 26 | executor = ad.Executor([y, grad_x2]) 27 | x2_val = 2 * np.ones(3) 28 | y_val, grad_x2_val= executor.run(feed_dict = {x2 : x2_val}) 29 | 30 | assert isinstance(y, ad.Node) 31 | assert np.array_equal(y_val, x2_val + 5) 32 | assert np.array_equal(grad_x2_val, np.ones_like(x2_val)) 33 | 34 | 35 | def test_sub_by_const(): 36 | x2 = ad.Variable(name='x2') 37 | y = 3 - x2 38 | grad_x2, = ad.gradients(y, [x2]) 39 | executor = ad.Executor([y, grad_x2]) 40 | x2_val = 2 * np.ones(3) 41 | y_val, grad_x2_val= executor.run(feed_dict = {x2 : x2_val}) 42 | 43 | assert isinstance(y, ad.Node) 44 | assert np.array_equal(y_val, 3 - x2_val) 45 | assert np.array_equal(grad_x2_val, -np.ones_like(x2_val)) 46 | 47 | 48 | def test_neg(): 49 | x1 = ad.Variable(name='x1') 50 | x2 = ad.Variable(name='x2') 51 | 52 | y = -x2 + x1 53 | 54 | grad_x1, grad_x2 = ad.gradients(y, [x1, x2]) 55 | executor = ad.Executor([y, grad_x1, grad_x2]) 56 | x2_val = 2 * np.ones(3) 57 | x1_val = 3 * np.ones(3) 58 | y_val, grad_x1_val, grad_x2_val = executor.run(feed_dict = {x1: x1_val, x2 : x2_val}) 59 | 60 | assert isinstance(y, ad.Node) 61 | assert np.array_equal(y_val, -x2_val + x1_val) 62 | assert np.array_equal(grad_x2_val, -np.ones_like(x2_val)) 63 | assert np.array_equal(grad_x1_val, np.ones_like(x1_val)) 64 | 65 | 66 | def test_mul_by_const(): 67 | x2 = ad.Variable(name = "x2") 68 | y = 5 * x2 69 | 70 | grad_x2, = ad.gradients(y, [x2]) 71 | 72 | executor = ad.Executor([y, grad_x2]) 73 | x2_val = 2 * np.ones(3) 74 | y_val, grad_x2_val= executor.run(feed_dict = {x2 : x2_val}) 75 | 76 | assert isinstance(y, ad.Node) 77 | assert np.array_equal(y_val, x2_val * 5) 78 | assert np.array_equal(grad_x2_val, np.ones_like(x2_val) * 5) 79 | 80 | 81 | def test_div_two_vars(): 82 | x1 = ad.Variable(name = 'x1') 83 | x2 = ad.Variable(name = 'x2') 84 | 85 | y = x1 / x2 86 | 87 | grad_x1, grad_x2 = ad.gradients(y, [x1, x2]) 88 | 89 | executor = ad.Executor([y, grad_x1, grad_x2]) 90 | x1_val = 2 * np.ones(3) 91 | x2_val = 5 * np.ones(3) 92 | y_val, grad_x1_val, grad_x2_val= executor.run(feed_dict = {x1: x1_val, x2 : 
x2_val}) 93 | 94 | assert isinstance(y, ad.Node) 95 | assert np.array_equal(y_val, x1_val / x2_val) 96 | assert np.array_equal(grad_x1_val, np.ones_like(x1_val) / x2_val) 97 | assert np.array_equal(grad_x2_val, -x1_val / (x2_val * x2_val)) 98 | 99 | 100 | def test_div_by_const(): 101 | x2 = ad.Variable(name = "x2") 102 | y = 5 / x2 103 | 104 | grad_x2, = ad.gradients(y, [x2]) 105 | 106 | executor = ad.Executor([y, grad_x2]) 107 | x2_val = 2 * np.ones(3) 108 | y_val, grad_x2_val= executor.run(feed_dict = {x2 : x2_val}) 109 | 110 | assert isinstance(y, ad.Node) 111 | assert np.array_equal(y_val, 5 / x2_val) 112 | print(grad_x2_val) 113 | print(-5 / (x2_val * x2_val)) 114 | assert np.array_equal(grad_x2_val, -5 / (x2_val * x2_val)) 115 | 116 | 117 | def test_add_two_vars(): 118 | x2 = ad.Variable(name = "x2") 119 | x3 = ad.Variable(name = "x3") 120 | y = x2 + x3 121 | 122 | grad_x2, grad_x3 = ad.gradients(y, [x2, x3]) 123 | 124 | executor = ad.Executor([y, grad_x2, grad_x3]) 125 | x2_val = 2 * np.ones(3) 126 | x3_val = 3 * np.ones(3) 127 | y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict = {x2: x2_val, x3: x3_val}) 128 | 129 | assert isinstance(y, ad.Node) 130 | assert np.array_equal(y_val, x2_val + x3_val) 131 | assert np.array_equal(grad_x2_val, np.ones_like(x2_val)) 132 | assert np.array_equal(grad_x3_val, np.ones_like(x3_val)) 133 | 134 | 135 | def test_mul_two_vars(): 136 | x2 = ad.Variable(name = "x2") 137 | x3 = ad.Variable(name = "x3") 138 | y = x2 * x3 139 | 140 | grad_x2, grad_x3 = ad.gradients(y, [x2, x3]) 141 | 142 | executor = ad.Executor([y, grad_x2, grad_x3]) 143 | x2_val = 2 * np.ones(3) 144 | x3_val = 3 * np.ones(3) 145 | y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict = {x2: x2_val, x3: x3_val}) 146 | 147 | assert isinstance(y, ad.Node) 148 | assert np.array_equal(y_val, x2_val * x3_val) 149 | assert np.array_equal(grad_x2_val, x3_val) 150 | assert np.array_equal(grad_x3_val, x2_val) 151 | 152 | 153 | def test_add_mul_mix_1(): 154 | x1 = ad.Variable(name = "x1") 155 | x2 = ad.Variable(name = "x2") 156 | x3 = ad.Variable(name = "x3") 157 | y = x1 + x2 * x3 * x1 158 | 159 | grad_x1, grad_x2, grad_x3 = ad.gradients(y, [x1, x2, x3]) 160 | 161 | executor = ad.Executor([y, grad_x1, grad_x2, grad_x3]) 162 | x1_val = 1 * np.ones(3) 163 | x2_val = 2 * np.ones(3) 164 | x3_val = 3 * np.ones(3) 165 | y_val, grad_x1_val, grad_x2_val, grad_x3_val = executor.run(feed_dict = {x1 : x1_val, x2: x2_val, x3 : x3_val}) 166 | 167 | assert isinstance(y, ad.Node) 168 | assert np.array_equal(y_val, x1_val + x2_val * x3_val) 169 | assert np.array_equal(grad_x1_val, np.ones_like(x1_val) + x2_val * x3_val) 170 | assert np.array_equal(grad_x2_val, x3_val * x1_val) 171 | assert np.array_equal(grad_x3_val, x2_val * x1_val) 172 | 173 | 174 | def test_add_mul_mix_2(): 175 | x1 = ad.Variable(name = "x1") 176 | x2 = ad.Variable(name = "x2") 177 | x3 = ad.Variable(name = "x3") 178 | x4 = ad.Variable(name = "x4") 179 | y = x1 + x2 * x3 * x4 180 | 181 | grad_x1, grad_x2, grad_x3, grad_x4 = ad.gradients(y, [x1, x2, x3, x4]) 182 | 183 | executor = ad.Executor([y, grad_x1, grad_x2, grad_x3, grad_x4]) 184 | x1_val = 1 * np.ones(3) 185 | x2_val = 2 * np.ones(3) 186 | x3_val = 3 * np.ones(3) 187 | x4_val = 4 * np.ones(3) 188 | y_val, grad_x1_val, grad_x2_val, grad_x3_val, grad_x4_val = executor.run(feed_dict = {x1 : x1_val, x2: x2_val, x3 : x3_val, x4 : x4_val}) 189 | 190 | assert isinstance(y, ad.Node) 191 | assert np.array_equal(y_val, x1_val + x2_val * x3_val * x4_val) 192 | assert 
np.array_equal(grad_x1_val, np.ones_like(x1_val)) 193 | assert np.array_equal(grad_x2_val, x3_val * x4_val) 194 | assert np.array_equal(grad_x3_val, x2_val * x4_val) 195 | assert np.array_equal(grad_x4_val, x2_val * x3_val) 196 | 197 | 198 | def test_add_mul_mix_3(): 199 | x2 = ad.Variable(name = "x2") 200 | x3 = ad.Variable(name = "x3") 201 | z = x2 * x2 + x2 + x3 + 3 202 | y = z * z + x3 203 | 204 | grad_x2, grad_x3 = ad.gradients(y, [x2, x3]) 205 | 206 | executor = ad.Executor([y, grad_x2, grad_x3]) 207 | x2_val = 2 * np.ones(3) 208 | x3_val = 3 * np.ones(3) 209 | y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict = {x2: x2_val, x3: x3_val}) 210 | 211 | z_val = x2_val * x2_val + x2_val + x3_val + 3 212 | expected_yval = z_val * z_val + x3_val 213 | expected_grad_x2_val = 2 * (x2_val * x2_val + x2_val + x3_val + 3) * (2 * x2_val + 1) 214 | expected_grad_x3_val = 2 * (x2_val * x2_val + x2_val + x3_val + 3) + 1 215 | assert isinstance(y, ad.Node) 216 | assert np.array_equal(y_val, expected_yval) 217 | assert np.array_equal(grad_x2_val, expected_grad_x2_val) 218 | assert np.array_equal(grad_x3_val, expected_grad_x3_val) 219 | 220 | 221 | def test_grad_of_grad(): 222 | x2 = ad.Variable(name = "x2") 223 | x3 = ad.Variable(name = "x3") 224 | y = x2 * x2 + x2 * x3 225 | 226 | grad_x2, grad_x3 = ad.gradients(y, [x2, x3]) 227 | grad_x2_x2, grad_x2_x3 = ad.gradients(grad_x2, [x2, x3]) 228 | 229 | executor = ad.Executor([y, grad_x2, grad_x3, grad_x2_x2, grad_x2_x3]) 230 | x2_val = 2 * np.ones(3) 231 | x3_val = 3 * np.ones(3) 232 | y_val, grad_x2_val, grad_x3_val, grad_x2_x2_val, grad_x2_x3_val = executor.run(feed_dict = {x2: x2_val, x3: x3_val}) 233 | 234 | expected_yval = x2_val * x2_val + x2_val * x3_val 235 | expected_grad_x2_val = 2 * x2_val + x3_val 236 | expected_grad_x3_val = x2_val 237 | expected_grad_x2_x2_val = 2 * np.ones_like(x2_val) 238 | expected_grad_x2_x3_val = 1 * np.ones_like(x2_val) 239 | 240 | assert isinstance(y, ad.Node) 241 | assert np.array_equal(y_val, expected_yval) 242 | assert np.array_equal(grad_x2_val, expected_grad_x2_val) 243 | assert np.array_equal(grad_x3_val, expected_grad_x3_val) 244 | assert np.array_equal(grad_x2_x2_val, expected_grad_x2_x2_val) 245 | assert np.array_equal(grad_x2_x3_val, expected_grad_x2_x3_val) 246 | 247 | 248 | def test_matmul_two_vars(): 249 | x2 = ad.Variable(name = "x2") 250 | x3 = ad.Variable(name = "x3") 251 | y = ad.matmul_op(x2, x3) 252 | 253 | grad_x2, grad_x3 = ad.gradients(y, [x2, x3]) 254 | 255 | executor = ad.Executor([y, grad_x2, grad_x3]) 256 | x2_val = np.array([[1, 2], [3, 4], [5, 6]]) # 3x2 257 | x3_val = np.array([[7, 8, 9], [10, 11, 12]]) # 2x3 258 | 259 | y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict = {x2: x2_val, x3: x3_val}) 260 | 261 | expected_yval = np.matmul(x2_val, x3_val) 262 | expected_grad_x2_val = np.matmul(np.ones_like(expected_yval), np.transpose(x3_val)) 263 | expected_grad_x3_val = np.matmul(np.transpose(x2_val), np.ones_like(expected_yval)) 264 | 265 | assert isinstance(y, ad.Node) 266 | assert np.array_equal(y_val, expected_yval) 267 | assert np.array_equal(grad_x2_val, expected_grad_x2_val) 268 | assert np.array_equal(grad_x3_val, expected_grad_x3_val) 269 | 270 | 271 | def test_log_op(): 272 | x1 = ad.Variable(name = "x1") 273 | y = ad.log(x1) 274 | 275 | grad_x1, = ad.gradients(y, [x1]) 276 | 277 | executor = ad.Executor([y, grad_x1]) 278 | x1_val = 2 * np.ones(3) 279 | y_val, grad_x1_val= executor.run(feed_dict = {x1 : x1_val}) 280 | 281 | assert isinstance(y, ad.Node) 282 | assert 
np.array_equal(y_val, np.log(x1_val)) 283 | assert np.array_equal(grad_x1_val, 1 / x1_val) 284 | 285 | 286 | def test_log_two_vars(): 287 | x1 = ad.Variable(name = "x1") 288 | x2 = ad.Variable(name = "x2") 289 | y = ad.log(x1 * x2) 290 | 291 | grad_x1, grad_x2 = ad.gradients(y, [x1, x2]) 292 | 293 | executor = ad.Executor([y, grad_x1, grad_x2]) 294 | x1_val = 2 * np.ones(3) 295 | x2_val = 4 * np.ones(3) 296 | y_val, grad_x1_val, grad_x2_val = executor.run(feed_dict = {x1 : x1_val, x2: x2_val}) 297 | 298 | assert isinstance(y, ad.Node) 299 | assert np.array_equal(y_val, np.log(x1_val * x2_val)) 300 | assert np.array_equal(grad_x1_val, x2_val / (x1_val * x2_val)) 301 | assert np.array_equal(grad_x2_val, x1_val / (x1_val * x2_val)) 302 | 303 | 304 | def test_exp_op(): 305 | x1 = ad.Variable(name = "x1") 306 | y = ad.exp(x1) 307 | 308 | grad_x1, = ad.gradients(y, [x1]) 309 | 310 | executor = ad.Executor([y, grad_x1]) 311 | x1_val = 2 * np.ones(3) 312 | y_val, grad_x1_val= executor.run(feed_dict = {x1 : x1_val}) 313 | 314 | assert isinstance(y, ad.Node) 315 | assert np.array_equal(y_val, np.exp(x1_val)) 316 | assert np.array_equal(grad_x1_val, np.exp(x1_val)) 317 | 318 | 319 | def test_exp_mix_op(): 320 | x1 = ad.Variable(name="x1") 321 | x2 = ad.Variable(name="x2") 322 | y = ad.exp(ad.log(x1 * x2) + 1) 323 | 324 | grad_x1, grad_x2 = ad.gradients(y, [x1, x2]) 325 | 326 | executor = ad.Executor([y, grad_x1, grad_x2]) 327 | x1_val = 2 * np.ones(3) 328 | x2_val = 4 * np.ones(3) 329 | y_val, grad_x1_val, grad_x2_val = executor.run(feed_dict = {x1 : x1_val, x2: x2_val}) 330 | 331 | assert isinstance(y, ad.Node) 332 | assert np.array_equal(y_val, np.exp(np.log(x1_val * x2_val) + 1)) 333 | assert np.array_equal(grad_x1_val, y_val * x2_val / (x1_val * x2_val)) 334 | assert np.array_equal(grad_x2_val, y_val * x1_val / (x1_val * x2_val)) 335 | 336 | 337 | def test_reduce_sum(): 338 | x1 = ad.Variable(name = "x1") 339 | y = ad.reduce_sum(x1) 340 | 341 | grad_x1, = ad.gradients(y, [x1]) 342 | 343 | executor = ad.Executor([y, grad_x1]) 344 | x1_val = 2 * np.ones(3) 345 | y_val, grad_x1_val= executor.run(feed_dict = {x1 : x1_val}) 346 | 347 | assert isinstance(y, ad.Node) 348 | assert np.array_equal(y_val, np.sum(x1_val)) 349 | assert np.array_equal(grad_x1_val, np.ones_like(x1_val)) 350 | 351 | 352 | def test_reduce_sum_mix(): 353 | x1 = ad.Variable(name = "x1") 354 | y = ad.exp(ad.reduce_sum(x1)) 355 | 356 | grad_x1, = ad.gradients(y, [x1]) 357 | 358 | executor = ad.Executor([y, grad_x1]) 359 | x1_val = 2 * np.ones(3) 360 | y_val, grad_x1_val= executor.run(feed_dict = {x1 : x1_val}) 361 | expected_y_val = np.exp(np.sum(x1_val)) 362 | assert isinstance(y, ad.Node) 363 | assert np.array_equal(y_val, expected_y_val) 364 | assert np.array_equal(grad_x1_val, expected_y_val * np.ones_like(x1_val)) 365 | 366 | y2 = ad.log(ad.reduce_sum(x1)) 367 | grad_x2, = ad.gradients(y2, [x1]) 368 | executor2 = ad.Executor([y2, grad_x2]) 369 | y2_val, grad_x2_val = executor2.run(feed_dict={x1: x1_val}) 370 | expected_y2_val = np.log(np.sum(x1_val)) 371 | assert isinstance(y2, ad.Node) 372 | assert np.array_equal(y2_val, expected_y2_val) 373 | assert np.array_equal(grad_x2_val, (1/np.sum(x1_val)) * np.ones_like(x1_val)) 374 | 375 | 376 | def test_mix_all(): 377 | x1 = ad.Variable(name="x1") 378 | y = 1/(1+ad.exp(-ad.reduce_sum(x1))) 379 | 380 | grad_x1, = ad.gradients(y, [x1]) 381 | 382 | executor = ad.Executor([y, grad_x1]) 383 | x1_val = 2 * np.ones(3) 384 | y_val, grad_x1_val= executor.run(feed_dict = {x1 : x1_val}) 385 | 
expected_y_val = 1/(1+np.exp(-np.sum(x1_val))) 386 | expected_y_grad = expected_y_val * (1 - expected_y_val) * np.ones_like(x1_val) 387 | 388 | print(expected_y_grad) 389 | print(grad_x1_val) 390 | assert isinstance(y, ad.Node) 391 | assert np.array_equal(y_val, expected_y_val) 392 | assert np.sum(np.abs(grad_x1_val - expected_y_grad)) < 1E-10 393 | 394 | 395 | def test_logistic(): 396 | x1 = ad.Variable(name="x1") 397 | w = ad.Variable(name='w') 398 | y = 1/(1+ad.exp(-ad.reduce_sum(w * x1))) 399 | 400 | grad_w, = ad.gradients(y, [w]) 401 | 402 | executor = ad.Executor([y, grad_w]) 403 | x1_val = 3 * np.ones(3) 404 | w_val = 3 * np.zeros(3) 405 | y_val, grad_w_val = executor.run(feed_dict={x1: x1_val, w: w_val}) 406 | expected_y_val = 1/(1 + np.exp(-np.sum(w_val * x1_val))) 407 | expected_y_grad = expected_y_val * (1 - expected_y_val) * x1_val 408 | 409 | print(expected_y_grad) 410 | print(grad_w_val) 411 | assert isinstance(y, ad.Node) 412 | assert np.array_equal(y_val, expected_y_val) 413 | assert np.sum(np.abs(grad_w_val - expected_y_grad)) < 1E-7 414 | 415 | 416 | def test_log_logistic(): 417 | x1 = ad.Variable(name="x1") 418 | w = ad.Variable(name='w') 419 | y = ad.log(1/(1+ad.exp(-ad.reduce_sum(w * x1)))) 420 | 421 | grad_w, = ad.gradients(y, [w]) 422 | 423 | executor = ad.Executor([y, grad_w]) 424 | x1_val = 3 * np.ones(3) 425 | w_val = 3 * np.zeros(3) 426 | y_val, grad_w_val = executor.run(feed_dict={x1: x1_val, w: w_val}) 427 | logistic = 1/(1+np.exp(-np.sum(w_val * x1_val))) 428 | expected_y_val = np.log(logistic) 429 | expected_y_grad = (1 - logistic) * x1_val 430 | 431 | print(expected_y_grad) 432 | print(grad_w_val) 433 | assert isinstance(y, ad.Node) 434 | assert np.array_equal(y_val, expected_y_val) 435 | assert np.sum(np.abs(grad_w_val - expected_y_grad)) < 1E-7 436 | 437 | 438 | def test_logistic_loss(): 439 | x = ad.Variable(name='x') 440 | w = ad.Variable(name='w') 441 | y = ad.Variable(name='y') 442 | 443 | h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x))) 444 | L = y * ad.log(h) + (1 - y) * ad.log(1 - h) 445 | w_grad, = ad.gradients(L, [w]) 446 | executor = ad.Executor([L, w_grad]) 447 | 448 | y_val = 0 449 | x_val = np.array([2, 3, 4]) 450 | w_val = np.random.random(3) 451 | 452 | L_val, w_grad_val = executor.run(feed_dict={x: x_val, y: y_val, w: w_val}) 453 | 454 | logistic = 1 / (1 + np.exp(-np.sum(w_val * x_val))) 455 | expected_L_val = y_val * np.log(logistic) + (1 - y_val) * np.log(1 - logistic) 456 | expected_w_grad = (y_val - logistic) * x_val 457 | 458 | print(L_val) 459 | print(expected_L_val) 460 | print(expected_w_grad) 461 | print(w_grad_val) 462 | 463 | assert expected_L_val == L_val 464 | assert np.sum(np.abs(expected_w_grad - w_grad_val)) < 1E-9 465 | -------------------------------------------------------------------------------- /data.txt: -------------------------------------------------------------------------------- 1 | -0.017612 14.053064 0 2 | -1.395634 4.662541 1 3 | -0.752157 6.538620 0 4 | -1.322371 7.152853 0 5 | 0.423363 11.054677 0 6 | 0.406704 7.067335 1 7 | 0.667394 12.741452 0 8 | -2.460150 6.866805 1 9 | 0.569411 9.548755 0 10 | -0.026632 10.427743 0 11 | 0.850433 6.920334 1 12 | 1.347183 13.175500 0 13 | 1.176813 3.167020 1 14 | -1.781871 9.097953 0 15 | -0.566606 5.749003 1 16 | 0.931635 1.589505 1 17 | -0.024205 6.151823 1 18 | -0.036453 2.690988 1 19 | -0.196949 0.444165 1 20 | 1.014459 5.754399 1 21 | 1.985298 3.230619 1 22 | -1.693453 -0.557540 1 23 | -0.576525 11.778922 0 24 | -0.346811 -1.678730 1 25 | -2.124484 2.672471 1 26 | 
1.217916 9.597015 0 27 | -0.733928 9.098687 0 28 | -3.642001 -1.618087 1 29 | 0.315985 3.523953 1 30 | 1.416614 9.619232 0 31 | -0.386323 3.989286 1 32 | 0.556921 8.294984 1 33 | 1.224863 11.587360 0 34 | -1.347803 -2.406051 1 35 | 1.196604 4.951851 1 36 | 0.275221 9.543647 0 37 | 0.470575 9.332488 0 38 | -1.889567 9.542662 0 39 | -1.527893 12.150579 0 40 | -1.185247 11.309318 0 41 | -0.445678 3.297303 1 42 | 1.042222 6.105155 1 43 | -0.618787 10.320986 0 44 | 1.152083 0.548467 1 45 | 0.828534 2.676045 1 46 | -1.237728 10.549033 0 47 | -0.683565 -2.166125 1 48 | 0.229456 5.921938 1 49 | -0.959885 11.555336 0 50 | 0.492911 10.993324 0 51 | 0.184992 8.721488 0 52 | -0.355715 10.325976 0 53 | -0.397822 8.058397 0 54 | 0.824839 13.730343 0 55 | 1.507278 5.027866 1 56 | 0.099671 6.835839 1 57 | -0.344008 10.717485 0 58 | 1.785928 7.718645 1 59 | -0.918801 11.560217 0 60 | -0.364009 4.747300 1 61 | -0.841722 4.119083 1 62 | 0.490426 1.960539 1 63 | -0.007194 9.075792 0 64 | 0.356107 12.447863 0 65 | 0.342578 12.281162 0 66 | -0.810823 -1.466018 1 67 | 2.530777 6.476801 1 68 | 1.296683 11.607559 0 69 | 0.475487 12.040035 0 70 | -0.783277 11.009725 0 71 | 0.074798 11.023650 0 72 | -1.337472 0.468339 1 73 | -0.102781 13.763651 0 74 | -0.147324 2.874846 1 75 | 0.518389 9.887035 0 76 | 1.015399 7.571882 0 77 | -1.658086 -0.027255 1 78 | 1.319944 2.171228 1 79 | 2.056216 5.019981 1 80 | -0.851633 4.375691 1 81 | -1.510047 6.061992 0 82 | -1.076637 -3.181888 1 83 | 1.821096 10.283990 0 84 | 3.010150 8.401766 1 85 | -1.099458 1.688274 1 86 | -0.834872 -1.733869 1 87 | -0.846637 3.849075 1 88 | 1.400102 12.628781 0 89 | 1.752842 5.468166 1 90 | 0.078557 0.059736 1 91 | 0.089392 -0.715300 1 92 | 1.825662 12.693808 0 93 | 0.197445 9.744638 0 94 | 0.126117 0.922311 1 95 | -0.679797 1.220530 1 96 | 0.677983 2.556666 1 97 | 0.761349 10.693862 0 98 | -2.168791 0.143632 1 99 | 1.388610 9.341997 0 100 | 0.317029 14.739025 0 -------------------------------------------------------------------------------- /lr_autodiff.py: -------------------------------------------------------------------------------- 1 | import autodiff as ad 2 | import numpy as np 3 | 4 | 5 | def logistic_prob(_w): 6 | def wrapper(_x): 7 | return 1 / (1 + np.exp(-np.sum(_x * _w))) 8 | return wrapper 9 | 10 | 11 | def test_accuracy(_w, _X, _Y): 12 | prob = logistic_prob(_w) 13 | correct = 0 14 | total = len(_Y) 15 | for i in range(len(_Y)): 16 | x = _X[i] 17 | y = _Y[i] 18 | p = prob(x) 19 | if p >= 0.5 and y == 1.0: 20 | correct += 1 21 | elif p < 0.5 and y == 0.0: 22 | correct += 1 23 | print("总数:%d, 预测正确:%d" % (total, correct)) 24 | 25 | 26 | def plot(N, X_val, Y_val, w, with_boundary=False): 27 | import matplotlib.pyplot as plt 28 | for i in range(N): 29 | __x = X_val[i] 30 | if Y_val[i] == 1: 31 | plt.plot(__x[1], __x[2], marker='x') 32 | else: 33 | plt.plot(__x[1], __x[2], marker='o') 34 | if with_boundary: 35 | min_x1 = min(X_val[:, 1]) 36 | max_x1 = max(X_val[:, 1]) 37 | min_x2 = float(-w[0] - w[1] * min_x1) / w[2] 38 | max_x2 = float(-w[0] - w[1] * max_x1) / w[2] 39 | plt.plot([min_x1, max_x1], [min_x2, max_x2], '-r') 40 | 41 | plt.show() 42 | 43 | 44 | def gen_2d_data(n): 45 | x_data = np.random.random([n, 2]) 46 | y_data = np.ones(n) 47 | for i in range(n): 48 | d = x_data[i] 49 | if d[0] + d[1] < 1: 50 | y_data[i] = 0 51 | x_data_with_bias = np.ones([n, 3]) 52 | x_data_with_bias[:, 1:] = x_data 53 | return x_data_with_bias, y_data 54 | 55 | 56 | def auto_diff_lr(): 57 | x = ad.Variable(name='x') 58 | w = ad.Variable(name='w') 59 | y = 
ad.Variable(name='y') 60 | 61 | # Note: in some cases the formulation below has large numerical error, 62 | # which is why real systems usually provide fused higher-level operators to reduce the numerical error. 63 | 64 | h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x))) 65 | L = y * ad.log(h) + (1 - y) * ad.log(1 - h) 66 | w_grad, = ad.gradients(L, [w]) 67 | executor = ad.Executor([L, w_grad]) 68 | 69 | N = 100 70 | X_val, Y_val = gen_2d_data(N) 71 | w_val = np.ones(3) 72 | 73 | plot(N, X_val, Y_val, w_val) 74 | 75 | test_accuracy(w_val, X_val, Y_val) 76 | alpha = 0.01 77 | max_iters = 300 78 | for iteration in range(max_iters): 79 | acc_L_val = 0 80 | for i in range(N): 81 | x_val = X_val[i] 82 | y_val = np.array(Y_val[i]) 83 | L_val, w_grad_val = executor.run(feed_dict={w: w_val, x: x_val, y: y_val}) 84 | w_val += alpha * w_grad_val  # gradient ascent on the log-likelihood 85 | acc_L_val += L_val 86 | print("iter = %d, log-likelihood = %s, w = %s" % (iteration, acc_L_val, w_val)) 87 | test_accuracy(w_val, X_val, Y_val) 88 | plot(N, X_val, Y_val, w_val, True) 89 | 90 | 91 | if __name__ == '__main__': 92 | auto_diff_lr() 93 | 94 | 95 | 96 | 97 | --------------------------------------------------------------------------------
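For quick reference, here is a minimal usage sketch of the engine, mirroring the patterns in autodiff_test.py (the variable names and input values are illustrative): build a symbolic expression, request gradient nodes with ad.gradients, then evaluate values and gradients together in a single Executor.run call.

import numpy as np
import autodiff as ad

# Build the symbolic expression y = x1 * x2 + x1; no numerical work happens yet.
x1 = ad.Variable(name="x1")
x2 = ad.Variable(name="x2")
y = x1 * x2 + x1

# ad.gradients returns new graph nodes representing dy/dx1 and dy/dx2.
grad_x1, grad_x2 = ad.gradients(y, [x1, x2])

# Feed concrete arrays for the placeholder variables and evaluate everything at once.
executor = ad.Executor([y, grad_x1, grad_x2])
x1_val = np.array([1.0, 2.0, 3.0])
x2_val = np.array([4.0, 5.0, 6.0])
y_val, grad_x1_val, grad_x2_val = executor.run(feed_dict={x1: x1_val, x2: x2_val})

print(y_val)        # x1_val * x2_val + x1_val
print(grad_x1_val)  # x2_val + 1
print(grad_x2_val)  # x1_val

Because the returned gradients are themselves graph nodes, higher-order derivatives come from the same mechanism (see test_grad_of_grad in autodiff_test.py).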