├── .gitignore ├── .travis.yml ├── LICENSE ├── README.org ├── benchmarks ├── README.org ├── conv_3d.py ├── elu.py ├── fractional_max_pooling.py ├── lag_task_lasgne.py ├── local_response_normalization_2d.py ├── repeat_n_d.py ├── repeat_n_d_vs_sparse_upsample.py ├── sparse_repeated_updates.py ├── sparse_updates.py └── walk_utils_cached_walk.py ├── canopy ├── __init__.py ├── fn_utils.py ├── handlers │ ├── __init__.py │ ├── base.py │ ├── batch.py │ ├── conditional.py │ ├── debug.py │ ├── fn.py │ ├── misc.py │ ├── monitor.py │ ├── nodes.py │ └── tests │ │ ├── base_test.py │ │ ├── batch_test.py │ │ ├── conditional_test.py │ │ ├── debug_test.py │ │ ├── fn_test.py │ │ ├── misc_test.py │ │ ├── monitor_test.py │ │ └── nodes_test.py ├── network_utils.py ├── node_utils.py ├── sandbox │ ├── __init__.py │ ├── datasets.py │ ├── monitor_ui.py │ └── vgg_net.py ├── schedules.py ├── serialization.py ├── templates │ ├── __init__.py │ └── monitor_ui │ │ ├── index.html │ │ ├── monitor.js │ │ └── monitor.jsonl ├── tests │ ├── network_utils_test.py │ ├── node_utils_test.py │ ├── schedules_test.py │ ├── serialization_test.py │ └── walk_utils_test.py ├── transforms │ ├── __init__.py │ ├── fns.py │ ├── node.py │ ├── tests │ │ ├── fns_test.py │ │ ├── node_test.py │ │ └── tree_test.py │ └── tree.py └── walk_utils.py ├── design_notes.org ├── examples ├── REINFORCE │ ├── cluttered_mnist.py │ ├── constant.py │ └── linear.py ├── activation_transformation │ ├── cifar10.py │ └── mnist_concatenate_negation.py ├── anrat │ └── mnist_mlp_anrat.py ├── bachelor_normalization │ └── mnist_mlp_bn.py ├── batch_normalization │ ├── mnist_cnn_bn.py │ ├── mnist_mlp_bn.py │ ├── svhn_cnn_bn.py │ └── svhn_mlp_bn.py ├── channel_out │ └── mnist_cnn.py ├── cifar10_cnn.py ├── contraction_penalty │ └── mnist_mlp.py ├── discovering_hidden_factors_of_variation_in_deep_networks │ └── mnist.py ├── dropout_max_pool │ └── mnist_cnn.py ├── expected_batches │ ├── mnist_mlp_gradual_bn.py │ ├── mnist_mlp_gradual_dropout.py │ └── mnist_mlp_grelu.py ├── fmp │ └── mnist_cnn_dnn.py ├── highway_networks │ └── mnist.py ├── inception │ ├── mnist.py │ └── mnist_bn.py ├── maxout │ └── mnist_cnn.py ├── mnist_cnn.py ├── mnist_cnn_dnn.py ├── mnist_lr.py ├── mnist_ml_l2.py ├── mnist_mlp.py ├── mnist_mlp_training_loop.py ├── monitor_ui │ └── mnist_mlp_trainer.py ├── no_batch_normalization │ ├── mnist_cnn_bn.py │ └── mnist_mlp_bn.py ├── polyak_averaging │ └── mnist_mlp.py ├── prelu │ └── mnist_cnn.py ├── recurrent_convolution │ └── mnist_cnn.py ├── recurrent_hc │ ├── lag_task_gru.py │ ├── lag_task_lstm.py │ └── lag_task_rnn.py ├── resnet │ ├── cifar10_cnn.py │ └── mnist_cnn.py ├── schedules │ └── mnist_mlp_trainer.py ├── sensitivity_analysis │ └── Sensitivity Analysis.ipynb ├── simple_rnn.py ├── simple_rnn_comparison │ ├── README.org │ ├── with_treeano.py │ └── without_treeano.py ├── sparsity_penalty │ └── mnist_mlp.py ├── spatial_transformer │ ├── cluttered_mnist.py │ ├── helpers.py │ └── spatial_transformer_network.ipynb ├── stochastic_pooling │ └── mnist_cnn.py ├── timesout │ ├── mnist_cnn.py │ └── mnist_mlp.py ├── update_dropout │ └── mnist_cnn.py └── weight_normalization │ └── mnist_cnn.py ├── setup.py └── treeano ├── __init__.py ├── core ├── __init__.py ├── children_container.py ├── graph.py ├── inits.py ├── network.py ├── node.py ├── node_impl.py ├── serialization_state.py ├── tests │ ├── __init__.py │ ├── children_container_test.py │ ├── inits_test.py │ ├── network_test.py │ ├── node_impl_test.py │ ├── serialization_state_test.py │ ├── update_deltas_test.py │ 
└── variable_test.py ├── update_deltas.py └── variable.py ├── inits ├── __init__.py └── tests │ ├── __init__.py │ └── inits_test.py ├── lasagne ├── __init__.py ├── inits.py ├── nodes.py └── tests │ ├── multi_test.py │ └── updates_test.py ├── node_utils.py ├── nodes ├── __init__.py ├── activations.py ├── combine.py ├── composite.py ├── containers.py ├── conv.py ├── costs.py ├── debug.py ├── dnn.py ├── downsample.py ├── embedding.py ├── hyperparameter.py ├── monitor.py ├── recurrent.py ├── scan.py ├── simple.py ├── stochastic.py ├── test_utils.py ├── tests │ ├── __init__.py │ ├── activations_test.py │ ├── combine_test.py │ ├── composite_test.py │ ├── containers_test.py │ ├── conv_test.py │ ├── costs_test.py │ ├── debug_test.py │ ├── dnn_test.py │ ├── downsample_test.py │ ├── hyperparameter_test.py │ ├── monitor_test.py │ ├── recurrent_test.py │ ├── scan_test.py │ ├── simple_test.py │ ├── stochastic_test.py │ ├── theanode_test.py │ ├── updates_test.py │ └── upsample_test.py ├── theanode.py ├── toy.py ├── updates.py └── upsample.py ├── sandbox ├── __init__.py ├── nodes │ ├── REINFORCE.py │ ├── __init__.py │ ├── activation_transformation.py │ ├── adaadam.py │ ├── anrat.py │ ├── auxiliary_costs.py │ ├── bachelor_normalization.py │ ├── batch_fold.py │ ├── batch_normalization.py │ ├── biased_adam.py │ ├── bttf_mean.py │ ├── channel_out.py │ ├── contraction_penalty.py │ ├── dNDF.py │ ├── deconv_upsample.py │ ├── deconvnet.py │ ├── dropout_max_pool.py │ ├── equilibrated_sgd.py │ ├── expected_batches.py │ ├── fmp.py │ ├── gradient_normalization.py │ ├── gradnet.py │ ├── guided_backprop.py │ ├── highway.py │ ├── inception.py │ ├── input_scaling.py │ ├── interval_relu.py │ ├── invariant_dropout.py │ ├── inverse.py │ ├── irregular_length.py │ ├── kl_sparsity_penalty.py │ ├── kumaraswamy_unit.py │ ├── l2_pool.py │ ├── label_smoothing.py │ ├── lrn.py │ ├── mixed_pooling.py │ ├── monitor_update_ratio.py │ ├── monitored_adam.py │ ├── nadam.py │ ├── no_batch_normalization.py │ ├── norm_stabilizer.py │ ├── paired_conv.py │ ├── partition_axis.py │ ├── prelu.py │ ├── quickprop.py │ ├── randomized_relu.py │ ├── recurrent_convolution.py │ ├── recurrent_hc.py │ ├── relu_grad_modifications.py │ ├── resnet.py │ ├── rms_normalization.py │ ├── rmsprop.py │ ├── sample_variance_penalization.py │ ├── scaled_updates.py │ ├── segmentation.py │ ├── smorms3.py │ ├── spatial_attention.py │ ├── spatial_transformer.py │ ├── spp_net.py │ ├── static_bucket.py │ ├── std_adam.py │ ├── stochastic_pooling.py │ ├── tests │ │ ├── activation_transformation_test.py │ │ ├── anrat_test.py │ │ ├── auxiliary_costs_test.py │ │ ├── batch_fold_test.py │ │ ├── batch_normalization_test.py │ │ ├── bttf_mean_test.py │ │ ├── channel_out_test.py │ │ ├── contraction_penalty_test.py │ │ ├── dNDF_test.py │ │ ├── deconv_upsample_test.py │ │ ├── dropout_max_pool_test.py │ │ ├── equilibrated_sgd_test.py │ │ ├── expected_batches_test.py │ │ ├── gradient_normalization_test.py │ │ ├── gradnet_test.py │ │ ├── input_scaling_test.py │ │ ├── interval_relu_test.py │ │ ├── invariant_dropout_test.py │ │ ├── inverse_test.py │ │ ├── irregular_length_test.py │ │ ├── kl_sparsity_penalty_test.py │ │ ├── kumaraswamy_unit_test.py │ │ ├── l2_pool_test.py │ │ ├── lrn_test.py │ │ ├── mixed_pooling_test.py │ │ ├── monitor_update_ratio_test.py │ │ ├── paired_conv_test.py │ │ ├── partition_axis_test.py │ │ ├── prelu_test.py │ │ ├── randomized_relu_test.py │ │ ├── recurrent_convolution_test.py │ │ ├── resnet_test.py │ │ ├── smorms3_test.py │ │ ├── spatial_attention_test.py │ │ 
├── spatial_transformer_test.py │ │ ├── spp_net_test.py │ │ ├── stochastic_pooling_test.py │ │ ├── triplet_network_test.py │ │ ├── unbiased_nesterov_momentum_test.py │ │ └── wta_sparsity_test.py │ ├── timesout.py │ ├── triplet_network.py │ ├── unbiased_nesterov_momentum.py │ ├── update_dropout.py │ ├── weight_normalization.py │ ├── word_vectors.py │ └── wta_sparisty.py ├── sensitivity_analysis.py ├── tests │ └── utils_test.py ├── update_utils.py └── utils.py ├── tests ├── treeano_test.py └── utils_test.py ├── theano_extensions ├── __init__.py ├── fractional_max_pooling.py ├── gradient.py ├── irregular_length.py ├── meshgrid.py ├── nanguardmode.py ├── padding.py ├── tensor.py ├── tests │ ├── fractional_max_pooling_test.py │ ├── gradient_test.py │ ├── irregular_length_test.py │ ├── meshgrid_test.py │ ├── nanguardmode_test.py │ ├── padding_test.py │ └── tensor_test.py └── tree_probability.py ├── utils.py └── visualization.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # used for monitor ui examples 60 | monitor_dir/ 61 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | python: 4 | - "2.7" 5 | - "3.4" 6 | addons: 7 | apt: 8 | packages: 9 | - gfortran 10 | - libblas-dev 11 | - liblapack-dev 12 | before_install: 13 | - pip install -U pip 14 | install: 15 | - pip install -U numpy 16 | - pip install -U https://github.com/theano/theano/archive/master.zip 17 | - pip install -U networkx 18 | - pip install -U six 19 | - pip install -U toolz 20 | - pip install -U lasagne 21 | - pip install -U scikit-image 22 | - pip install -U nose 23 | - pip install -U matplotlib 24 | script: 25 | - THEANO_FLAGS=floatX=float32 nosetests -v 26 | cache: 27 | - apt 28 | - directories: 29 | - $HOME/.cache/pip 30 | - $HOME/.theano 31 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | * treeano 2 | WARNING: 3 | - Treeano changes extremely frequently with breaking features 4 | - Treeano is designed for advanced users i.e. 
not newb-friendly 5 | - Treeano is 100% focused on composability 6 | 7 | [[https://travis-ci.org/diogo149/treeano.svg]] 8 | * Dependencies 9 | - required 10 | - numpy 11 | - theano 12 | - networkx 13 | - six 14 | - toolz 15 | - optional 16 | - pydot 17 | - pygraphviz 18 | - matplotlib 19 | - scikit-learn (for the examples) 20 | - nose (for tests) 21 | - scikit-image (for tests / spatial transformer) 22 | - lasagne (for tests / wrapped nodes) 23 | -------------------------------------------------------------------------------- /benchmarks/README.org: -------------------------------------------------------------------------------- 1 | TODO 2 | -------------------------------------------------------------------------------- /benchmarks/conv_3d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | import treeano.nodes as tn 5 | fX = theano.config.floatX 6 | 7 | # TODO change me 8 | # conv3d_node = tn.Conv3DNode 9 | # conv3d_node = tn.DnnConv3DNode 10 | conv3d_node = tn.Conv3D2DNode 11 | 12 | network = tn.SequentialNode( 13 | "s", 14 | [tn.InputNode("i", shape=(1, 1, 32, 32, 32)), 15 | conv3d_node("conv", num_filters=32, filter_size=(3, 3, 3)), 16 | tn.DnnMeanPoolNode("pool", pool_size=(30, 30, 30))] 17 | ).network() 18 | fn = network.function(["i"], ["s"]) 19 | x = np.random.randn(1, 1, 32, 32, 32).astype(fX) 20 | 21 | # FIXME add memory logging 22 | 23 | """ 24 | 20150916 results: 25 | 26 | %timeit fn(x) 27 | 28 | Conv3DNode => 86.2 ms 29 | 30 | DnnConv3DNode => 1.85 ms 31 | 32 | THEANO_FLAGS=optimizer_including=conv3d_fft:convgrad3d_fft:convtransp3d_fft 33 | + Conv3DNode => 17.4 ms 34 | 35 | Conv3D2DNode => 7.25 ms 36 | """ 37 | -------------------------------------------------------------------------------- /benchmarks/elu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | import treeano.nodes as tn 5 | fX = theano.config.floatX 6 | 7 | 8 | def elu1(x, alpha=1.): 9 | return T.switch(T.gt(x, 0.), x, alpha * (T.exp(x) - 1)) 10 | 11 | 12 | def elu2(x, alpha=1.): 13 | pos = (x + abs(x)) / 2 14 | neg = (x + -abs(x)) / 2 15 | return pos + alpha * (T.exp(neg) - 1) 16 | 17 | 18 | for _ in range(100): 19 | tmp = np.random.randn() 20 | np.testing.assert_allclose(elu1(tmp).eval(), 21 | elu2(tmp).eval()) 22 | 23 | # TODO change me 24 | # elu = elu1 25 | elu = elu2 26 | 27 | x = T.matrix() 28 | f = elu(x) 29 | b = T.grad(f.sum(), x) 30 | X = np.random.randn(4096, 4096).astype(fX) 31 | 32 | """ 33 | 20151204 results 34 | 35 | %timeit f.eval({x: X}) 36 | elu1 => 33.3 ms 37 | elu2 => 28.3 ms 38 | 39 | %timeit b.eval({x: X}) 40 | elu1 => 161 ms 41 | elu2 => 29.2 ms 42 | """ 43 | -------------------------------------------------------------------------------- /benchmarks/fractional_max_pooling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | import treeano.nodes as tn 5 | from treeano.sandbox.nodes import fmp 6 | fX = theano.config.floatX 7 | 8 | # TODO change me 9 | node = "fmp2" 10 | compute_grad = True 11 | 12 | if node == "mp": 13 | n = tn.MaxPool2DNode("mp", pool_size=(2, 2)) 14 | elif node == "fmp": 15 | n = fmp.DisjointPseudorandomFractionalMaxPool2DNode("fmp1", 16 | fmp_alpha=1.414, 17 | fmp_u=0.5) 18 | elif node == "fmp2": 19 | n = 
fmp.OverlappingRandomFractionalMaxPool2DNode("fmp2", 20 | pool_size=(1.414, 1.414)) 21 | else: 22 | assert False 23 | 24 | 25 | network = tn.SequentialNode( 26 | "s", 27 | [tn.InputNode("i", shape=(1, 1, 32, 32)), 28 | n] 29 | ).network() 30 | 31 | 32 | if compute_grad: 33 | i = network["i"].get_vw("default").variable 34 | s = network["s"].get_vw("default").variable 35 | fn = network.function(["i"], [T.grad(s.sum(), i)]) 36 | else: 37 | fn = network.function(["i"], ["s"]) 38 | 39 | x = np.random.randn(1, 1, 32, 32).astype(fX) 40 | 41 | """ 42 | 20150924 results: 43 | 44 | %timeit fn(x) 45 | 46 | no grad: 47 | mp: 33.7 us 48 | fmp: 77.6 us 49 | fmp2: 1.91 ms 50 | 51 | with grad: 52 | mp: 67.1 us 53 | fmp: 162 us 54 | fmp2: 2.66 ms 55 | """ 56 | -------------------------------------------------------------------------------- /benchmarks/lag_task_lasgne.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import numpy as np 5 | import theano 6 | import theano.tensor as T 7 | import lasagne 8 | 9 | fX = theano.config.floatX 10 | 11 | # ################################## config ################################## 12 | 13 | N_TRAIN = 1000 14 | LAG = 10 15 | LENGTH = 50 16 | HIDDEN_STATE_SIZE = 10 17 | BATCH_SIZE = 64 18 | 19 | # ############################### prepare data ############################### 20 | 21 | 22 | def binary_toy_data(lag=1, length=20): 23 | inputs = np.random.randint(0, 2, length).astype(fX) 24 | outputs = np.array(lag * [0] + list(inputs), dtype=fX)[:length] 25 | return inputs, outputs 26 | 27 | 28 | def minibatch(lag, length, batch_size): 29 | inputs = [] 30 | outputs = [] 31 | for _ in range(batch_size): 32 | i, o = binary_toy_data(lag, length) 33 | inputs.append(i) 34 | outputs.append(o) 35 | return np.array(inputs)[..., np.newaxis], np.array(outputs)[..., np.newaxis] 36 | 37 | 38 | # ############################## prepare model ############################## 39 | 40 | l = lasagne.layers.InputLayer(shape=(None, None, 1)) 41 | l = lasagne.layers.LSTMLayer(l, 42 | num_units=HIDDEN_STATE_SIZE, 43 | grad_clipping=1, 44 | learn_init=True) 45 | l = lasagne.layers.ReshapeLayer(l, shape=(-1, HIDDEN_STATE_SIZE)) 46 | l = lasagne.layers.DenseLayer(l, 47 | num_units=1, 48 | nonlinearity=lasagne.nonlinearities.sigmoid) 49 | 50 | in_var = T.tensor3() 51 | targets = T.tensor3() 52 | outputs = lasagne.layers.get_output(l, in_var).reshape(in_var.shape) 53 | loss = T.mean((targets - outputs) ** 2) 54 | all_params = lasagne.layers.get_all_params(l) 55 | updates = lasagne.updates.adam(loss, all_params) 56 | 57 | train_fn = theano.function([in_var, targets], [loss], updates=updates) 58 | valid_fn = theano.function([in_var], [outputs]) 59 | 60 | 61 | # ################################# training ################################# 62 | 63 | print("Starting training...") 64 | 65 | import time 66 | st = time.time() 67 | for i in range(N_TRAIN): 68 | inputs, outputs = minibatch(lag=LAG, length=LENGTH, batch_size=BATCH_SIZE) 69 | loss = train_fn(inputs, outputs)[0] 70 | print(loss) 71 | print("total_time: %s" % (time.time() - st)) 72 | 73 | inputs, outputs = minibatch(lag=LAG, length=LENGTH, batch_size=BATCH_SIZE) 74 | pred = valid_fn(inputs)[0] 75 | pred_accuracies = (np.round(pred) == outputs).mean(axis=0)[LAG:] 76 | print(pred_accuracies) 77 | print(pred_accuracies.mean()) 78 | 
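# Worked example (a minimal sketch added for illustration; `_demo_lag_task`
# is a hypothetical helper, not part of the benchmark): the lag task above
# just delays the binary input stream by `lag` steps, zero-padding the
# front. The treeano counterparts of this script, which reuse the same data
# generator, live in examples/recurrent_hc/lag_task_*.py.
def _demo_lag_task():
    inputs, outputs = binary_toy_data(lag=2, length=6)
    # e.g. inputs  -> [0. 1. 1. 0. 1. 1.]
    #      outputs -> [0. 0. 0. 1. 1. 0.]  (inputs shifted right by 2)
    assert (outputs[:2] == 0).all()
    assert (outputs[2:] == inputs[:-2]).all()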
-------------------------------------------------------------------------------- /benchmarks/local_response_normalization_2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | import treeano 5 | from treeano.sandbox.nodes import lrn 6 | fX = theano.config.floatX 7 | 8 | # shape = (3, 4, 5, 6) 9 | # shape = (32, 32, 32, 32) 10 | shape = (128, 32, 128, 128) 11 | 12 | f = lrn.local_response_normalization_2d_v1 13 | # f = lrn.local_response_normalization_2d_v2 14 | # f = lrn.local_response_normalization_2d_dnn 15 | # f = lrn.local_response_normalization_2d_pool 16 | 17 | vw = treeano.VariableWrapper("foo", 18 | variable=T.tensor4(), 19 | shape=shape) 20 | kwargs = dict( 21 | alpha=1e-4, 22 | k=2, 23 | beta=0.75, 24 | n=5, 25 | ) 26 | target = f(vw, **kwargs).sum() 27 | g_sum = T.grad(target, vw.variable).sum() 28 | fn1 = theano.function( 29 | [vw.variable], 30 | [target]) 31 | fn2 = theano.function( 32 | [vw.variable], 33 | [g_sum]) 34 | x = np.random.randn(*shape).astype(fX) 35 | 36 | """ 37 | 20151004 results: 38 | 39 | ===== 40 | 41 | shape = (3, 4, 5, 6) 42 | 43 | forward pass: 44 | %timeit fn1(x) 45 | local_response_normalization_2d_v1: 66.2 us 46 | local_response_normalization_2d_v2: 66.5 us 47 | local_response_normalization_2d_dnn: 61.8 us 48 | local_response_normalization_2d_pool: 66.6 us 49 | 50 | forward + backward pass: 51 | %timeit fn2(x) 52 | local_response_normalization_2d_v1: 117 us 53 | local_response_normalization_2d_v2: 115 us 54 | local_response_normalization_2d_dnn: 91.4 us 55 | local_response_normalization_2d_pool: 87.4 us 56 | 57 | ===== 58 | 59 | shape = (32, 32, 32, 32) 60 | 61 | forward pass: 62 | %timeit fn1(x) 63 | local_response_normalization_2d_v1: 2.26 ms 64 | local_response_normalization_2d_v2: 2.29 ms 65 | local_response_normalization_2d_pool: 2.15 ms 66 | 67 | forward + backward pass: 68 | %timeit fn2(x) 69 | local_response_normalization_2d_v1: 6.71 ms 70 | local_response_normalization_2d_v2: 6.71 ms 71 | local_response_normalization_2d_pool: 2.69 ms 72 | 73 | ===== 74 | 75 | shape = (128, 32, 128, 128) 76 | 77 | forward pass: 78 | %timeit fn1(x) 79 | local_response_normalization_2d_v1: 145 ms 80 | local_response_normalization_2d_pool: 139 ms 81 | 82 | forward + backward pass: 83 | %timeit fn2(x) 84 | local_response_normalization_2d_v1: 584 ms 85 | local_response_normalization_2d_pool: 167 ms 86 | """ 87 | -------------------------------------------------------------------------------- /benchmarks/repeat_n_d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | import treeano.nodes as tn 5 | fX = theano.config.floatX 6 | 7 | network = tn.SequentialNode( 8 | "s", 9 | [tn.InputNode("i", shape=(32, 32, 32, 32, 32)), 10 | tn.SpatialRepeatNDNode("r", upsample_factor=(2, 2, 2))] 11 | ).network() 12 | fn = network.function(["i"], ["s"]) 13 | x = np.random.randn(32, 32, 32, 32, 32).astype(fX) 14 | 15 | """ 16 | 20150922 results: 17 | 18 | %timeit fn(x) 19 | 20 | from axis 0 to 4: 596 ms 21 | from axis 4 to 0: 526 ms 22 | """ 23 | -------------------------------------------------------------------------------- /benchmarks/repeat_n_d_vs_sparse_upsample.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | import treeano.nodes as tn 5 | fX = 
theano.config.floatX 6 | 7 | # TODO change me 8 | # n = tn.SpatialRepeatNDNode 9 | n = tn.SpatialSparseUpsampleNode 10 | 11 | network = tn.SequentialNode( 12 | "s", 13 | [tn.InputNode("i", shape=(32, 32, 32, 32, 32)), 14 | n("us", upsample_factor=(2, 2, 2))] 15 | ).network() 16 | fn = network.function(["i"], ["s"]) 17 | x = np.random.randn(32, 32, 32, 32, 32).astype(fX) 18 | 19 | """ 20 | 20150926 results: 21 | 22 | %timeit fn(x) 23 | 24 | SpatialRepeatNDNode: 663 ms 25 | SpatialSparseUpsampleNode: 424 ms 26 | """ 27 | -------------------------------------------------------------------------------- /benchmarks/sparse_repeated_updates.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | fX = theano.config.floatX 5 | s = theano.shared(np.ones((10, 1), dtype=fX)) 6 | idxs = [0, 1, 1] 7 | fn = theano.function([], updates=[(s, T.inc_subtensor(s[idxs], s[idxs] ** 2))]) 8 | fn() 9 | print(s.get_value()) -------------------------------------------------------------------------------- /benchmarks/sparse_updates.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | fX = theano.config.floatX 5 | s = theano.shared(np.zeros((30000, 10000), dtype=fX)) 6 | fn1 = theano.function([], updates=[(s, s + 1)]) 7 | fn2 = theano.function([], updates=[(s, T.inc_subtensor(s[1497], s[1497] ** 2))]) 8 | # as in update deltas 9 | fn3 = theano.function([], 10 | updates=[(s, s + (T.inc_subtensor(s[1497], s[1497] ** 2) - s))]) 11 | 12 | """ 13 | on cpu: 14 | %timeit fn1() # 166ms 15 | %timeit fn2() # 14.4us 16 | %timeit fn3() # 12us 17 | """ 18 | 19 | # doesn't work 20 | cost = (s[0] + s[0] + 3 * s[1]).sum() 21 | g1, g2 = theano.grad(cost, [s[0], s[1]]) 22 | # DisconnectedInputError: grad method was asked to compute the gradient 23 | # with respect to a variable that is not part of the computational graph 24 | # of the cost, or is used only by a non-differentiable operator: 25 | # Subtensor{int64}.0 26 | g1.eval() 27 | g2.eval() 28 | 29 | 30 | # works 31 | s0 = s[0] 32 | s1 = s[1] 33 | cost = (s0 + s0 + 3 * s1).sum() 34 | g1, g2 = theano.grad(cost, [s0, s1]) 35 | g1.eval() 36 | g2.eval() -------------------------------------------------------------------------------- /benchmarks/walk_utils_cached_walk.py: -------------------------------------------------------------------------------- 1 | import treeano 2 | import treeano.nodes as tn 3 | import canopy 4 | 5 | 6 | def create_big_node_graph(levels): 7 | assert levels >= 0 8 | if levels == 0: 9 | return tn.IdentityNode("i") 10 | else: 11 | prev = create_big_node_graph(levels - 1) 12 | return tn.SequentialNode( 13 | "s", 14 | [canopy.node_utils.suffix_node(prev, "0"), 15 | canopy.node_utils.suffix_node(prev, "1")]) 16 | 17 | 18 | """ 19 | 20150808 results: 20 | 21 | %timeit create_big_node_graph(5) 22 | # cached_walk = True => 49.7ms 23 | # cached_walk = False => 254ms 24 | 25 | %timeit create_big_node_graph(10) 26 | # cached_walk = True => 1.77s 27 | # cached_walk = False => 16.5s 28 | """ 29 | -------------------------------------------------------------------------------- /canopy/__init__.py: -------------------------------------------------------------------------------- 1 | from . import handlers 2 | from . import network_utils 3 | from . import node_utils 4 | from . import schedules 5 | from . import serialization 6 | from . 
import transforms 7 | from . import templates 8 | from . import walk_utils 9 | 10 | # TODO rename fn_utils 11 | # --- 12 | # fn_utils is not imported by name, because the functions in the file 13 | # don't really belong anywhere 14 | # import fn_utils 15 | 16 | 17 | from .fn_utils import evaluate_until 18 | from .handlers import handled_fn 19 | -------------------------------------------------------------------------------- /canopy/fn_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import time 5 | import pprint 6 | 7 | 8 | # TODO move to handlers-specific module, since this assumes a handled_fn as 9 | # input 10 | def evaluate_until(fn, 11 | gen, 12 | max_iters=None, 13 | max_seconds=None, 14 | callback=pprint.pprint, 15 | catch_keyboard_interrupt=True): 16 | """ 17 | evaluates a function on the output of a data generator until a given 18 | stopping condition 19 | 20 | fn: 21 | handled_fn 22 | """ 23 | start_time = time.time() 24 | new_gen = enumerate(gen) 25 | 26 | to_catch = (StopIteration,) 27 | if catch_keyboard_interrupt: 28 | to_catch = to_catch + (KeyboardInterrupt,) 29 | 30 | print("Beginning evaluate_until") 31 | try: 32 | while True: 33 | with fn.state.time("generating_data"): 34 | i, data = next(new_gen) 35 | if (max_iters is not None) and (i >= max_iters): 36 | break 37 | if ((max_seconds is not None) 38 | and (time.time() - start_time >= max_seconds)): 39 | break 40 | res = fn(data) 41 | # adding 1 to be 1-indexed instead of 0-indexed 42 | res["_iter"] = i + 1 43 | res["_time"] = time.time() - start_time 44 | if callback is not None: 45 | callback(res) 46 | except to_catch: 47 | print("Ending evaluate_until") 48 | -------------------------------------------------------------------------------- /canopy/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | from . import base 2 | from . import conditional 3 | from . import nodes 4 | from . import batch 5 | from . import fn 6 | from . import monitor 7 | from . import debug 8 | 9 | from .base import (NetworkHandlerAPI, 10 | NetworkHandlerImpl) 11 | from .fn import (handled_fn) 12 | from .conditional import (call_after_every) 13 | from .nodes import (remove_nodes_with_class, 14 | with_hyperparameters, 15 | override_hyperparameters, 16 | update_hyperparameters, 17 | schedule_hyperparameter, 18 | use_scheduled_hyperparameter) 19 | from .batch import (split_input, 20 | chunk_variables, 21 | batch_pad) 22 | from .monitor import (time_call, 23 | time_per_row, 24 | evaluate_monitoring_variables, 25 | monitor_network_state, 26 | monitor_variable, 27 | monitor_shared_in_subtree) 28 | from .misc import (callback_with_input, 29 | exponential_polyak_averaging) 30 | from .debug import (output_nanguard, 31 | network_nanguard, 32 | nanguardmode, 33 | save_last_inputs_and_networks, 34 | make_updates_synchronous) 35 | -------------------------------------------------------------------------------- /canopy/handlers/conditional.py: -------------------------------------------------------------------------------- 1 | from . 
import base 2 | 3 | 4 | class CallAfterEvery(base.NetworkHandlerImpl): 5 | 6 | """ 7 | handler that calls a callback with the result of a function every few 8 | calls 9 | """ 10 | 11 | def __init__(self, iters, callback): 12 | self.iters = iters 13 | self.callback = callback 14 | self.count = 0 15 | 16 | def call(self, fn, in_dict, *args, **kwargs): 17 | res = fn(in_dict, *args, **kwargs) 18 | self.count += 1 19 | if (self.count % self.iters) == 0: 20 | # WARNING: dict may be mutated here 21 | self.callback(in_dict, res) 22 | return res 23 | 24 | call_after_every = CallAfterEvery 25 | -------------------------------------------------------------------------------- /canopy/handlers/fn.py: -------------------------------------------------------------------------------- 1 | from . import base 2 | 3 | 4 | class CallWithDict(base.NetworkHandlerImpl): 5 | 6 | """ 7 | allows calling a function with a map/dict instead of positional arguments 8 | """ 9 | 10 | def transform_compile_function_kwargs(self, state, inputs, **kwargs): 11 | assert isinstance(inputs, dict) 12 | self.input_key_order_ = [] 13 | new_inputs = [] 14 | for k, v in inputs.items(): 15 | self.input_key_order_.append(k) 16 | new_inputs.append(v) 17 | 18 | kwargs["inputs"] = new_inputs 19 | return kwargs 20 | 21 | def call(self, fn, in_dict, **kwargs): 22 | assert isinstance(in_dict, dict) 23 | new_args = [in_dict[k] for k in self.input_key_order_] 24 | return fn(*new_args, **kwargs) 25 | 26 | call_with_dict = CallWithDict 27 | 28 | 29 | class ReturnDict(base.NetworkHandlerImpl): 30 | 31 | """ 32 | has a function return a map/dict instead of positional outputs 33 | """ 34 | 35 | def transform_compile_function_kwargs(self, state, outputs, **kwargs): 36 | assert isinstance(outputs, dict) 37 | self.output_key_order_ = [] 38 | new_outputs = [] 39 | for k, v in outputs.items(): 40 | self.output_key_order_.append(k) 41 | new_outputs.append(v) 42 | 43 | kwargs["outputs"] = new_outputs 44 | return kwargs 45 | 46 | def call(self, fn, *args, **kwargs): 47 | res = fn(*args, **kwargs) 48 | assert len(res) == len(self.output_key_order_) 49 | return {k: v for k, v in zip(self.output_key_order_, res)} 50 | 51 | return_dict = ReturnDict 52 | 53 | 54 | class _HandledFunction(object): 55 | 56 | """ 57 | class that stores handler-chain wide state 58 | """ 59 | 60 | def __init__(self, network, handlers, inputs, outputs=None, **kwargs): 61 | self.network = network 62 | self.handlers = handlers + [call_with_dict(), 63 | return_dict(), 64 | base.FinalHandler()] 65 | 66 | self.state = base._HandledFunctionState(network) 67 | 68 | for outer, inner in zip(self.handlers, self.handlers[1:]): 69 | outer.set_inner(inner) 70 | 71 | self.outermost = self.handlers[0] 72 | self.outermost.initial_build(self.state, 73 | self.network, 74 | inputs=inputs, 75 | outputs=outputs, 76 | **kwargs) 77 | 78 | def __call__(self, *args, **kwargs): 79 | return self.outermost(self.state, *args, **kwargs) 80 | 81 | handled_fn = _HandledFunction 82 | -------------------------------------------------------------------------------- /canopy/handlers/misc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import treeano 4 | 5 | from .. import network_utils 6 | from . 
import base 7 | 8 | 9 | class CallbackWithInput(base.NetworkHandlerImpl): 10 | 11 | """ 12 | handler that calls a callback with both the result of the function and the 13 | inputs to the function 14 | """ 15 | 16 | def __init__(self, callback): 17 | self.callback = callback 18 | 19 | def call(self, fn, in_dict, *args, **kwargs): 20 | res = fn(in_dict, *args, **kwargs) 21 | self.callback(in_dict, res) 22 | return res 23 | 24 | callback_with_input = CallbackWithInput 25 | 26 | 27 | class ExponentialPolyakAveraging(base.NetworkHandlerImpl): 28 | 29 | """ 30 | Polyak-Ruppert averaging using an exponential moving average 31 | 32 | handler that averages together weights over time 33 | 34 | see "Adam: A Method for Stochastic Optimization" v8 -> extensions -> 35 | temporal averaging 36 | (http://arxiv.org/abs/1412.6980) 37 | """ 38 | 39 | def __init__(self, beta=0.9, average_only_floats=True): 40 | assert 0 < beta < 1 41 | self.beta = beta 42 | self.average_only_floats = average_only_floats 43 | self.iters_ = 0 44 | self.theta_bar_ = None 45 | 46 | def get_value_dict(self): 47 | value_dict = {} 48 | # unbias the estimates 49 | for k, v in self.theta_bar_.items(): 50 | unbiased = (v / (1 - self.beta ** self.iters_)).astype(v.dtype) 51 | value_dict[k] = unbiased 52 | return value_dict 53 | 54 | def __call__(self, state, *args, **kwargs): 55 | res = super(ExponentialPolyakAveraging, self).__call__( 56 | state, *args, **kwargs) 57 | value_dict = network_utils.to_value_dict(state.network) 58 | self.iters_ += 1 59 | # initialize moving weights 60 | if self.theta_bar_ is None: 61 | self.theta_bar_ = {} 62 | for k, v in value_dict.items(): 63 | self.theta_bar_[k] = np.zeros_like(v) 64 | # update values 65 | for k, v in value_dict.items(): 66 | if (self.average_only_floats 67 | and not treeano.utils.is_float_ndarray(v)): 68 | # keep last 69 | # --- 70 | # because we might only want to save parameters 71 | # ie. 
not averaging things like batch counts 72 | self.theta_bar_[k] = v 73 | else: 74 | # exponential moving average 75 | prev = self.theta_bar_[k] 76 | curr = self.beta * prev + (1 - self.beta) * v 77 | self.theta_bar_[k] = curr.astype(prev.dtype) 78 | return res 79 | 80 | exponential_polyak_averaging = ExponentialPolyakAveraging 81 | -------------------------------------------------------------------------------- /canopy/handlers/tests/base_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diogo149/treeano/9b3fd6bb5eb2f6738c9e5c357e70bef95dcae7b7/canopy/handlers/tests/base_test.py -------------------------------------------------------------------------------- /canopy/handlers/tests/batch_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | import canopy 9 | 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | def test_split_input(): 15 | network = tn.SequentialNode( 16 | "seq", 17 | [tn.InputNode("i", shape=(None, 2)), 18 | tn.ApplyNode("a", 19 | fn=(lambda x: x.shape[0].astype(fX) + x), 20 | shape_fn=(lambda s: s))] 21 | ).network() 22 | 23 | fn1 = canopy.handlers.handled_fn(network, 24 | [], 25 | {"x": "i"}, 26 | {"out": "seq"}) 27 | np.testing.assert_equal(fn1({"x": np.zeros((18, 2), dtype=fX)})["out"], 28 | np.ones((18, 2), dtype=fX) * 18) 29 | 30 | fn2 = canopy.handlers.handled_fn( 31 | network, 32 | [canopy.handlers.split_input(3, ["x"])], 33 | {"x": "i"}, 34 | {"out": "seq"}) 35 | np.testing.assert_equal(fn2({"x": np.zeros((18, 2), dtype=fX)})["out"], 36 | np.ones((18, 2), dtype=fX) * 3) 37 | 38 | 39 | def test_chunk_variables(): 40 | network = tn.SequentialNode( 41 | "seq", 42 | [tn.InputNode("i", shape=(None, 2)), 43 | tn.ApplyNode("a", 44 | fn=(lambda x: x.shape[0].astype(fX) + x), 45 | shape_fn=(lambda s: s))] 46 | ).network() 47 | 48 | fn1 = canopy.handlers.handled_fn(network, 49 | [], 50 | {"x": "i"}, 51 | {"out": "seq"}) 52 | np.testing.assert_equal(fn1({"x": np.zeros((18, 2), dtype=fX)})["out"], 53 | np.ones((18, 2), dtype=fX) * 18) 54 | 55 | fn2 = canopy.handlers.handled_fn( 56 | network, 57 | [canopy.handlers.chunk_variables(3, ["i"])], 58 | {"x": "i"}, 59 | {"out": "seq"}) 60 | np.testing.assert_equal(fn2({"x": np.zeros((18, 2), dtype=fX)})["out"], 61 | np.ones((18, 2), dtype=fX) * 3) 62 | 63 | 64 | def test_batch_pad(): 65 | 66 | def tmp(include_batch_pad): 67 | network = tn.SequentialNode( 68 | "seq", 69 | [tn.InputNode("i", shape=(None, 2)), 70 | tn.ApplyNode("a", 71 | fn=(lambda x: x.shape[0].astype(fX) + x), 72 | shape_fn=(lambda s: s))] 73 | ).network() 74 | handlers = [canopy.handlers.chunk_variables(3, ["i"])] 75 | if include_batch_pad: 76 | handlers.insert(0, canopy.handlers.batch_pad(3, ["x"])) 77 | fn = canopy.handlers.handled_fn(network, 78 | handlers, 79 | {"x": "i"}, 80 | {"out": "seq"}) 81 | return fn({"x": np.zeros((16, 2), dtype=fX)}) 82 | 83 | nt.raises(AssertionError)(tmp)(False) 84 | res = tmp(True) 85 | 86 | np.testing.assert_equal(res["out"], np.ones((18, 2), dtype=fX) * 3) 87 | -------------------------------------------------------------------------------- /canopy/handlers/tests/conditional_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | 
import treeano 7 | import treeano.nodes as tn 8 | import canopy 9 | 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | def test_call_after_every(): 15 | vals = [] 16 | 17 | def save_val(in_dict, result_dict): 18 | vals.append(result_dict["out"]) 19 | 20 | network = tn.InputNode("i", shape=()).network() 21 | fn = canopy.handlers.handled_fn( 22 | network, 23 | [canopy.handlers.call_after_every(3, save_val)], 24 | {"x": "i"}, 25 | {"out": "i"}) 26 | for i in range(100): 27 | fn({"x": i}) 28 | 29 | np.testing.assert_equal(np.arange(start=2, stop=100, step=3, dtype=fX), 30 | np.array(vals).ravel()) 31 | -------------------------------------------------------------------------------- /canopy/handlers/tests/misc_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | import canopy 9 | 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | def test_callback_with_input(): 15 | vals = [] 16 | 17 | def save_sum(in_dict, result_dict): 18 | vals.append(result_dict["out"] + in_dict["x"]) 19 | 20 | network = tn.InputNode("i", shape=()).network() 21 | fn = canopy.handlers.handled_fn( 22 | network, 23 | [canopy.handlers.callback_with_input(save_sum)], 24 | {"x": "i"}, 25 | {"out": "i"}) 26 | for i in range(100): 27 | fn({"x": i}) 28 | 29 | np.testing.assert_equal(2 * np.arange(100), 30 | np.array(vals).ravel()) 31 | -------------------------------------------------------------------------------- /canopy/network_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | import treeano 4 | 5 | 6 | def to_shared_dict(network): 7 | network.build() 8 | if not network.is_relative(): 9 | network = network[network.root_node.name] 10 | vws = network.find_vws_in_subtree(is_shared=True) 11 | name_to_shared = {} 12 | for vw in vws: 13 | assert vw.name not in name_to_shared 14 | # if vw.name != vw.variable.name, preallocated init will break 15 | assert vw.name == vw.variable.name 16 | name_to_shared[vw.name] = vw.variable 17 | return name_to_shared 18 | 19 | 20 | def to_value_dict(network): 21 | shared_dict = to_shared_dict(network) 22 | return {k: v.get_value() for k, v in shared_dict.items()} 23 | 24 | 25 | def load_value_dict(network, 26 | value_dict, 27 | strict_keys=True, 28 | ignore_different_shape=False): 29 | """ 30 | strict_keys: 31 | whether or not the network must have the exact same set of keys as the 32 | value_dict 33 | """ 34 | shared_dict = to_shared_dict(network) 35 | value_keys = set(value_dict.keys()) 36 | network_keys = set(shared_dict.keys()) 37 | if strict_keys: 38 | assert value_keys == network_keys 39 | keys = value_keys 40 | else: 41 | keys = set(value_dict.keys()) & set(shared_dict.keys()) 42 | 43 | loaded = 0 44 | for k in keys: 45 | shared = shared_dict[k] 46 | old_val = shared.get_value() 47 | new_val = value_dict[k] 48 | if ignore_different_shape: 49 | if old_val.shape != new_val.shape: 50 | continue 51 | else: 52 | assert old_val.shape == new_val.shape 53 | shared.set_value(new_val) 54 | loaded += 1 55 | print("loaded %d keys (out of %d in value dict, %d in network)" 56 | % (loaded, len(value_dict), len(shared_dict))) 57 | 58 | 59 | def to_preallocated_init(network): 60 | return treeano.inits.PreallocatedInit(to_shared_dict(network)) 61 | 62 | 63 | def 
num_parameters(network): 64 | """ 65 | returns the number of "parameter"s in a network 66 | """ 67 | vws = network.relative_network().find_vws_in_subtree(tags=["parameter"]) 68 | return sum(vw.value.size for vw in vws) 69 | -------------------------------------------------------------------------------- /canopy/node_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | TODO should this be in treeano.node_utils 3 | """ 4 | 5 | import treeano 6 | 7 | from . import walk_utils 8 | 9 | 10 | def postwalk_node(root_node, fn): 11 | """ 12 | traverses a tree of nodes in a postwalk with a function that can 13 | transform nodes 14 | """ 15 | def postwalk_fn(obj): 16 | if isinstance(obj, treeano.core.NodeAPI): 17 | res = fn(obj) 18 | assert isinstance(res, treeano.core.NodeAPI) 19 | return res 20 | else: 21 | return obj 22 | 23 | # make a copy so that we don't have to worry about mutating the 24 | # architecture 25 | node_copy = treeano.node_utils.copy_node(root_node) 26 | return walk_utils.walk(node_copy, postwalk_fn=postwalk_fn) 27 | 28 | 29 | def suffix_node(root_node, suffix): 30 | """ 31 | creates a copy of a node, with names suffixed by given suffix 32 | """ 33 | # use seen set to make sure there are no bugs 34 | seen = set() 35 | 36 | def copy_and_suffix(node): 37 | assert node.name not in seen 38 | seen.add(node.name) 39 | # assert that node is nodeimpl, since we only know how to set 40 | # name for those 41 | assert isinstance(node, treeano.NodeImpl) 42 | node._name += suffix 43 | return node 44 | 45 | return postwalk_node(root_node, copy_and_suffix) 46 | 47 | 48 | def format_node_name(root_node, format): 49 | """ 50 | creates a copy of a node, with each name transformed by the given 51 | format string 52 | """ 53 | # use seen set to make sure there are no bugs 54 | seen = set() 55 | 56 | def copy_and_format(node): 57 | assert node.name not in seen 58 | seen.add(node.name) 59 | # assert that node is nodeimpl, since we only know how to set 60 | # name for those 61 | assert isinstance(node, treeano.NodeImpl) 62 | node._name = format % node.name 63 | return node 64 | 65 | return postwalk_node(root_node, copy_and_format) 66 | -------------------------------------------------------------------------------- /canopy/sandbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diogo149/treeano/9b3fd6bb5eb2f6738c9e5c357e70bef95dcae7b7/canopy/sandbox/__init__.py -------------------------------------------------------------------------------- /canopy/serialization.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import os 3 | 4 | from . 
import network_utils 5 | 6 | 7 | def pickle_network(network, dirname): 8 | if not os.path.isdir(dirname): 9 | os.mkdir(dirname) 10 | root_node = network.root_node 11 | value_dict = network_utils.to_value_dict(network) 12 | with open(os.path.join(dirname, "root_node.pkl"), 'wb') as f: 13 | pickle.dump(root_node, f, protocol=pickle.HIGHEST_PROTOCOL) 14 | with open(os.path.join(dirname, "value_dict.pkl"), 'wb') as f: 15 | pickle.dump(value_dict, f, protocol=pickle.HIGHEST_PROTOCOL) 16 | 17 | 18 | def unpickle_network(dirname): 19 | with open(os.path.join(dirname, "root_node.pkl"), 'rb') as f: 20 | root_node = pickle.load(f) 21 | with open(os.path.join(dirname, "value_dict.pkl"), 'rb') as f: 22 | value_dict = pickle.load(f) 23 | network = root_node.network() 24 | network_utils.load_value_dict(network, value_dict) 25 | network.build() 26 | return network 27 | -------------------------------------------------------------------------------- /canopy/templates/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | TEMPLATE_DIR = os.path.dirname(os.path.abspath(__file__)) 5 | 6 | 7 | def template_path(*ps): 8 | """ 9 | returns a path relative to the template directory 10 | """ 11 | return os.path.join(TEMPLATE_DIR, *ps) 12 | 13 | 14 | def copy_template(template_name, location): 15 | """ 16 | performs a simple recursive copy of a template to a desired location 17 | """ 18 | assert not os.path.exists(location) 19 | shutil.copytree(os.path.join(TEMPLATE_DIR, template_name), 20 | location) 21 | -------------------------------------------------------------------------------- /canopy/templates/monitor_ui/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | 
28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /canopy/templates/monitor_ui/monitor.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diogo149/treeano/9b3fd6bb5eb2f6738c9e5c357e70bef95dcae7b7/canopy/templates/monitor_ui/monitor.jsonl -------------------------------------------------------------------------------- /canopy/tests/node_utils_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | import treeano 6 | import treeano.nodes as tn 7 | 8 | import canopy 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_postwalk_node(): 14 | names = [] 15 | 16 | def f(node): 17 | names.append(node.name) 18 | return node 19 | 20 | node = tn.HyperparameterNode( 21 | "1", 22 | tn.HyperparameterNode( 23 | "2", 24 | tn.IdentityNode("3"))) 25 | canopy.node_utils.postwalk_node(node, f) 26 | nt.assert_equal(names, ["3", "2", "1"]) 27 | 28 | 29 | def test_suffix_node(): 30 | node1 = tn.HyperparameterNode( 31 | "1", 32 | tn.HyperparameterNode( 33 | "2", 34 | tn.IdentityNode("3"))) 35 | node2 = tn.HyperparameterNode( 36 | "1_foo", 37 | tn.HyperparameterNode( 38 | "2_foo", 39 | tn.IdentityNode("3_foo"))) 40 | nt.assert_equal(canopy.node_utils.suffix_node(node1, "_foo"), 41 | node2) 42 | -------------------------------------------------------------------------------- /canopy/tests/serialization_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | 5 | import nose.tools as nt 6 | import numpy as np 7 | import theano 8 | import theano.tensor as T 9 | import treeano 10 | import treeano.nodes as tn 11 | 12 | import canopy 13 | 14 | fX = theano.config.floatX 15 | 16 | 17 | def test_pickle_unpickle_network(): 18 | temp_dir = tempfile.mkdtemp() 19 | dirname = os.path.join(temp_dir, "network") 20 | try: 21 | n1 = tn.SequentialNode( 22 | "seq", 23 | [tn.InputNode("i", shape=(10, 100)), 24 | tn.LinearMappingNode( 25 | "lm", 26 | output_dim=15, 27 | inits=[treeano.inits.NormalWeightInit()])] 28 | ).network() 29 | 30 | fn1 = n1.function(["i"], ["lm"]) 31 | x = np.random.randn(10, 100).astype(fX) 32 | canopy.serialization.pickle_network(n1, dirname) 33 | n2 = canopy.serialization.unpickle_network(dirname) 34 | fn2 = n2.function(["i"], ["lm"]) 35 | np.testing.assert_equal(fn1(x), fn2(x)) 36 | finally: 37 | shutil.rmtree(temp_dir) 38 | -------------------------------------------------------------------------------- /canopy/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from . import fns 2 | from . import node 3 | from . 
import tree 4 | 5 | from .fns import (transform_root_node, 6 | transform_node_data, 7 | transform_root_node_postwalk, 8 | transform_node_data_postwalk) 9 | from .node import (remove_nodes_with_class, 10 | remove_dropout, 11 | replace_node, 12 | update_hyperparameters) 13 | from .tree import (remove_nodes, 14 | remove_subtree, 15 | remove_parent, 16 | add_parent, 17 | add_hyperparameters, 18 | remove_parents, 19 | move_node) 20 | -------------------------------------------------------------------------------- /canopy/transforms/node.py: -------------------------------------------------------------------------------- 1 | """ 2 | node based transformations 3 | """ 4 | 5 | import treeano 6 | import treeano.nodes as tn 7 | 8 | from . import fns 9 | 10 | 11 | def remove_nodes_with_class(network, cls, **kwargs): 12 | """ 13 | replaces nodes of a given class with IdentityNode's with the same name 14 | """ 15 | 16 | def inner(node): 17 | if isinstance(node, cls): 18 | return tn.IdentityNode(node.name) 19 | else: 20 | return node 21 | 22 | return fns.transform_root_node_postwalk(network, inner, **kwargs) 23 | 24 | 25 | def remove_dropout(network, **kwargs): 26 | """ 27 | replaces DropoutNode's with IdentityNode's with the same name 28 | 29 | NOTE: only removes bernoulli dropout nodes 30 | """ 31 | return remove_nodes_with_class(network, tn.DropoutNode, **kwargs) 32 | 33 | 34 | def replace_node(network, name_to_node, **kwargs): 35 | """ 36 | name_to_node: 37 | map from name of the node to replace, to the new node 38 | """ 39 | 40 | def inner(node): 41 | if node.name in name_to_node: 42 | return name_to_node[node.name] 43 | else: 44 | return node 45 | 46 | return fns.transform_root_node_postwalk(network, inner, **kwargs) 47 | 48 | 49 | def update_hyperparameters(network, node_name, hyperparameters, **kwargs): 50 | """ 51 | updates a node's hyperparameters 52 | """ 53 | 54 | found = [False] 55 | 56 | def inner(node): 57 | if node.name == node_name: 58 | found[0] = True 59 | for k in hyperparameters: 60 | assert k in node.hyperparameter_names 61 | new_node = treeano.node_utils.copy_node(node) 62 | new_node.hyperparameters.update(hyperparameters) 63 | return new_node 64 | else: 65 | return node 66 | 67 | res = fns.transform_root_node_postwalk(network, inner, **kwargs) 68 | assert found[0], "%s not found in network" % node_name 69 | return res 70 | -------------------------------------------------------------------------------- /canopy/transforms/tests/node_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | import treeano 6 | import treeano.nodes as tn 7 | 8 | import canopy 9 | 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | def test_remove_dropout(): 15 | network1 = tn.SequentialNode( 16 | "seq", 17 | [tn.InputNode("i", shape=(3, 4, 5)), 18 | tn.DropoutNode("do", dropout_probability=0.5)]).network() 19 | network2 = canopy.transforms.remove_dropout(network1) 20 | 21 | assert "DropoutNode" in str(network1.root_node) 22 | assert "DropoutNode" not in str(network2.root_node) 23 | 24 | fn1 = network1.function(["i"], ["do"]) 25 | fn2 = network2.function(["i"], ["do"]) 26 | x = np.random.randn(3, 4, 5).astype(fX) 27 | 28 | @nt.raises(AssertionError) 29 | def fails(): 30 | np.testing.assert_equal(x, fn1(x)[0]) 31 | 32 | fails() 33 | np.testing.assert_equal(x, fn2(x)[0]) 34 | 35 | 36 | def test_replace_node(): 37 | network1 = tn.SequentialNode( 38 | "seq", 39 | 
[tn.InputNode("i", shape=(3, 4, 5)), 40 | tn.DropoutNode("do", dropout_probability=0.5)]).network() 41 | network2 = canopy.transforms.replace_node(network1, 42 | {"do": tn.IdentityNode("do")}) 43 | 44 | assert "DropoutNode" in str(network1.root_node) 45 | assert "DropoutNode" not in str(network2.root_node) 46 | 47 | fn1 = network1.function(["i"], ["do"]) 48 | fn2 = network2.function(["i"], ["do"]) 49 | x = np.random.randn(3, 4, 5).astype(fX) 50 | 51 | @nt.raises(AssertionError) 52 | def fails(): 53 | np.testing.assert_equal(x, fn1(x)[0]) 54 | 55 | fails() 56 | np.testing.assert_equal(x, fn2(x)[0]) 57 | 58 | 59 | def test_update_hyperparameters(): 60 | network1 = tn.SequentialNode( 61 | "seq", 62 | [tn.InputNode("i", shape=(3, 4, 5)), 63 | tn.DropoutNode("do", dropout_probability=0.5)]).network() 64 | network2 = canopy.transforms.update_hyperparameters( 65 | network1, 66 | "do", 67 | {"dropout_probability": 0.3}) 68 | 69 | assert network1["do"].find_hyperparameter(["dropout_probability"]) == 0.5 70 | assert network2["do"].find_hyperparameter(["dropout_probability"]) == 0.3 71 | -------------------------------------------------------------------------------- /examples/REINFORCE/constant.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | import treeano 5 | import treeano.nodes as tn 6 | from treeano.sandbox.nodes import REINFORCE 7 | 8 | fX = theano.config.floatX 9 | 10 | 11 | class ConstantStateNode(treeano.NodeImpl): 12 | 13 | input_keys = () 14 | hyperparameter_names = ("shape",) 15 | 16 | def compute_output(self, network): 17 | shape = network.find_hyperparameter(["shape"]) 18 | network.create_vw( 19 | "default", 20 | is_shared=True, 21 | shape=shape, 22 | tags={"parameter"}, 23 | default_inits=[], 24 | ) 25 | 26 | 27 | def reward_fn(x): 28 | return -T.sqr(x - 3.5).sum(axis=1) + 100 29 | 30 | graph = tn.GraphNode( 31 | "graph", 32 | [[tn.ConstantNode("state", value=T.zeros((1, 1))), 33 | ConstantStateNode("mu", shape=(1, 1)), 34 | tn.ConstantNode("sigma", value=1.), 35 | REINFORCE.NormalSampleNode("sampled"), 36 | tn.ApplyNode("reward", fn=reward_fn, shape_fn=lambda x: x[:1]), 37 | REINFORCE.NormalREINFORCECostNode("REINFORCE")], 38 | [{"from": "mu", "to": "sampled", "to_key": "mu"}, 39 | {"from": "sigma", "to": "sampled", "to_key": "sigma"}, 40 | {"from": "sampled", "to": "reward"}, 41 | {"from": "state", "to": "REINFORCE", "to_key": "state"}, 42 | {"from": "mu", "to": "REINFORCE", "to_key": "mu"}, 43 | {"from": "sigma", "to": "REINFORCE", "to_key": "sigma"}, 44 | {"from": "reward", "to": "REINFORCE", "to_key": "reward"}, 45 | {"from": "sampled", "to": "REINFORCE", "to_key": "sampled"}, 46 | {"from": "REINFORCE"}]] 47 | ) 48 | 49 | network = tn.AdamNode( 50 | "adam", 51 | {"subtree": graph, 52 | "cost": tn.ReferenceNode("cost", reference="REINFORCE")}, 53 | learning_rate=0.1 54 | ).network() 55 | fn = network.function([], ["graph", "mu"], include_updates=True) 56 | 57 | mus = [] 58 | for i in range(1000): 59 | _, mu = fn() 60 | print("Iter:", i, "Predicted constant:", mu) 61 | mus.append(mu) 62 | 63 | print("MSE from optimal constant:", np.mean((np.array(mus) - 3.5) ** 2)) 64 | -------------------------------------------------------------------------------- /examples/REINFORCE/linear.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 
| 4 | import numpy as np 5 | import theano 6 | import theano.tensor as T 7 | import treeano 8 | import treeano.nodes as tn 9 | from treeano.sandbox.nodes import REINFORCE 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | TARGET_WEIGHT = np.random.randn(10, 2).astype(fX) 15 | TARGET_BIAS = np.random.randn(2).astype(fX) 16 | 17 | 18 | class RewardNode(treeano.NodeImpl): 19 | 20 | input_keys = ("state", "sampled") 21 | 22 | def compute_output(self, network, state_vw, sampled_vw): 23 | W = T.constant(TARGET_WEIGHT) 24 | b = T.constant(TARGET_BIAS) 25 | target = T.dot(state_vw.variable, W) + b.dimshuffle("x", 0) 26 | reward = -T.sqr(sampled_vw.variable - target).sum(axis=1) 27 | network.create_vw( 28 | "raw_reward", 29 | variable=T.mean(reward), 30 | shape=(), 31 | ) 32 | baseline_reward = 100 33 | network.create_vw( 34 | "default", 35 | variable=reward + baseline_reward, 36 | shape=(state_vw.shape[0],), 37 | tags={"output"}, 38 | ) 39 | 40 | 41 | BATCH_SIZE = 64 42 | graph = tn.GraphNode( 43 | "graph", 44 | [[tn.InputNode("state", shape=(BATCH_SIZE, 10)), 45 | tn.DenseNode("mu", num_units=2), 46 | tn.ConstantNode("sigma", value=1.), 47 | REINFORCE.NormalSampleNode("sampled"), 48 | RewardNode("reward"), 49 | REINFORCE.NormalREINFORCECostNode("REINFORCE")], 50 | [{"from": "state", "to": "mu"}, 51 | {"from": "mu", "to": "sampled", "to_key": "mu"}, 52 | {"from": "sigma", "to": "sampled", "to_key": "sigma"}, 53 | {"from": "sampled", "to": "reward", "to_key": "sampled"}, 54 | {"from": "state", "to": "reward", "to_key": "state"}, 55 | {"from": "state", "to": "REINFORCE", "to_key": "state"}, 56 | {"from": "mu", "to": "REINFORCE", "to_key": "mu"}, 57 | {"from": "sigma", "to": "REINFORCE", "to_key": "sigma"}, 58 | {"from": "reward", "to": "REINFORCE", "to_key": "reward"}, 59 | {"from": "sampled", "to": "REINFORCE", "to_key": "sampled"}, 60 | {"from": "REINFORCE"}]] 61 | ) 62 | 63 | network = tn.AdamNode( 64 | "adam", 65 | {"subtree": graph, 66 | "cost": tn.ReferenceNode("cost", reference="REINFORCE")}, 67 | learning_rate=0.1 68 | ).network() 69 | fn = network.function( 70 | ["state"], [("reward", "raw_reward")], include_updates=True) 71 | 72 | errors = [] 73 | for i in range(5000): 74 | error, = fn(np.random.randn(BATCH_SIZE, 10).astype(fX)) 75 | if i % 100 == 0: 76 | print("Iter:", i, "Error:", error) 77 | errors.append(error) 78 | 79 | print("mean reward:", np.mean(errors)) 80 | -------------------------------------------------------------------------------- /examples/recurrent_hc/lag_task_gru.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import numpy as np 5 | import theano 6 | import theano.tensor as T 7 | import treeano 8 | import treeano.nodes as tn 9 | from treeano.sandbox.nodes import recurrent_hc 10 | 11 | fX = theano.config.floatX 12 | 13 | # ################################## config ################################## 14 | 15 | N_TRAIN = 1000 16 | LAG = 10 17 | LENGTH = 50 18 | HIDDEN_STATE_SIZE = 10 19 | BATCH_SIZE = 64 20 | 21 | # ############################### prepare data ############################### 22 | 23 | 24 | def binary_toy_data(lag=1, length=20): 25 | inputs = np.random.randint(0, 2, length).astype(fX) 26 | outputs = np.array(lag * [0] + list(inputs), dtype=fX)[:length] 27 | return inputs, outputs 28 | 29 | 30 | def minibatch(lag, length, batch_size): 31 | inputs = [] 32 | outputs = [] 33 | for _ in range(batch_size): 34 
| i, o = binary_toy_data(lag, length) 35 | inputs.append(i) 36 | outputs.append(o) 37 | return np.array(inputs)[..., np.newaxis], np.array(outputs)[..., np.newaxis] 38 | 39 | 40 | # ############################## prepare model ############################## 41 | 42 | model = tn.HyperparameterNode( 43 | "model", 44 | tn.SequentialNode( 45 | "seq", 46 | [tn.InputNode("x", shape=(None, None, 1)), 47 | recurrent_hc.GRUNode("gru1"), 48 | tn.LinearMappingNode("y_linear", output_dim=1), 49 | tn.AddBiasNode("y_bias", broadcastable_axes=(0, 1)), 50 | tn.SigmoidNode("sigmoid"), 51 | ]), 52 | inits=[treeano.inits.OrthogonalInit()], 53 | num_units=HIDDEN_STATE_SIZE, 54 | learn_init=True, 55 | grad_clip=1, 56 | ) 57 | 58 | with_updates = tn.HyperparameterNode( 59 | "with_updates", 60 | tn.AdamNode( 61 | "adam", 62 | {"subtree": model, 63 | "cost": tn.TotalCostNode("cost", { 64 | "pred": tn.ReferenceNode("pred_ref", reference="model"), 65 | "target": tn.InputNode("y", shape=(None, None, 1))}, 66 | )}), 67 | cost_function=treeano.utils.squared_error, 68 | ) 69 | network = with_updates.network() 70 | 71 | train_fn = network.function(["x", "y"], ["cost"], include_updates=True) 72 | valid_fn = network.function(["x"], ["model"]) 73 | 74 | 75 | # ################################# training ################################# 76 | 77 | print("Starting training...") 78 | 79 | import time 80 | st = time.time() 81 | for i in range(N_TRAIN): 82 | inputs, outputs = minibatch(lag=LAG, length=LENGTH, batch_size=BATCH_SIZE) 83 | loss = train_fn(inputs, outputs)[0] 84 | print(loss) 85 | print("total_time: %s" % (time.time() - st)) 86 | 87 | inputs, outputs = minibatch(lag=LAG, length=LENGTH, batch_size=BATCH_SIZE) 88 | pred = valid_fn(inputs)[0] 89 | pred_accuracies = (np.round(pred) == outputs).mean(axis=0)[LAG:] 90 | print(pred_accuracies) 91 | print(pred_accuracies.mean()) 92 | -------------------------------------------------------------------------------- /examples/recurrent_hc/lag_task_lstm.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import numpy as np 5 | import theano 6 | import theano.tensor as T 7 | import treeano 8 | import treeano.nodes as tn 9 | from treeano.sandbox.nodes import recurrent_hc 10 | 11 | fX = theano.config.floatX 12 | 13 | # ################################## config ################################## 14 | 15 | N_TRAIN = 1000 16 | LAG = 10 17 | LENGTH = 50 18 | HIDDEN_STATE_SIZE = 10 19 | BATCH_SIZE = 64 20 | 21 | # ############################### prepare data ############################### 22 | 23 | 24 | def binary_toy_data(lag=1, length=20): 25 | inputs = np.random.randint(0, 2, length).astype(fX) 26 | outputs = np.array(lag * [0] + list(inputs), dtype=fX)[:length] 27 | return inputs, outputs 28 | 29 | 30 | def minibatch(lag, length, batch_size): 31 | inputs = [] 32 | outputs = [] 33 | for _ in range(batch_size): 34 | i, o = binary_toy_data(lag, length) 35 | inputs.append(i) 36 | outputs.append(o) 37 | return np.array(inputs)[..., np.newaxis], np.array(outputs)[..., np.newaxis] 38 | 39 | 40 | # ############################## prepare model ############################## 41 | 42 | model = tn.HyperparameterNode( 43 | "model", 44 | tn.SequentialNode( 45 | "seq", 46 | [tn.InputNode("x", shape=(None, None, 1)), 47 | recurrent_hc.LSTMNode("lstm1"), 48 | tn.LinearMappingNode("y_linear", output_dim=1), 49 | tn.AddBiasNode("y_bias", 
broadcastable_axes=(0, 1)), 50 | tn.SigmoidNode("sigmoid"), 51 | ]), 52 | inits=[treeano.inits.OrthogonalInit()], 53 | num_units=HIDDEN_STATE_SIZE, 54 | learn_init=True, 55 | grad_clip=1, 56 | ) 57 | 58 | with_updates = tn.HyperparameterNode( 59 | "with_updates", 60 | tn.AdamNode( 61 | "adam", 62 | {"subtree": model, 63 | "cost": tn.TotalCostNode("cost", { 64 | "pred": tn.ReferenceNode("pred_ref", reference="model"), 65 | "target": tn.InputNode("y", shape=(None, None, 1))}, 66 | )}), 67 | cost_function=treeano.utils.squared_error, 68 | ) 69 | network = with_updates.network() 70 | 71 | train_fn = network.function(["x", "y"], ["cost"], include_updates=True) 72 | valid_fn = network.function(["x"], ["model"]) 73 | 74 | 75 | # ################################# training ################################# 76 | 77 | print("Starting training...") 78 | 79 | import time 80 | st = time.time() 81 | for i in range(N_TRAIN): 82 | inputs, outputs = minibatch(lag=LAG, length=LENGTH, batch_size=BATCH_SIZE) 83 | loss = train_fn(inputs, outputs)[0] 84 | print(loss) 85 | print("total_time: %s" % (time.time() - st)) 86 | 87 | inputs, outputs = minibatch(lag=LAG, length=LENGTH, batch_size=BATCH_SIZE) 88 | pred = valid_fn(inputs)[0] 89 | pred_accuracies = (np.round(pred) == outputs).mean(axis=0)[LAG:] 90 | print(pred_accuracies) 91 | print(pred_accuracies.mean()) 92 | -------------------------------------------------------------------------------- /examples/recurrent_hc/lag_task_rnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import numpy as np 5 | import theano 6 | import theano.tensor as T 7 | import treeano 8 | import treeano.nodes as tn 9 | from treeano.sandbox.nodes import recurrent_hc 10 | 11 | fX = theano.config.floatX 12 | 13 | # ################################## config ################################## 14 | 15 | N_TRAIN = 1000 16 | LAG = 10 17 | LENGTH = 50 18 | HIDDEN_STATE_SIZE = 10 19 | BATCH_SIZE = 64 20 | 21 | # ############################### prepare data ############################### 22 | 23 | 24 | def binary_toy_data(lag=1, length=20): 25 | inputs = np.random.randint(0, 2, length).astype(fX) 26 | outputs = np.array(lag * [0] + list(inputs), dtype=fX)[:length] 27 | return inputs, outputs 28 | 29 | 30 | def minibatch(lag, length, batch_size): 31 | inputs = [] 32 | outputs = [] 33 | for _ in range(batch_size): 34 | i, o = binary_toy_data(lag, length) 35 | inputs.append(i) 36 | outputs.append(o) 37 | return np.array(inputs)[..., np.newaxis], np.array(outputs)[..., np.newaxis] 38 | 39 | 40 | # ############################## prepare model ############################## 41 | 42 | model = tn.HyperparameterNode( 43 | "model", 44 | tn.SequentialNode( 45 | "seq", 46 | [tn.InputNode("x", shape=(None, None, 1)), 47 | recurrent_hc.RNNNode("rnn1"), 48 | recurrent_hc.RNNNode("rnn2"), 49 | tn.LinearMappingNode("y_linear", output_dim=1), 50 | tn.AddBiasNode("y_bias", broadcastable_axes=(0, 1)), 51 | tn.SigmoidNode("sigmoid"), 52 | ]), 53 | inits=[treeano.inits.OrthogonalInit()], 54 | num_units=HIDDEN_STATE_SIZE, 55 | learn_init=True, 56 | grad_clip=1, 57 | ) 58 | 59 | with_updates = tn.HyperparameterNode( 60 | "with_updates", 61 | tn.AdamNode( 62 | "adam", 63 | {"subtree": model, 64 | "cost": tn.TotalCostNode("cost", { 65 | "pred": tn.ReferenceNode("pred_ref", reference="model"), 66 | "target": tn.InputNode("y", shape=(None, None, 1))}, 67 | )}), 68 | 
cost_function=treeano.utils.squared_error, 69 | ) 70 | network = with_updates.network() 71 | 72 | train_fn = network.function(["x", "y"], ["cost"], include_updates=True) 73 | valid_fn = network.function(["x"], ["model"]) 74 | 75 | 76 | # ################################# training ################################# 77 | 78 | print("Starting training...") 79 | 80 | import time 81 | st = time.time() 82 | for i in range(N_TRAIN): 83 | inputs, outputs = minibatch(lag=LAG, length=LENGTH, batch_size=BATCH_SIZE) 84 | loss = train_fn(inputs, outputs)[0] 85 | print(loss) 86 | print("total_time: %s" % (time.time() - st)) 87 | 88 | inputs, outputs = minibatch(lag=LAG, length=LENGTH, batch_size=BATCH_SIZE) 89 | pred = valid_fn(inputs)[0] 90 | pred_accuracies = (np.round(pred) == outputs).mean(axis=0)[LAG:] 91 | print(pred_accuracies) 92 | print(pred_accuracies.mean()) 93 | -------------------------------------------------------------------------------- /examples/simple_rnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import numpy as np 5 | import theano 6 | import theano.tensor as T 7 | import treeano 8 | import treeano.nodes as tn 9 | 10 | fX = theano.config.floatX 11 | 12 | # ################################## config ################################## 13 | 14 | N_TRAIN = 5000 15 | LAG = 20 16 | LENGTH = 50 17 | HIDDEN_STATE_SIZE = 10 18 | 19 | # ############################### prepare data ############################### 20 | 21 | 22 | def binary_toy_data(lag=1, length=20): 23 | inputs = np.random.randint(0, 2, length).astype(fX) 24 | outputs = np.array(lag * [0] + list(inputs), dtype=fX)[:length] 25 | return inputs, outputs 26 | 27 | 28 | # ############################## prepare model ############################## 29 | 30 | model = tn.HyperparameterNode( 31 | "model", 32 | tn.SequentialNode( 33 | "seq", 34 | [tn.InputNode("x", shape=(None, 1)), 35 | tn.recurrent.SimpleRecurrentNode( 36 | "srn", 37 | tn.TanhNode("nonlin"), 38 | batch_size=None, 39 | num_units=HIDDEN_STATE_SIZE), 40 | tn.scan.ScanNode( 41 | "scan", 42 | tn.DenseNode("fc", num_units=1)), 43 | tn.SigmoidNode("pred"), 44 | ]), 45 | inits=[treeano.inits.NormalWeightInit(0.01)], 46 | scan_axis=0 47 | ) 48 | 49 | with_updates = tn.HyperparameterNode( 50 | "with_updates", 51 | tn.AdamNode( 52 | "adam", 53 | {"subtree": model, 54 | "cost": tn.TotalCostNode("cost", { 55 | "pred": tn.ReferenceNode("pred_ref", reference="model"), 56 | "target": tn.InputNode("y", shape=(None, 1))}, 57 | )}), 58 | cost_function=treeano.utils.squared_error, 59 | ) 60 | network = with_updates.network() 61 | 62 | train_fn = network.function(["x", "y"], ["cost"], include_updates=True) 63 | valid_fn = network.function(["x"], ["model"]) 64 | 65 | 66 | # ################################# training ################################# 67 | 68 | print("Starting training...") 69 | 70 | import time 71 | st = time.time() 72 | for i in range(N_TRAIN): 73 | inputs, outputs = binary_toy_data(lag=LAG, length=LENGTH) 74 | loss = train_fn(inputs.reshape(-1, 1), outputs.reshape(-1, 1))[0] 75 | if (i % (N_TRAIN // 100)) == 0: 76 | print(loss) 77 | print("total_time: %s" % (time.time() - st)) 78 | 79 | inputs, outputs = binary_toy_data(lag=LAG, length=LENGTH) 80 | pred = valid_fn(inputs.reshape(-1, 1))[0].flatten() 81 | print(np.round(pred) == outputs) 82 | 
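83 | # optional: summarize accuracy the way the recurrent_hc lag task examples above do, 84 | # ignoring the first LAG outputs, which are all-zero padding by construction 85 | accuracy = (np.round(pred) == outputs)[LAG:].mean() 86 | print("accuracy (excluding first %d steps): %s" % (LAG, accuracy))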
-------------------------------------------------------------------------------- /examples/simple_rnn_comparison/README.org: -------------------------------------------------------------------------------- 1 | A simple comparison between a pure theano RNN and a treeano RNN. 2 | 3 | Goal: have the treeano RNN be as fast as the pure theano version. -------------------------------------------------------------------------------- /examples/simple_rnn_comparison/with_treeano.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import numpy as np 5 | import theano 6 | import theano.tensor as T 7 | import treeano 8 | import treeano.nodes as tn 9 | 10 | fX = theano.config.floatX 11 | 12 | # ################################## config ################################## 13 | 14 | N_TRAIN = 5000 15 | LAG = 20 16 | LENGTH = 50 17 | HIDDEN_STATE_SIZE = 10 18 | 19 | # ############################### prepare data ############################### 20 | 21 | 22 | def binary_toy_data(lag=1, length=20): 23 | inputs = np.random.randint(0, 2, length).astype(fX) 24 | outputs = np.array(lag * [0] + list(inputs), dtype=fX)[:length] 25 | return inputs, outputs 26 | 27 | 28 | # ############################## prepare model ############################## 29 | 30 | model = tn.HyperparameterNode( 31 | "model", 32 | tn.SequentialNode( 33 | "seq", 34 | [tn.InputNode("x", shape=(None, 1)), 35 | tn.recurrent.SimpleRecurrentNode( 36 | "srn", 37 | tn.TanhNode("nonlin"), 38 | batch_size=None, 39 | num_units=HIDDEN_STATE_SIZE), 40 | tn.scan.ScanNode( 41 | "scan", 42 | tn.DenseNode("fc", num_units=1)), 43 | tn.SigmoidNode("pred"), 44 | ]), 45 | inits=[treeano.inits.NormalWeightInit(0.01)], 46 | batch_axis=None, 47 | scan_axis=0 48 | ) 49 | 50 | with_updates = tn.HyperparameterNode( 51 | "with_updates", 52 | tn.SGDNode( 53 | "sgd", 54 | {"subtree": model, 55 | "cost": tn.TotalCostNode("cost", { 56 | "pred": tn.ReferenceNode("pred_ref", reference="model"), 57 | "target": tn.InputNode("y", shape=(None, 1))}, 58 | )}), 59 | learning_rate=0.1, 60 | cost_function=treeano.utils.squared_error, 61 | ) 62 | network = with_updates.network() 63 | 64 | train_fn = network.function(["x", "y"], ["cost"], include_updates=True) 65 | valid_fn = network.function(["x"], ["model"]) 66 | 67 | 68 | # ################################# training ################################# 69 | 70 | print("Starting training...") 71 | 72 | import time 73 | st = time.time() 74 | for i in range(N_TRAIN): 75 | inputs, outputs = binary_toy_data(lag=LAG, length=LENGTH) 76 | loss = train_fn(inputs.reshape(-1, 1), outputs.reshape(-1, 1))[0] 77 | if (i % (N_TRAIN // 100)) == 0: 78 | print(loss) 79 | print("total_time: %s" % (time.time() - st)) 80 | 81 | inputs, outputs = binary_toy_data(lag=LAG, length=LENGTH) 82 | pred = valid_fn(inputs.reshape(-1, 1))[0].flatten() 83 | print(np.round(pred) == outputs) 84 | -------------------------------------------------------------------------------- /examples/simple_rnn_comparison/without_treeano.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | 5 | fX = theano.config.floatX 6 | 7 | LAG = 20 8 | LENGTH = 50 9 | N_TRAIN = 5000 10 | HIDDEN_STATE_SIZE = 10 11 | 12 | 13 | def binary_toy_data(lag=1, length=20): 14 | inputs = np.random.randint(0, 2, length).astype(fX) 15 | outputs =
np.array(lag * [0] + list(inputs), dtype=fX)[:length] 16 | return inputs, outputs 17 | 18 | 19 | W_x = theano.shared( 20 | (0.1 * np.random.randn(1, HIDDEN_STATE_SIZE)).astype(fX)) 21 | W_h = theano.shared( 22 | (0.1 * np.random.randn(HIDDEN_STATE_SIZE, 23 | HIDDEN_STATE_SIZE)).astype(fX)) 24 | W_y = theano.shared( 25 | (0.1 * np.random.randn(HIDDEN_STATE_SIZE, 1)).astype(fX)) 26 | 27 | b_h = theano.shared(np.zeros((HIDDEN_STATE_SIZE,), dtype=fX)) 28 | b_y = theano.shared(np.zeros((1,), dtype=fX)) 29 | 30 | 31 | X = T.matrix("X") 32 | Y = T.matrix("Y") 33 | 34 | 35 | def step(x, h): 36 | new_h = T.tanh(T.dot(x, W_x) + T.dot(h, W_h) + b_h) 37 | new_y = T.nnet.sigmoid(T.dot(new_h, W_y) + b_y) 38 | return new_h, new_y 39 | 40 | 41 | results, updates = theano.scan( 42 | fn=step, 43 | sequences=[X], 44 | outputs_info=[T.patternbroadcast(T.zeros((HIDDEN_STATE_SIZE)), 45 | (False,)), None], 46 | ) 47 | ys = results[1] 48 | 49 | loss = T.mean((ys - Y) ** 2) 50 | params = [W_x, W_h, W_y, b_h, b_y] 51 | grads = T.grad(loss, params) 52 | updates = [] 53 | for param, grad in zip(params, grads): 54 | updates.append((param, param - grad * 0.1)) 55 | 56 | train_fn = theano.function([X, Y], loss, updates=updates) 57 | valid_fn = theano.function([X], ys) 58 | 59 | 60 | import time 61 | st = time.time() 62 | for i in range(N_TRAIN): 63 | inputs, outputs = binary_toy_data(lag=LAG, length=LENGTH) 64 | loss = train_fn(inputs.reshape(-1, 1), outputs.reshape(-1, 1)) 65 | if (i % (N_TRAIN // 100)) == 0: 66 | print(loss) 67 | print("total_time: %s" % (time.time() - st)) 68 | 69 | inputs, outputs = binary_toy_data(lag=LAG, length=LENGTH) 70 | pred = valid_fn(inputs.reshape(-1, 1)).flatten() 71 | print(np.round(pred) == outputs) 72 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | treeano_version = '0.0.1' 4 | 5 | setup( 6 | name="treeano", 7 | version=treeano_version, 8 | packages=["treeano", "canopy"] 9 | ) 10 | -------------------------------------------------------------------------------- /treeano/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = """ 2 | lasagne 3 | sandbox 4 | visualization 5 | """.split() 6 | 7 | from . import utils 8 | from . import core 9 | from . import theano_extensions 10 | from . import nodes 11 | from . import inits 12 | from . import node_utils 13 | 14 | from .core import (UpdateDeltas, 15 | SharedInit, 16 | WeightInit, 17 | VariableWrapper, 18 | register_node, 19 | Network, 20 | NodeImpl, 21 | WrapperNodeImpl, 22 | Wrapper1NodeImpl, 23 | Wrapper0NodeImpl) 24 | -------------------------------------------------------------------------------- /treeano/core/__init__.py: -------------------------------------------------------------------------------- 1 | from . import update_deltas 2 | from . import graph 3 | from . import inits 4 | from . import variable 5 | from . import serialization_state 6 | from . import children_container 7 | from . import network 8 | from . import node 9 | from .
import node_impl 10 | 11 | from .update_deltas import UpdateDeltas 12 | from .inits import (SharedInit, 13 | WeightInit) 14 | from .variable import VariableWrapper 15 | from .serialization_state import (register_node, 16 | register_children_container, 17 | children_container_to_data, 18 | children_container_from_data, 19 | node_to_data, 20 | node_from_data) 21 | from .children_container import (ChildrenContainer, 22 | ListChildrenContainer, 23 | NoneChildrenContainer, 24 | ChildContainer, 25 | DictChildrenContainer, 26 | DictChildrenContainerSchema, 27 | NodesAndEdgesContainer) 28 | from .network import (MissingHyperparameter, 29 | Network) 30 | from .node import NodeAPI 31 | from .node_impl import (NodeImpl, 32 | WrapperNodeImpl, 33 | Wrapper1NodeImpl, 34 | Wrapper0NodeImpl) 35 | -------------------------------------------------------------------------------- /treeano/core/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diogo149/treeano/9b3fd6bb5eb2f6738c9e5c357e70bef95dcae7b7/treeano/core/tests/__init__.py -------------------------------------------------------------------------------- /treeano/core/tests/inits_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | 5 | import treeano 6 | 7 | 8 | fX = theano.config.floatX 9 | 10 | 11 | def test_constant_init(): 12 | class DummyNode(treeano.NodeImpl): 13 | 14 | input_keys = () 15 | 16 | def init_state(self, network): 17 | network.set_hyperparameter(self.name, 18 | "inits", 19 | [treeano.inits.ConstantInit(1)]) 20 | 21 | def compute_output(self, network): 22 | inits = network.find_hyperparameter(["inits"]) 23 | network.create_vw( 24 | "default", 25 | is_shared=True, 26 | shape=(1, 2, 3), 27 | inits=inits, 28 | ) 29 | 30 | network = DummyNode("dummy").network() 31 | fn = network.function([], ["dummy"]) 32 | np.testing.assert_allclose(fn()[0], 33 | np.ones((1, 2, 3)).astype(fX), 34 | rtol=1e-5, 35 | atol=1e-8) 36 | -------------------------------------------------------------------------------- /treeano/core/tests/network_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | from treeano import core 3 | import treeano.nodes as tn 4 | 5 | 6 | def test_find_hyperparameters(): 7 | class FooNode(core.WrapperNodeImpl): 8 | hyperparameter_names = ("a", "b", "c") 9 | 10 | last_foo = FooNode("last", [tn.InputNode("i", shape=(1,))], b=1) 11 | mid_foo = FooNode("mid", [last_foo], a=2, c=3) 12 | top_foo = FooNode("top", [mid_foo], a=4, b=5, c=6) 13 | 14 | network = top_foo.network( 15 | default_hyperparameters={"a": 7, "b": 8, "c": 9}, 16 | override_hyperparameters={"a": 10, "b": 11, "c": 12} 17 | ) 18 | 19 | nt.assert_equal([10, 11, 12, 1, 2, 3, 4, 5, 6, 13, 7, 8, 9], 20 | list(network["last"].find_hyperparameters(["a", "b", "c"], 21 | 13))) 22 | nt.assert_equal([10, 11, 12, 2, 3, 4, 5, 6, 13, 7, 8, 9], 23 | list(network["mid"].find_hyperparameters(["a", "b", "c"], 24 | 13))) 25 | nt.assert_equal([10, 11, 12, 4, 5, 6, 13, 7, 8, 9], 26 | list(network["top"].find_hyperparameters(["a", "b", "c"], 27 | 13))) 28 | -------------------------------------------------------------------------------- /treeano/core/tests/node_impl_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | from treeano import core 3 | 4 | 5 | 
@nt.raises(AssertionError) 6 | def test_node_impl_hyperparameter_names(): 7 | class FooNode(core.NodeImpl): 8 | hyperparameter_names = ("a", "b") 9 | 10 | FooNode(name="foo", c=3) 11 | 12 | 13 | def test_node_impl_repr1(): 14 | class FooNode(core.NodeImpl): 15 | hyperparameter_names = ("a", "b") 16 | 17 | nt.assert_equal(repr(FooNode(name="foo", a=3)), 18 | "FooNode(name='foo', a=3)") 19 | 20 | nt.assert_equal(repr(FooNode(name="foo", a=3)), 21 | str(FooNode(name="foo", a=3)),) 22 | 23 | 24 | def test_node_impl_repr_children_container(): 25 | class FooNode(core.NodeImpl): 26 | hyperparameter_names = ("a", "b") 27 | children_container = core.ListChildrenContainer 28 | 29 | node = FooNode(name="foo", 30 | a=3, 31 | children=[FooNode(name="bar1", 32 | children=[FooNode(name="choo", 33 | children=[])]), 34 | FooNode(name="bar2", 35 | children=[]), ]) 36 | nt.assert_equal(repr(node), 37 | """ 38 | FooNode(name='foo', a=3) 39 | | FooNode(name='bar1') 40 | | | FooNode(name='choo') 41 | | FooNode(name='bar2') 42 | """.strip()) 43 | 44 | 45 | def test_node_impl_get_hyperparameter1(): 46 | class FooNode(core.NodeImpl): 47 | hyperparameter_names = ("a", "b") 48 | 49 | nt.assert_equal(FooNode(name="foo", a=3).get_hyperparameter(None, "a"), 3) 50 | 51 | 52 | @nt.raises(core.MissingHyperparameter) 53 | def test_node_impl_get_hyperparameter2(): 54 | class FooNode(core.NodeImpl): 55 | hyperparameter_names = ("a", "b") 56 | 57 | FooNode(name="foo", a=3).get_hyperparameter(None, "b") 58 | -------------------------------------------------------------------------------- /treeano/core/tests/serialization_state_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | from treeano import core 3 | 4 | 5 | @nt.raises(AssertionError) 6 | def test_duplicate_register_child_container(): 7 | @core.register_children_container("list") 8 | class Foo(object): 9 | pass 10 | 11 | 12 | @nt.raises(AssertionError) 13 | def test_duplicate_register_node(): 14 | @core.register_node("input") 15 | class Foo(object): 16 | pass 17 | -------------------------------------------------------------------------------- /treeano/core/tests/update_deltas_test.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | import treeano 4 | 5 | 6 | def test_update_deltas(): 7 | x = theano.shared(0, name="x") 8 | ud = treeano.core.UpdateDeltas({x: 0}) 9 | ud += 1 10 | ud *= 2 11 | fn = theano.function([], updates=ud.to_updates()) 12 | fn() 13 | assert x.get_value() == 2 14 | fn() 15 | assert x.get_value() == 4 16 | 17 | 18 | def test_update_deltas_getitem(): 19 | x = theano.shared(0, name="x") 20 | ud = treeano.core.UpdateDeltas({}) 21 | assert ud[x] == 0 22 | ud = treeano.core.UpdateDeltas({x: 5}) 23 | fn = theano.function([], updates=ud.to_updates()) 24 | fn() 25 | assert x.get_value() == 5 26 | fn() 27 | assert x.get_value() == 10 28 | 29 | 30 | def test_update_deltas_setitem(): 31 | x = theano.shared(0, name="x") 32 | ud = treeano.core.UpdateDeltas({}) 33 | ud[x] += 3 34 | assert ud[x] == 3 35 | ud[x] = 7 36 | assert ud[x] == 7 37 | fn = theano.function([], updates=ud.to_updates()) 38 | fn() 39 | assert x.get_value() == 7 40 | 41 | 42 | def test_update_deltas_add1(): 43 | x = theano.shared(0, name="x") 44 | ud1 = treeano.core.UpdateDeltas({x: 3}) 45 | ud1b = ud1 46 | ud2 = treeano.core.UpdateDeltas({x: 4}) 47 | ud3 = ud1 + ud2 48 | assert ud1[x] == 3 49 | assert ud1b[x] == 3 50 | assert ud2[x] == 4 51 | 
assert ud3[x] == 7 52 | 53 | 54 | def test_update_deltas_iadd1(): 55 | x = theano.shared(0, name="x") 56 | ud1 = treeano.core.UpdateDeltas({x: 3}) 57 | ud1b = ud1 58 | ud2 = treeano.core.UpdateDeltas({x: 4}) 59 | ud1 += ud2 60 | assert ud1[x] == 7 61 | assert ud1b[x] == 7 62 | assert ud2[x] == 4 63 | 64 | 65 | def test_update_deltas_mul1(): 66 | x = theano.shared(0, name="x") 67 | ud1 = treeano.core.UpdateDeltas({x: 3}) 68 | ud2 = ud1 69 | ud1 = ud1 * 2 70 | assert ud1[x] == 6 71 | assert ud2[x] == 3 72 | 73 | 74 | def test_update_deltas_imul1(): 75 | x = theano.shared(0, name="x") 76 | ud1 = treeano.core.UpdateDeltas({x: 3}) 77 | ud2 = ud1 78 | ud1 *= 2 79 | assert ud1[x] == 6 80 | assert ud2[x] == 6 81 | 82 | 83 | def test_update_deltas_smart_mul1(): 84 | x = theano.shared(0, name="x") 85 | s = T.scalar() 86 | ud = treeano.core.UpdateDeltas({x: s}) 87 | assert ud[x] is s 88 | ud *= 0 89 | assert ud[x] == 0 90 | 91 | 92 | def test_update_deltas_smart_mul2(): 93 | x = theano.shared(0, name="x") 94 | s = T.scalar() 95 | ud = treeano.core.UpdateDeltas({x: s}) 96 | assert ud[x] is s 97 | ud *= 1 98 | assert ud[x] is s 99 | 100 | 101 | def test_update_deltas_smart_add(): 102 | x = theano.shared(0, name="x") 103 | s = T.scalar() 104 | ud = treeano.core.UpdateDeltas({x: s}) 105 | assert ud[x] is s 106 | ud += 0 107 | assert ud[x] is s 108 | 109 | 110 | def test_update_deltas_to_updates_zero_update(): 111 | x = theano.shared(0, name="x") 112 | ud = treeano.core.UpdateDeltas({x: 0}) 113 | assert len(ud.to_updates()) == 0 114 | -------------------------------------------------------------------------------- /treeano/core/tests/variable_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import numpy as np 5 | import theano 6 | import theano.tensor as T 7 | 8 | import treeano 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_variable1(): 14 | i = T.iscalar() 15 | o = treeano.core.variable.VariableWrapper("foo", variable=i).variable 16 | fn = theano.function([i], o) 17 | for _ in range(10): 18 | x = np.random.randint(1e6) 19 | assert fn(x) == x 20 | 21 | 22 | def test_variable2(): 23 | s = treeano.core.variable.VariableWrapper("foo", 24 | shape=(3, 4, 5), 25 | is_shared=True, 26 | inits=[]) 27 | assert s.value.sum() == 0 28 | x = np.random.randn(3, 4, 5).astype(theano.config.floatX) 29 | s.value = x 30 | assert np.allclose(s.value, x) 31 | try: 32 | s.value = np.random.randn(5, 4, 3) 33 | except: 34 | pass 35 | else: 36 | assert False 37 | 38 | 39 | def test_variable_symbolic_shape(): 40 | m = T.matrix() 41 | f = treeano.core.variable.VariableWrapper("foo", 42 | variable=m, 43 | shape=(4, None)) 44 | s = f.symbolic_shape() 45 | assert isinstance(s, tuple) 46 | assert s[0] == 4 47 | assert isinstance(s[1], theano.gof.graph.Variable) 48 | assert s[1].eval({m: np.zeros((4, 100), dtype=fX)}) == 100 49 | -------------------------------------------------------------------------------- /treeano/inits/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diogo149/treeano/9b3fd6bb5eb2f6738c9e5c357e70bef95dcae7b7/treeano/inits/tests/__init__.py -------------------------------------------------------------------------------- /treeano/inits/tests/inits_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 
2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | fX = theano.config.floatX 10 | 11 | 12 | def test_tied_init(): 13 | network = tn.SequentialNode( 14 | "s", 15 | [tn.InputNode("i", shape=()), 16 | tn.AddBiasNode("b1", inits=[treeano.inits.ConstantInit(42)]), 17 | tn.AddBiasNode("b2", inits=[treeano.inits.TiedInit("b2", "b1")])] 18 | ).network() 19 | fn = network.function(["i"], ["s"]) 20 | np.testing.assert_equal(84, fn(0)[0]) 21 | network["b1"].get_vw("bias").variable.set_value(43) 22 | np.testing.assert_equal(86, fn(0)[0]) 23 | -------------------------------------------------------------------------------- /treeano/lasagne/__init__.py: -------------------------------------------------------------------------------- 1 | from . import inits 2 | from . import nodes 3 | 4 | from .nodes import (DenseNode, 5 | ReLUNode, 6 | SGDNode, 7 | Conv2DNode, 8 | MaxPool2DNode) 9 | -------------------------------------------------------------------------------- /treeano/lasagne/inits.py: -------------------------------------------------------------------------------- 1 | import lasagne 2 | 3 | from .. import core 4 | 5 | 6 | class GlorotUniformInit(core.WeightInit): 7 | 8 | def __init__(self, gain=1.0): 9 | self.gain = gain 10 | 11 | def initialize_value(self, var): 12 | init = lasagne.init.GlorotUniform(gain=self.gain) 13 | return init.sample(var.shape).astype(var.dtype) 14 | -------------------------------------------------------------------------------- /treeano/lasagne/tests/updates_test.py: -------------------------------------------------------------------------------- 1 | import treeano.nodes as tn 2 | import treeano.lasagne.nodes as tl 3 | 4 | 5 | def test_sgd_node(): 6 | tn.test_utils.check_updates_node(tl.SGDNode, learning_rate=0.01) 7 | 8 | 9 | def test_nesterov_momentum_node(): 10 | tn.test_utils.check_updates_node(tl.NesterovMomentumNode, 11 | learning_rate=0.01) 12 | -------------------------------------------------------------------------------- /treeano/node_utils.py: -------------------------------------------------------------------------------- 1 | from . import core 2 | 3 | 4 | def copy_node(node): 5 | return core.node_from_data(core.node_to_data(node)) 6 | -------------------------------------------------------------------------------- /treeano/nodes/debug.py: -------------------------------------------------------------------------------- 1 | """ 2 | nodes to help debugging 3 | """ 4 | 5 | import theano 6 | import theano.tensor as T 7 | 8 | from .. import utils 9 | from .. import core 10 | 11 | 12 | @core.register_node("print") 13 | class PrintNode(core.NodeImpl): 14 | 15 | hyperparameter_names = ("message",) 16 | 17 | def compute_output(self, network, in_vw): 18 | message = network.find_hyperparameter(["message"], self.name) 19 | # TODO add attrs as hyperparameter for debugging 20 | out_var = theano.printing.Print(message)(in_vw.variable) 21 | network.create_vw( 22 | "default", 23 | variable=out_var, 24 | shape=in_vw.shape, 25 | tags={"output"} 26 | ) 27 | -------------------------------------------------------------------------------- /treeano/nodes/embedding.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import theano 5 | import theano.tensor as T 6 | 7 | from .. import utils 8 | from .. 
import core 9 | 10 | 11 | @core.register_node("embedding") 12 | class EmbeddingNode(core.NodeImpl): 13 | 14 | hyperparameter_names = ("input_size", 15 | "output_size") 16 | 17 | def compute_output(self, network, in_vw): 18 | input_size = network.find_hyperparameter(["input_size"]) 19 | output_size = network.find_hyperparameter(["output_size"]) 20 | W = network.create_vw( 21 | name="weight", 22 | is_shared=True, 23 | shape=(input_size, output_size), 24 | tags={"parameter", "weight"}, 25 | default_inits=[], 26 | ).variable 27 | 28 | out_shape = in_vw.shape + (output_size,) 29 | out_ss = in_vw.symbolic_shape() + (output_size,) 30 | 31 | assert in_vw.dtype == "int32" 32 | out_var = W[in_vw.variable.ravel()] 33 | out_var = out_var.reshape(out_ss) 34 | 35 | network.create_vw( 36 | name="default", 37 | variable=out_var, 38 | shape=out_shape, 39 | tags={"output"}, 40 | ) 41 | -------------------------------------------------------------------------------- /treeano/nodes/monitor.py: -------------------------------------------------------------------------------- 1 | """ 2 | nodes for creating monitor variables 3 | """ 4 | 5 | import theano 6 | import theano.tensor as T 7 | 8 | from .. import core 9 | from .. import utils 10 | 11 | 12 | @core.register_node("monitor_variance") 13 | class MonitorVarianceNode(core.NodeImpl): 14 | 15 | def compute_output(self, network, in_vw): 16 | super(MonitorVarianceNode, self).compute_output(network, in_vw) 17 | if network.find_hyperparameter(["monitor"]): 18 | network.create_vw( 19 | "var", 20 | variable=T.var(in_vw.variable), 21 | shape=(), 22 | tags={"monitor"}, 23 | ) 24 | -------------------------------------------------------------------------------- /treeano/nodes/test_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | some utilities for testing nodes 3 | """ 4 | import json 5 | 6 | import numpy as np 7 | import theano 8 | 9 | from .. import core 10 | from . import simple 11 | from . import containers 12 | from . import costs 13 | from . import composite 14 | from . import activations 15 | 16 | floatX = theano.config.floatX 17 | 18 | 19 | def check_serialization(node): 20 | import nose.tools as nt 21 | # test __eq__ 22 | nt.assert_equal( 23 | node, 24 | node) 25 | # test serialization 26 | nt.assert_equal( 27 | node, 28 | core.node_from_data(core.node_to_data(node))) 29 | # test that serialization is json-serializable 30 | nt.assert_equal( 31 | node, 32 | core.node_from_data(json.loads(json.dumps(core.node_to_data(node))))) 33 | 34 | 35 | def check_updates_node(updates_node_cls, 36 | activation="relu", 37 | **hyperparameters): 38 | """ 39 | activation: 40 | some nodes don't work with ReLU (eg.
equilibrated sgd) 41 | """ 42 | import nose.tools as nt 43 | np.random.seed(42) 44 | 45 | activation = dict( 46 | relu=activations.ReLUNode, 47 | sigmoid=activations.SigmoidNode, 48 | )[activation] 49 | 50 | network = simple.HyperparameterNode( 51 | "g", 52 | updates_node_cls( 53 | "updates", 54 | {"subtree": containers.SequentialNode("seq", [ 55 | simple.InputNode("input", shape=(3, 4, 5)), 56 | composite.DenseNode("b"), 57 | activation("c")]), 58 | "cost": costs.TotalCostNode("cost", { 59 | "pred": simple.ReferenceNode("pred_ref", reference="seq"), 60 | "target": simple.InputNode("target", shape=(3, 14))}) 61 | }, 62 | **hyperparameters), 63 | num_units=14, 64 | cost_function=lambda preds, y_true: (preds - y_true) ** 2, 65 | cost_reference="cost", 66 | ).network() 67 | fn = network.function(["input", "target"], ["cost"]) 68 | fn2 = network.function(["input", "target"], 69 | ["cost"], 70 | include_updates=True) 71 | x = np.random.randn(3, 4, 5).astype(floatX) 72 | y = np.random.randn(3, 14).astype(floatX) 73 | initial_cost = fn(x, y) 74 | next_cost = fn(x, y) 75 | np.testing.assert_allclose(initial_cost, 76 | next_cost, 77 | rtol=1e-5, 78 | atol=1e-8) 79 | prev_cost = fn2(x, y) 80 | for _ in range(10): 81 | current_cost = fn2(x, y) 82 | print(current_cost) 83 | nt.assert_greater(prev_cost, current_cost) 84 | prev_cost = current_cost 85 | -------------------------------------------------------------------------------- /treeano/nodes/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diogo149/treeano/9b3fd6bb5eb2f6738c9e5c357e70bef95dcae7b7/treeano/nodes/tests/__init__.py -------------------------------------------------------------------------------- /treeano/nodes/tests/activations_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | fX = theano.config.floatX 10 | 11 | 12 | def test_relu_node_serialization(): 13 | tn.check_serialization(tn.ReLUNode("a")) 14 | 15 | 16 | def test_softmax_node_serialization(): 17 | tn.check_serialization(tn.SoftmaxNode("a")) 18 | 19 | 20 | def test_resqrt_node_serialization(): 21 | tn.check_serialization(tn.ReSQRTNode("a")) 22 | -------------------------------------------------------------------------------- /treeano/nodes/tests/conv_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | fX = theano.config.floatX 10 | 11 | 12 | def test_conv_parse_pad(): 13 | tests = [ 14 | [(3, 4, 5), "full", (2, 3, 4)], 15 | [(3, 4, 5), "valid", (0, 0, 0)], 16 | [(3, 5, 7), "same", (1, 2, 3)], 17 | [(1, 1), "same", (0, 0)], 18 | [(1, 1), (3, 3), (3, 3)], 19 | ] 20 | for filter_size, pad, ans in tests: 21 | nt.assert_equal(ans, tn.conv.conv_parse_pad(filter_size, pad)) 22 | 23 | fails_fn = nt.raises(AssertionError)(tn.conv.conv_parse_pad) 24 | fails_fn((2,), "same") 25 | fails_fn((2, 3), (1, 2, 3)) 26 | 27 | 28 | def test_conv_2d_node_serialization(): 29 | tn.check_serialization(tn.Conv2DNode("a")) 30 | -------------------------------------------------------------------------------- /treeano/nodes/tests/debug_test.py: -------------------------------------------------------------------------------- 1 | 
import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | fX = theano.config.floatX 10 | 11 | 12 | def test_print_node_serialization(): 13 | tn.check_serialization(tn.PrintNode("a")) 14 | -------------------------------------------------------------------------------- /treeano/nodes/tests/dnn_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | fX = theano.config.floatX 10 | 11 | 12 | def test_dnn_pool_node_serialization(): 13 | tn.check_serialization(tn.DnnPoolNode("a")) 14 | -------------------------------------------------------------------------------- /treeano/nodes/tests/hyperparameter_test.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import nose.tools as nt 4 | import numpy as np 5 | import theano 6 | import theano.tensor as T 7 | 8 | import treeano 9 | import treeano.nodes as tn 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | def test_variable_hyperparameter_node_serialization(): 15 | tn.check_serialization(tn.VariableHyperparameterNode("a", 16 | tn.IdentityNode("b"))) 17 | 18 | 19 | def test_output_hyperparameter_node_serialization(): 20 | tn.check_serialization(tn.OutputHyperparameterNode("a")) 21 | 22 | 23 | def test_variable_hyperparameter_node(): 24 | network = tn.VariableHyperparameterNode( 25 | "a", 26 | tn.InputNode("b", shape=())).network() 27 | hp = network["a"].get_vw("hyperparameter").variable 28 | nt.assert_equal(hp.ndim, 0) 29 | fn = network.function([("a", "hyperparameter")], [hp]) 30 | x = 42 31 | nt.assert_equal(fn(x), [x]) 32 | 33 | 34 | def test_shared_hyperparameter_node(): 35 | network = tn.SharedHyperparameterNode( 36 | "a", 37 | tn.InputNode("b", shape=())).network() 38 | hp = network["a"].get_vw("hyperparameter").variable 39 | nt.assert_equal(hp.ndim, 0) 40 | fn1 = network.function([("a", "hyperparameter")], 41 | [hp], 42 | include_updates=True) 43 | fn2 = network.function([], [hp]) 44 | x = 42 45 | nt.assert_equal(fn1(x), [x]) 46 | nt.assert_equal(fn2(), [x]) 47 | 48 | 49 | def test_output_hyperparameter_node(): 50 | network = tn.VariableHyperparameterNode( 51 | "a", 52 | tn.OutputHyperparameterNode("b"), 53 | hyperparameter="foobar" 54 | ).network() 55 | fn = network.function([("a", "hyperparameter")], ["b"]) 56 | x = 253 57 | nt.assert_equal(fn(x), [x]) 58 | 59 | 60 | def test_variable_hyperparameter_node_double(): 61 | network = tn.VariableHyperparameterNode( 62 | "a", 63 | tn.VariableHyperparameterNode( 64 | "b", 65 | tn.OutputHyperparameterNode("c", hyperparameter="foo"), 66 | hyperparameter="bar"), 67 | hyperparameter="foo").network() 68 | fn = network.function([("a", "hyperparameter")], ["c"]) 69 | x = 253 70 | nt.assert_equal(fn(x), [x]) 71 | -------------------------------------------------------------------------------- /treeano/nodes/tests/monitor_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | fX = theano.config.floatX 10 | 11 | 12 | def test_monitor_variance_node_serialization(): 13 | tn.check_serialization(tn.MonitorVarianceNode("a")) 14 | 15 | 16 | def test_monitor_variance_node(): 17 | 
network = tn.SequentialNode( 18 | "s", 19 | [tn.InputNode("x", shape=(3, 4, 5)), 20 | tn.MonitorVarianceNode("mv")]).network() 21 | vw = network["mv"].get_vw("var") 22 | x = np.random.randn(3, 4, 5).astype(fX) 23 | ans = x.var() 24 | fn = network.function(["x"], [vw.variable]) 25 | np.testing.assert_allclose(fn(x), [ans], rtol=1e-5) 26 | -------------------------------------------------------------------------------- /treeano/nodes/tests/recurrent_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | from treeano import nodes 7 | 8 | fX = theano.config.floatX 9 | 10 | 11 | def test_simple_recurrent_node_serialization(): 12 | nodes.check_serialization(nodes.recurrent.SimpleRecurrentNode( 13 | "a", nodes.IdentityNode("b"))) 14 | nodes.check_serialization(nodes.recurrent.SimpleRecurrentNode( 15 | "a", nodes.IdentityNode("b"), num_units=32, batch_size=2 ** 7)) 16 | 17 | 18 | def test_simple_recurrent_node(): 19 | # just testing that it runs 20 | # --- 21 | # the test may look dumb, but it's found a LOT of problems 22 | network = nodes.SequentialNode( 23 | "n", 24 | [nodes.InputNode("in", shape=(3, 4, 5)), 25 | nodes.recurrent.SimpleRecurrentNode("srn", 26 | nodes.ReLUNode("relu"), 27 | batch_size=4, 28 | num_units=35, 29 | scan_axis=0)] 30 | ).network() 31 | fn = network.function(["in"], ["n"]) 32 | x = np.random.rand(3, 4, 5).astype(fX) 33 | res = fn(x)[0] 34 | # 3 = scan axis, 4 = batch axis, 35 = num output units 35 | nt.assert_equal(res.shape, (3, 4, 35)) 36 | -------------------------------------------------------------------------------- /treeano/nodes/tests/scan_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import treeano 4 | from treeano.nodes.scan import ScanNode 5 | 6 | floatX = theano.config.floatX 7 | 8 | 9 | def test_basic_scan(): 10 | 11 | class x2Node(treeano.NodeImpl): 12 | 13 | def compute_output(self, network, in_vw): 14 | network.create_vw( 15 | name="default", 16 | variable=in_vw.variable * 2, 17 | shape=in_vw.shape, 18 | tags={"output"} 19 | ) 20 | 21 | network = treeano.nodes.SequentialNode( 22 | "seq", 23 | children=[ 24 | treeano.nodes.InputNode("input", shape=(3, 2, 1)), 25 | ScanNode("scan", x2Node("x2")) 26 | ], 27 | ).network() 28 | fn = network.function(["input"], ["scan"]) 29 | np.testing.assert_allclose(fn(np.ones((3, 2, 1)).astype(floatX))[0], 30 | 2 * np.ones((3, 2, 1))) 31 | x = np.random.rand(3, 2, 1).astype(floatX) 32 | np.testing.assert_allclose(fn(x)[0], 33 | 2 * x) 34 | -------------------------------------------------------------------------------- /treeano/nodes/tests/stochastic_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano.core 7 | import treeano.nodes as tn 8 | 9 | fX = theano.config.floatX 10 | 11 | 12 | def test_dropout_node_serialization(): 13 | tn.check_serialization(tn.DropoutNode("a")) 14 | tn.check_serialization(tn.DropoutNode("a", p=0.5)) 15 | 16 | 17 | def test_gaussian_dropout_node_serialization(): 18 | tn.check_serialization(tn.GaussianDropoutNode("a")) 19 | tn.check_serialization(tn.GaussianDropoutNode("a", p=0)) 20 | 21 | 22 | def test_spatial_dropout_node_serialization(): 23 | tn.check_serialization(tn.SpatialDropoutNode("a")) 24 | 
tn.check_serialization(tn.SpatialDropoutNode("a", p=0.5)) 25 | 26 | 27 | def test_dropout_node(): 28 | def make_network(p): 29 | return tn.SequentialNode("s", [ 30 | tn.InputNode("i", shape=(3, 4, 5)), 31 | tn.DropoutNode("do", p=p) 32 | ]).network() 33 | 34 | x = np.random.randn(3, 4, 5).astype(fX) 35 | fn1 = make_network(0).function(["i"], ["s"]) 36 | np.testing.assert_allclose(fn1(x)[0], x) 37 | 38 | @nt.raises(AssertionError) 39 | def test_not_identity(): 40 | fn2 = make_network(0.5).function(["i"], ["s"]) 41 | np.testing.assert_allclose(fn2(x)[0], x) 42 | 43 | test_not_identity() 44 | 45 | 46 | def test_gaussian_dropout_node(): 47 | def make_network(p): 48 | return tn.SequentialNode("s", [ 49 | tn.InputNode("i", shape=(3, 4, 5)), 50 | tn.GaussianDropoutNode("do", p=p) 51 | ]).network() 52 | 53 | x = np.random.randn(3, 4, 5).astype(fX) 54 | fn1 = make_network(0).function(["i"], ["s"]) 55 | np.testing.assert_allclose(fn1(x)[0], x) 56 | 57 | @nt.raises(AssertionError) 58 | def test_not_identity(): 59 | fn2 = make_network(0.5).function(["i"], ["s"]) 60 | np.testing.assert_allclose(fn2(x)[0], x) 61 | 62 | test_not_identity() 63 | 64 | 65 | def test_spatial_dropout_node(): 66 | def make_network(p): 67 | return tn.SequentialNode("s", [ 68 | tn.InputNode("i", shape=(3, 6, 4, 5)), 69 | tn.SpatialDropoutNode("do", p=p) 70 | ]).network() 71 | 72 | x = np.random.randn(3, 6, 4, 5).astype(fX) 73 | fn1 = make_network(0).function(["i"], ["s"]) 74 | np.testing.assert_allclose(fn1(x)[0], x) 75 | 76 | @nt.raises(AssertionError) 77 | def test_not_identity(): 78 | fn2 = make_network(0.5).function(["i"], ["s"]) 79 | np.testing.assert_allclose(fn2(x)[0], x) 80 | 81 | test_not_identity() 82 | 83 | x = np.ones((3, 6, 4, 5)).astype(fX) 84 | fn3 = make_network(0.5).function(["i"], ["s"]) 85 | out = fn3(x)[0] 86 | np.testing.assert_equal(out.std(axis=(2, 3)), np.zeros((3, 6), dtype=fX)) 87 | -------------------------------------------------------------------------------- /treeano/nodes/tests/upsample_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | fX = theano.config.floatX 10 | 11 | 12 | def test_repeat_n_d_node_serialization(): 13 | tn.check_serialization(tn.RepeatNDNode("a")) 14 | 15 | 16 | def test_sparse_upsample_node_serialization(): 17 | tn.check_serialization(tn.SparseUpsampleNode("a")) 18 | 19 | 20 | def test_repeat_n_d_node1(): 21 | network = tn.SequentialNode( 22 | "s", 23 | [tn.InputNode("i", shape=(3,)), 24 | tn.RepeatNDNode("r", upsample_factor=(2,))]).network() 25 | 26 | fn = network.function(["i"], ["s"]) 27 | x = np.arange(3).astype(fX) 28 | np.testing.assert_equal(np.array([0, 0, 1, 1, 2, 2], dtype=fX), 29 | fn(x)[0]) 30 | 31 | 32 | def test_repeat_n_d_node2(): 33 | network = tn.SequentialNode( 34 | "s", 35 | [tn.InputNode("i", shape=(3, 4, 5)), 36 | tn.RepeatNDNode("r", upsample_factor=(1, 1, 1))]).network() 37 | 38 | fn = network.function(["i"], ["s"]) 39 | x = np.random.randn(3, 4, 5).astype(fX) 40 | np.testing.assert_equal(x, 41 | fn(x)[0]) 42 | 43 | 44 | def test_repeat_n_d_node3(): 45 | network = tn.SequentialNode( 46 | "s", 47 | [tn.InputNode("i", shape=(2, 3)), 48 | tn.RepeatNDNode("r", upsample_factor=(2, 1))]).network() 49 | 50 | fn = network.function(["i"], ["s"]) 51 | x = np.arange(6).astype(fX).reshape(2, 3) 52 | np.testing.assert_equal(np.array([[0, 1, 2], 53 | [0, 1, 2], 54 | [3, 4, 5],
55 | [3, 4, 5]]), 56 | fn(x)[0]) 57 | 58 | 59 | def test_repeat_n_d_node4(): 60 | network = tn.SequentialNode( 61 | "s", 62 | [tn.InputNode("i", shape=(2, 3)), 63 | tn.RepeatNDNode("r", upsample_factor=(1, 2))]).network() 64 | 65 | fn = network.function(["i"], ["s"]) 66 | x = np.arange(6).astype(fX).reshape(2, 3) 67 | np.testing.assert_equal(np.array([[0, 0, 1, 1, 2, 2], 68 | [3, 3, 4, 4, 5, 5]]), 69 | fn(x)[0]) 70 | 71 | 72 | def test_sparse_upsample_node(): 73 | network = tn.SequentialNode( 74 | "s", 75 | [tn.InputNode("i", shape=(2, 3)), 76 | tn.SparseUpsampleNode("r", upsample_factor=(1, 2))]).network() 77 | 78 | fn = network.function(["i"], ["s"]) 79 | x = np.arange(6).astype(fX).reshape(2, 3) 80 | np.testing.assert_equal(np.array([[0, 0, 1, 0, 2, 0], 81 | [3, 0, 4, 0, 5, 0]]), 82 | fn(x)[0]) 83 | -------------------------------------------------------------------------------- /treeano/nodes/toy.py: -------------------------------------------------------------------------------- 1 | """ 2 | some not-actually-useful nodes (if they are, move them elsewhere) mostly 3 | for tests and code samples 4 | """ 5 | 6 | from .. import core 7 | 8 | 9 | @core.register_node("constant_updater") 10 | class ConstantUpdaterNode(core.Wrapper1NodeImpl): 11 | 12 | """ 13 | provides updates as a constant value 14 | """ 15 | 16 | hyperparameter_names = ("value",) 17 | 18 | def new_update_deltas(self, network): 19 | value = network.find_hyperparameter(["value"]) 20 | parameters = network.find_vws_in_subtree(tags=["parameter"]) 21 | return core.UpdateDeltas({p.variable: value for p in parameters}) 22 | 23 | 24 | @core.register_node("scalar_sum") 25 | class ScalarSumNode(core.NodeImpl): 26 | 27 | """ 28 | sums up its input into a scalar 29 | """ 30 | 31 | def compute_output(self, network, in_vw): 32 | network.create_vw( 33 | "default", 34 | variable=in_vw.variable.sum(), 35 | shape=(), 36 | tags={"output"}, 37 | ) 38 | -------------------------------------------------------------------------------- /treeano/sandbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diogo149/treeano/9b3fd6bb5eb2f6738c9e5c357e70bef95dcae7b7/treeano/sandbox/__init__.py -------------------------------------------------------------------------------- /treeano/sandbox/nodes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diogo149/treeano/9b3fd6bb5eb2f6738c9e5c357e70bef95dcae7b7/treeano/sandbox/nodes/__init__.py -------------------------------------------------------------------------------- /treeano/sandbox/nodes/activation_transformation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | import treeano 5 | import treeano.nodes as tn 6 | 7 | 8 | fX = theano.config.floatX 9 | 10 | 11 | @treeano.register_node("concatenate_negation") 12 | class ConcatenateNegationNode(treeano.NodeImpl): 13 | 14 | """ 15 | concatenates a negated copy of the activations on a specified axis 16 | """ 17 | 18 | hyperparameter_names = ("axis",) 19 | 20 | def compute_output(self, network, in_vw): 21 | axis = network.find_hyperparameter(["axis"], 1) 22 | 23 | in_var = in_vw.variable 24 | out_var = T.concatenate([in_var, -in_var], axis=axis) 25 | 26 | out_shape = list(in_vw.shape) 27 | if out_shape[axis] is not None: 28 | out_shape[axis] *= 2 29 | out_shape = tuple(out_shape) 30 | 31 |
network.create_vw( 32 | "default", 33 | variable=out_var, 34 | shape=out_shape, 35 | tags={"output"}, 36 | ) 37 | 38 | 39 | class NegatedInit(treeano.inits.WeightInit): 40 | 41 | """ 42 | init specifically for units after ConcatenateNegationNode 43 | 44 | takes in a different init, and initializes the first half with that 45 | init, and the second half with the negated version of that tensor 46 | 47 | rationale: ConcatenateNegationNode + this init + ReLU will initialize 48 | the network to be linear 49 | """ 50 | 51 | def __init__(self, init, axis=1): 52 | self.init = init 53 | self.axis = axis 54 | 55 | def initialize_value(self, vw): 56 | # temporary variable wrapper with fake shape 57 | tmp_vw_shape = list(vw.shape) 58 | if tmp_vw_shape[self.axis] % 2 != 0: 59 | # this weight is probably not after a ConcatenateNegationNode, 60 | # so instead revert to initial init 61 | return self.init.initialize_value(vw) 62 | tmp_vw_shape[self.axis] //= 2 63 | tmp_vw_shape = tuple(tmp_vw_shape) 64 | tmp_vw = treeano.VariableWrapper( 65 | "tmp", 66 | shape=tmp_vw_shape, 67 | is_shared=True, 68 | inits=[], 69 | ) 70 | 71 | val = self.init.initialize_value(tmp_vw) 72 | 73 | return np.concatenate([val, -val], axis=self.axis) 74 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/auxiliary_costs.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | import treeano 4 | import treeano.nodes as tn 5 | 6 | 7 | @treeano.register_node("auxiliary_dense_softmax_categorical_crossentropy") 8 | class AuxiliaryDenseSoftmaxCCENode(treeano.WrapperNodeImpl): 9 | 10 | hyperparameter_names = (tn.DenseNode.hyperparameter_names 11 | + tn.AuxiliaryCostNode.hyperparameter_names) 12 | children_container = treeano.core.DictChildrenContainerSchema( 13 | target=treeano.core.ChildContainer, 14 | ) 15 | 16 | def architecture_children(self): 17 | return [tn.AuxiliaryCostNode( 18 | self.name + "_auxiliary", 19 | {"target": self.raw_children()["target"], 20 | "pre_cost": tn.SequentialNode( 21 | self.name + "_sequential", 22 | [tn.DenseNode(self.name + "_dense"), 23 | tn.SoftmaxNode(self.name + "_softmax")])}, 24 | cost_function=T.nnet.categorical_crossentropy)] 25 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/bttf_mean.py: -------------------------------------------------------------------------------- 1 | # TODO: see if op can change default_updates 2 | 3 | import warnings 4 | import theano 5 | import theano.tensor as T 6 | import treeano 7 | from treeano.sandbox.utils import OverwriteGrad 8 | 9 | 10 | def _backprop_to_the_future_mean_forward(batch_mean, 11 | rolling_mean, 12 | rolling_grad, 13 | alpha): 14 | return rolling_mean + 0 * (batch_mean + rolling_grad) + 0 * alpha 15 | 16 | 17 | class BackpropToTheFutureMeanOp(OverwriteGrad): 18 | 19 | def __init__(self, update_averages): 20 | super(BackpropToTheFutureMeanOp, self).__init__( 21 | fn=_backprop_to_the_future_mean_forward) 22 | self.update_averages = update_averages 23 | 24 | def grad(self, inputs, out_grads): 25 | batch_mean, rolling_mean, rolling_grad, alpha = inputs 26 | out_grad, = out_grads 27 | 28 | if self.update_averages: 29 | assert treeano.utils.is_shared_variable(rolling_mean) 30 | assert treeano.utils.is_shared_variable(rolling_grad) 31 | # HACK this is super hacky and won't work for certain 32 | # computation graphs 33 | # TODO make assertion again 34 | if
(hasattr(rolling_mean, "default_update") or 35 | hasattr(rolling_grad, "default_update")): 36 | warnings.warn("rolling mean/grad already has updates - " 37 | "overwriting. this can be caused by calculating " 38 | "the gradient of backprop to the future mean " 39 | "multiple times") 40 | 41 | rolling_mean.default_update = (alpha * rolling_mean + 42 | (1 - alpha) * batch_mean) 43 | rolling_grad.default_update = (alpha * rolling_grad + 44 | (1 - alpha) * out_grad) 45 | else: 46 | # HACK remove default_update 47 | if hasattr(rolling_mean, "default_update"): 48 | delattr(rolling_mean, "default_update") 49 | if hasattr(rolling_grad, "default_update"): 50 | delattr(rolling_grad, "default_update") 51 | 52 | return [rolling_grad, 53 | T.zeros_like(rolling_mean), 54 | T.zeros_like(rolling_grad), 55 | T.zeros_like(alpha)] 56 | 57 | backprop_to_the_future_mean_with_updates = BackpropToTheFutureMeanOp( 58 | update_averages=True) 59 | backprop_to_the_future_mean_no_updates = BackpropToTheFutureMeanOp( 60 | update_averages=False) 61 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/channel_out.py: -------------------------------------------------------------------------------- 1 | """ 2 | from "From Maxout to Channel-Out: Encoding Information on Sparse Pathways" 3 | http://arxiv.org/abs/1312.1909 4 | 5 | NOTE: implementation seems quite slow 6 | """ 7 | 8 | import theano 9 | import theano.tensor as T 10 | 11 | import treeano 12 | import treeano.nodes as tn 13 | 14 | 15 | @treeano.register_node("channel_out") 16 | class ChannelOutNode(tn.BaseActivationNode): 17 | 18 | hyperparameter_names = ("num_pieces", 19 | "feature_pool_axis", 20 | "axis") 21 | 22 | def activation(self, network, in_vw): 23 | # NOTE: mostly copied from FeaturePoolNode 24 | k = network.find_hyperparameter(["num_pieces"]) 25 | axis = network.find_hyperparameter( 26 | ["feature_pool_axis", 27 | "axis"], 28 | # by default, the first non-batch axis 29 | treeano.utils.nth_non_batch_axis(network, 0)) 30 | 31 | # shape calculation 32 | in_shape = in_vw.shape 33 | in_features = in_shape[axis] 34 | assert (in_features % k) == 0 35 | out_shape = list(in_shape) 36 | out_shape[axis] = in_shape[axis] // k 37 | out_shape = tuple(out_shape) 38 | 39 | # calculate indices of maximum activation 40 | in_var = in_vw.variable 41 | symbolic_shape = in_vw.symbolic_shape() 42 | new_symbolic_shape = (symbolic_shape[:axis] 43 | + (out_shape[axis], k) + 44 | symbolic_shape[axis + 1:]) 45 | reshaped = in_var.reshape(new_symbolic_shape) 46 | if True: 47 | # this implementation seems to be slightly faster 48 | maxed = T.max(reshaped, axis=axis + 1, keepdims=True) 49 | 50 | mask = T.eq(maxed, reshaped).reshape(symbolic_shape) 51 | else: 52 | max_idxs = T.argmax(reshaped, axis=axis + 1, keepdims=True) 53 | 54 | # calculate indices of each unit 55 | arange_pattern = ["x"] * (in_vw.ndim + 1) 56 | arange_pattern[axis + 1] = 0 57 | idxs = T.arange(k).dimshuffle(tuple(arange_pattern)) 58 | 59 | mask = T.eq(max_idxs, idxs).reshape(symbolic_shape) 60 | return in_vw.variable * mask 61 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/deconvnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | from "Visualizing and Understanding Convolutional Networks" 3 | http://arxiv.org/abs/1311.2901 4 | """ 5 | 6 | import theano 7 | import theano.tensor as T 8 | 9 | import treeano 10 | import treeano.nodes as tn 11 | import
treeano.sandbox.utils 12 | import canopy 13 | 14 | 15 | class _Deconvnet(treeano.sandbox.utils.OverwriteGrad): 16 | 17 | """ 18 | based on Lasagne Recipes on Guided Backpropagation 19 | """ 20 | 21 | def grad(self, inputs, out_grads): 22 | (inp,) = inputs 23 | (grd,) = out_grads 24 | if False: 25 | # explicitly rectify 26 | return (grd * (grd > 0).astype(inp.dtype),) 27 | else: 28 | # use the given fn 29 | return (self.fn(grd),) 30 | 31 | 32 | deconvnet_relu = _Deconvnet(treeano.utils.rectify) 33 | 34 | 35 | @treeano.register_node("deconvnet_relu") 36 | class DeconvnetReLUNode(tn.BaseActivationNode): 37 | 38 | def activation(self, network, in_vw): 39 | return deconvnet_relu(in_vw.variable) 40 | 41 | 42 | def replace_relu_with_deconvnet_transform(network, 43 | nodes=(tn.ReLUNode,), 44 | **kwargs): 45 | 46 | def inner(node): 47 | if isinstance(node, nodes): 48 | return DeconvnetReLUNode(node.name) 49 | else: 50 | return node 51 | 52 | return canopy.transforms.fns.transform_root_node_postwalk( 53 | network, inner, **kwargs) 54 | 55 | 56 | class ReplaceReLUWithDeconvnet(canopy.handlers.NetworkHandlerImpl): 57 | 58 | def __init__(self, nodes=(tn.ReLUNode,)): 59 | self.nodes = nodes 60 | 61 | def transform_network(self, network): 62 | return replace_relu_with_deconvnet_transform(network, self.nodes) 63 | 64 | replace_relu_with_deconvnet_handler = ReplaceReLUWithDeconvnet 65 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/equilibrated_sgd.py: -------------------------------------------------------------------------------- 1 | """ 2 | from 3 | "RMSProp and equilibrated adaptive learning rates for non-convex optimization" 4 | http://arxiv.org/abs/1502.04390 5 | 6 | NOTE: Rop doesn't work for many operations, and it often causes NaNs 7 | """ 8 | 9 | import numpy as np 10 | import theano 11 | import theano.tensor as T 12 | from theano.sandbox.rng_mrg import MRG_RandomStreams 13 | import treeano 14 | import treeano.nodes as tn 15 | 16 | fX = theano.config.floatX 17 | 18 | 19 | @treeano.register_node("equilibrated_sgd") 20 | class EquilibratedSGDNode(tn.StandardUpdatesNode): 21 | 22 | hyperparameter_names = ("learning_rate", 23 | "damping_factor") 24 | 25 | def _new_update_deltas(self, network, parameter_vws, grads): 26 | # NOTE: in the paper, learning_rate is referred to as epsilon 27 | # not doing that here as it would be confusing 28 | learning_rate = network.find_hyperparameter(["learning_rate"], 0.01) 29 | # NOTE: this is referred to as lambda in the paper 30 | # NOTE: when doing hyperparameter selection in the paper, 31 | # they select from 1e-4, 1e-5, 1e-6 32 | damping_factor = network.find_hyperparameter(["damping_factor"], 1e-2) 33 | 34 | update_deltas = treeano.UpdateDeltas() 35 | 36 | k_vw = network.create_vw( 37 | "esgd_count", 38 | shape=(), 39 | is_shared=True, 40 | tags={"state"}, 41 | default_inits=[], 42 | ) 43 | k = k_vw.variable 44 | new_k = k + 1 45 | update_deltas[k] = new_k - k 46 | 47 | for parameter_vw, grad in zip(parameter_vws, grads): 48 | D_vw = network.create_vw( 49 | "esgd_D(%s)" % parameter_vw.name, 50 | shape=parameter_vw.shape, 51 | is_shared=True, 52 | tags={"state"}, 53 | default_inits=[], 54 | ) 55 | 56 | # TODO ESGD update should only occur every 20 iterations 57 | # to amortize cost 58 | parameter = parameter_vw.variable 59 | D = D_vw.variable 60 | # TODO save this state so that we can seed the rng 61 | srng = MRG_RandomStreams() 62 | # noise vector 63 | v = srng.normal(size=parameter.shape)
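# NOTE: T.Rop computes the Hessian-vector product Hv = H . v without
# materializing H; per the paper, for v ~ N(0, I) the expectation
# E[(Hv) ** 2] equals diag(H ** 2), so the running average sqrt(D / k)
# used below should estimate |diag(H)| -- the equilibration
# preconditioner.
64 | Hv =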
T.Rop(grad, parameter, v) 65 | D_delta = T.sqr(Hv) 66 | new_D = D + D_delta 67 | # new_D / new_k is essentially a mean 68 | denominator = damping_factor + T.sqrt(new_D / new_k) 69 | parameter_delta = -learning_rate * grad / denominator 70 | update_deltas[parameter] = parameter_delta 71 | update_deltas[D] = D_delta 72 | return update_deltas 73 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/guided_backprop.py: -------------------------------------------------------------------------------- 1 | """ 2 | from "Striving for Simplicity - The All Convolutional Net" 3 | http://arxiv.org/abs/1412.6806 4 | """ 5 | 6 | import theano 7 | import theano.tensor as T 8 | 9 | import treeano 10 | import treeano.nodes as tn 11 | import treeano.sandbox.utils 12 | import canopy 13 | 14 | 15 | class _GuidedBackprop(treeano.sandbox.utils.OverwriteGrad): 16 | 17 | """ 18 | based on Lasagne Recipes on Guided Backpropagation 19 | """ 20 | 21 | def grad(self, inputs, out_grads): 22 | (inp,) = inputs 23 | (grd,) = out_grads 24 | dtype = inp.dtype 25 | return (grd * (inp > 0).astype(dtype) * (grd > 0).astype(dtype),) 26 | 27 | 28 | guided_backprop_relu = _GuidedBackprop(treeano.utils.rectify) 29 | 30 | 31 | @treeano.register_node("guided_backprop_relu") 32 | class GuidedBackpropReLUNode(tn.BaseActivationNode): 33 | 34 | def activation(self, network, in_vw): 35 | return guided_backprop_relu(in_vw.variable) 36 | 37 | 38 | def replace_relu_with_guided_backprop_transform(network, 39 | nodes=(tn.ReLUNode,), 40 | **kwargs): 41 | 42 | def inner(node): 43 | if isinstance(node, nodes): 44 | return GuidedBackpropReLUNode(node.name) 45 | else: 46 | return node 47 | 48 | return canopy.transforms.fns.transform_root_node_postwalk( 49 | network, inner, **kwargs) 50 | 51 | 52 | class ReplaceReLUWithGuidedBackprop(canopy.handlers.NetworkHandlerImpl): 53 | 54 | def __init__(self, nodes=(tn.ReLUNode,)): 55 | self.nodes = nodes 56 | 57 | def transform_network(self, network): 58 | return replace_relu_with_guided_backprop_transform(network, self.nodes) 59 | 60 | replace_relu_with_guided_backprop_handler = ReplaceReLUWithGuidedBackprop 61 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/interval_relu.py: -------------------------------------------------------------------------------- 1 | """ 2 | relu where each channel has a different leak rate 3 | """ 4 | 5 | import numpy as np 6 | import theano 7 | import theano.tensor as T 8 | import treeano 9 | import treeano.nodes as tn 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | @treeano.register_node("interval_relu") 15 | class IntervalReLUNode(treeano.NodeImpl): 16 | 17 | hyperparameter_names = ("leak_min", 18 | "leak_max") 19 | 20 | def compute_output(self, network, in_vw): 21 | leak_min = network.find_hyperparameter(["leak_min"], 0) 22 | leak_max = network.find_hyperparameter(["leak_max"], 1) 23 | num_channels = in_vw.shape[1] 24 | alpha = np.linspace(leak_min, leak_max, num_channels).astype(fX) 25 | pattern = ["x" if i != 1 else 0 for i in range(in_vw.ndim)] 26 | alpha_var = T.constant(alpha).dimshuffle(*pattern) 27 | out_var = treeano.utils.rectify(in_vw.variable, 28 | negative_coefficient=alpha_var) 29 | network.create_vw( 30 | "default", 31 | variable=out_var, 32 | shape=in_vw.shape, 33 | tags={"output"}, 34 | ) 35 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/invariant_dropout.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | from 3 | "Making Dropout Invariant to Transformations of Activation Functions and 4 | Inputs" 5 | http://www.dlworkshop.org/56.pdf?attredirects=0 6 | """ 7 | import numpy as np 8 | import theano 9 | import theano.tensor as T 10 | import treeano 11 | import treeano.nodes as tn 12 | 13 | 14 | fX = theano.config.floatX 15 | 16 | 17 | @treeano.register_node("invariant_dropout") 18 | class InvariantDropoutNode(treeano.Wrapper0NodeImpl): 19 | 20 | hyperparameter_names = (tn.DropoutNode.hyperparameter_names 21 | + tn.AddBiasNode.hyperparameter_names) 22 | 23 | def architecture_children(self): 24 | bias_node = tn.AddBiasNode(self.name + "_bias") 25 | dropout_node = tn.DropoutNode(self.name + "_dropout") 26 | return [tn.SequentialNode( 27 | self.name + "_sequential", 28 | [bias_node, 29 | dropout_node])] 30 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/inverse.py: -------------------------------------------------------------------------------- 1 | """ 2 | performs the inverse operation of a single node by applying 3 | its partial derivative with respect to its input 4 | """ 5 | 6 | import treeano 7 | import theano 8 | import theano.tensor as T 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | @treeano.register_node("inverse") 14 | class InverseNode(treeano.NodeImpl): 15 | 16 | hyperparameter_names = ("reference",) 17 | 18 | def compute_output(self, network, in_vw): 19 | reference_name = network.find_hyperparameter(["reference"]) 20 | ref_network = network[reference_name] 21 | 22 | in_var = in_vw.variable 23 | reference_input_vw = ref_network.get_input_vw("default") 24 | reference_input = reference_input_vw.variable 25 | reference_output_vw = ref_network.get_vw("default") 26 | reference_output = reference_output_vw.variable 27 | 28 | out_var = T.grad(None, 29 | wrt=reference_input, 30 | known_grads={reference_output: in_var}) 31 | 32 | network.create_vw( 33 | 'default', 34 | variable=out_var, 35 | shape=reference_input_vw.shape, 36 | tags={'output'} 37 | ) 38 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/kl_sparsity_penalty.py: -------------------------------------------------------------------------------- 1 | """ 2 | for applying a sparsity penalty (for saturating nonlinearities) using KL-divergence 3 | of a bernoulli distribution 4 | 5 | unsure of origin, but see the following pdf for info: 6 | http://web.stanford.edu/class/cs294a/sparseAutoencoder.pdf 7 | """ 8 | import numpy as np 9 | import theano 10 | import theano.tensor as T 11 | import treeano 12 | import treeano.nodes as tn 13 | 14 | 15 | def _bernoulli_kl_divergence(p, outputs): 16 | """ 17 | p: 18 | sparsity parameter (target sparsity) 19 | 20 | outputs: 21 | actual network outputs 22 | """ 23 | return (p * T.log(p) 24 | - p * T.log(outputs) 25 | + (1 - p) * T.log(1 - p) 26 | - (1 - p) * T.log(1 - outputs)) 27 | 28 | 29 | @treeano.register_node("elementwise_kl_sparsity_penalty") 30 | class ElementwiseKLSparsityPenaltyNode(treeano.NodeImpl): 31 | 32 | hyperparameter_names = ("target_sparsity", 33 | "sparsity", 34 | "min_value", 35 | "max_value") 36 | 37 | def compute_output(self, network, in_vw): 38 | p = network.find_hyperparameter(["target_sparsity", 39 | "sparsity"]) 40 | min_value = network.find_hyperparameter(["min_value"], 0) 41 | max_value = network.find_hyperparameter(["max_value"], 1)
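# NOTE: the min_value/max_value rescaling maps outputs into (0, 1) so
# they can be treated as bernoulli means q; the helper above then
# computes, per element,
#   KL(p || q) = p * log(p / q) + (1 - p) * log((1 - p) / (1 - q))
# which is minimized (at 0) exactly when q == p.
42 | scaled_output = (in_vw.variable -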
min_value) / (max_value - min_value) 43 | cost = _bernoulli_kl_divergence(p, scaled_output) 44 | network.create_vw( 45 | "default", 46 | variable=cost, 47 | shape=in_vw.shape, 48 | tags={"output"}, 49 | ) 50 | 51 | 52 | @treeano.register_node("auxiliary_kl_sparsity_penalty") 53 | class AuxiliaryKLSparsityPenaltyNode(treeano.Wrapper0NodeImpl): 54 | 55 | hyperparameter_names = ( 56 | ("cost_reference", 57 | "cost_weight") 58 | + ElementwiseKLSparsityPenaltyNode.hyperparameter_names) 59 | 60 | def architecture_children(self): 61 | return [ 62 | tn.AuxiliaryNode( 63 | self.name + "_auxiliary", 64 | tn.SequentialNode( 65 | self.name + "_sequential", 66 | [ElementwiseKLSparsityPenaltyNode( 67 | self.name + "_sparsitypenalty"), 68 | tn.AggregatorNode(self.name + "_aggregator"), 69 | tn.MultiplyConstantNode(self.name + "_multiplyweight"), 70 | tn.SendToNode(self.name + "_sendto", to_key=self.name)]))] 71 | 72 | def init_long_range_dependencies(self, network): 73 | network.forward_hyperparameter(self.name + "_sendto", 74 | "send_to_reference", 75 | ["cost_reference"]) 76 | 77 | def init_state(self, network): 78 | super(AuxiliaryKLSparsityPenaltyNode, self).init_state(network) 79 | network.forward_hyperparameter(self.name + "_multiplyweight", 80 | "value", 81 | ["cost_weight"], 82 | 1) 83 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/kumaraswamy_unit.py: -------------------------------------------------------------------------------- 1 | """ 2 | from 3 | "Improving neural networks with bunches of neurons modeled by Kumaraswamy 4 | units: Preliminary study" 5 | http://arxiv.org/abs/1505.02581 6 | """ 7 | import numpy as np 8 | import theano 9 | import theano.tensor as T 10 | import treeano 11 | import treeano.nodes as tn 12 | 13 | 14 | fX = theano.config.floatX 15 | 16 | 17 | def kumaraswamy_unit(x, a=8, b=30): 18 | return 1 - (1 - T.nnet.sigmoid(x) ** a) ** b 19 | 20 | 21 | @treeano.register_node("kumaraswamy_unit") 22 | class KumaraswamyUnitNode(treeano.NodeImpl): 23 | 24 | hyperparameter_names = ("kumaraswamy_a", 25 | "kumaraswamy_b") 26 | 27 | def compute_output(self, network, in_vw): 28 | a = network.find_hyperparameter(["kumaraswamy_a"], 8) 29 | b = network.find_hyperparameter(["kumaraswamy_b"], 30) 30 | network.create_vw( 31 | "default", 32 | variable=kumaraswamy_unit(in_vw.variable, a, b), 33 | shape=in_vw.shape, 34 | tags={"output"}, 35 | ) 36 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/l2_pool.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | 5 | import treeano 6 | import treeano.nodes as tn 7 | 8 | fX = theano.config.floatX 9 | 10 | 11 | @treeano.register_node("l2_pool") 12 | class L2PoolNode(treeano.Wrapper1NodeImpl): 13 | 14 | """ 15 | node that takes the L2 norm of the pooled over region 16 | """ 17 | 18 | hyperparameter_names = ("pool_size",) 19 | 20 | def architecture_children(self): 21 | nodes = [ 22 | tn.SqrNode(self.name + "_sqr"), 23 | self.raw_children(), 24 | # convert mean pool to sum pool by multiplying by pool size 25 | tn.MultiplyConstantNode(self.name + "_mul"), 26 | tn.SqrtNode(self.name + "_sqrt"), 27 | ] 28 | return [tn.SequentialNode(self.name + "_sequential", nodes)] 29 | 30 | def init_state(self, network): 31 | super(L2PoolNode, self).init_state(network) 32 | pool_size = network.find_hyperparameter(["pool_size"]) 33 | 
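# NOTE: the wrapped child is a *mean* pool, so multiplying its output by
# the pool area (np.prod(pool_size)) recovers a sum pool; together with
# the surrounding sqr/sqrt children this computes sqrt(sum(x ** 2)) --
# the L2 norm -- over each pooled region.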
network.set_hyperparameter(self.name + "_mul", 34 | "value", 35 | # cast to float, to not trigger 36 | # warn_float64 37 | float(np.prod(pool_size))) 38 | 39 | 40 | def L2Pool2DNode(name, **kwargs): 41 | l2_kwargs = {} 42 | if "pool_size" in kwargs: 43 | l2_kwargs["pool_size"] = kwargs.pop("pool_size") 44 | return L2PoolNode( 45 | name, 46 | tn.MeanPool2DNode(name + "_pool", **kwargs), 47 | **l2_kwargs) 48 | 49 | 50 | def DnnL2PoolNode(name, **kwargs): 51 | l2_kwargs = {} 52 | if "pool_size" in kwargs: 53 | l2_kwargs["pool_size"] = kwargs.pop("pool_size") 54 | return L2PoolNode( 55 | name, 56 | tn.DnnMeanPoolNode(name + "_pool", **kwargs), 57 | **l2_kwargs) 58 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/label_smoothing.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import theano 3 | import theano.tensor as T 4 | 5 | fX = theano.config.floatX 6 | 7 | 8 | def label_smoothing_categorical_crossentropy(pred, 9 | target, 10 | alpha, 11 | beta=None, 12 | num_classes=None): 13 | if target.dtype == "int32": 14 | assert pred.ndim - 1 == target.ndim 15 | assert target.ndim == 1 16 | assert pred.dtype == fX 17 | assert pred.ndim == 2 18 | target = T.extra_ops.to_one_hot(target, nb_class=num_classes, dtype=fX) 19 | if beta is None: 20 | beta = (1.0 - alpha) / (num_classes - 1) 21 | return T.nnet.categorical_crossentropy(pred, 22 | T.clip(target, beta, alpha)) 23 | 24 | 25 | def label_smoothing_categorical_crossentropy_fn(alpha, 26 | beta=None, 27 | num_classes=None): 28 | return functools.partial(label_smoothing_categorical_crossentropy, 29 | alpha=alpha, 30 | beta=beta, 31 | num_classes=num_classes) 32 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/monitor_update_ratio.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | import treeano 4 | 5 | 6 | @treeano.register_node("monitor_update_ratio") 7 | class MonitorUpdateRatioNode(treeano.Wrapper1NodeImpl): 8 | 9 | """ 10 | monitors the ratio of a statistic (e.g.
norm, max, min) between an 11 | update of a parameter and the parameter itself 12 | 13 | monitors parameters of this node's children, based on the updates already 14 | defined when traversing the architectural tree (most probably this node's 15 | parents) 16 | 17 | see: 18 | http://yyue.blogspot.in/2015/01/a-brief-overview-of-deep-learning.html 19 | http://cs231n.github.io/neural-networks-3/#ratio 20 | 21 | both links recommend a value of approximately 1e-3 22 | """ 23 | 24 | hyperparameter_names = ("statistics",) 25 | 26 | @staticmethod 27 | def statistic_to_fn(statistic): 28 | return { 29 | "2-norm": lambda x: x.norm(2), 30 | "max": T.max, 31 | "min": T.min, 32 | }[statistic] 33 | 34 | def mutate_update_deltas(self, network, update_deltas): 35 | if not network.find_hyperparameter(["monitor"]): 36 | return 37 | if not network.find_hyperparameter(["monitor_updates"], True): 38 | # don't do anything if manually asking to ignore 39 | # --- 40 | # rationale: want the ability for a validation network to turn 41 | # this off 42 | return 43 | # these need to be strings so that we can print their names 44 | # --- 45 | # by default, only show 2-norm 46 | # because max and min don't always have the same sign and are harder 47 | # to compare 48 | statistics = network.find_hyperparameter(["statistics"], 49 | ["2-norm"]) 50 | # TODO parameterize search tags (to affect not only "parameters"s) 51 | vws = network.find_vws_in_subtree(tags={"parameter"}, 52 | is_shared=True) 53 | for vw in vws: 54 | if vw.variable not in update_deltas: 55 | continue 56 | delta = update_deltas[vw.variable] 57 | for stat in statistics: 58 | assert isinstance(stat, str) 59 | name = "%s_%s" % (vw.name, stat) 60 | stat_fn = self.statistic_to_fn(stat) 61 | # computing the value of the stat after the update instead of 62 | # before 63 | # --- 64 | # rationale: avoiding 0-division errors for 0-initialized 65 | # shared variables 66 | shared_stat = stat_fn(vw.variable + delta) 67 | delta_stat = stat_fn(delta) 68 | ratio = delta_stat / shared_stat 69 | network.create_vw( 70 | name, 71 | variable=ratio, 72 | shape=(), 73 | tags={"monitor"} 74 | ) 75 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/paired_conv.py: -------------------------------------------------------------------------------- 1 | """ 2 | node for 2 convs paired together, which allows more flexible combinations of 3 | filter size and padding - specifically even filter sizes can have "same" 4 | padding 5 | """ 6 | 7 | import numpy as np 8 | import theano 9 | import theano.tensor as T 10 | import treeano 11 | import treeano.nodes as tn 12 | import canopy 13 | 14 | fX = theano.config.floatX 15 | 16 | 17 | @treeano.register_node("paired_conv") 18 | class PairedConvNode(treeano.WrapperNodeImpl): 19 | 20 | hyperparameter_names = ("inits", 21 | "filter_size", 22 | "num_filters", 23 | "conv_pad", 24 | "pad") 25 | children_container = treeano.core.DictChildrenContainerSchema( 26 | conv=treeano.core.ChildContainer, 27 | separator=treeano.core.ChildContainer, 28 | ) 29 | 30 | def architecture_children(self): 31 | children = self.raw_children() 32 | conv_node = children["conv"] 33 | separator_node = children["separator"] 34 | return [tn.SequentialNode( 35 | self.name + "_sequential", 36 | [canopy.node_utils.suffix_node(conv_node, "_1"), 37 | separator_node, 38 | canopy.node_utils.suffix_node(conv_node, "_2")])] 39 | 40 | def init_state(self, network): 41 | super(PairedConvNode, self).init_state(network)
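# NOTE: stacking two convs of filter size f gives the receptive field of
# a single (2 * f - 1) conv, which is always odd; that is what makes
# "same" padding possible for even f -- the pad needed for the odd
# effective filter is computed once, then split across the two convs
# below.
42 | filter_size =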
network.find_hyperparameter(["filter_size"]) 43 | # calculate effective total filter size 44 | total_filter_size = tuple([fs * 2 - 1 for fs in filter_size]) 45 | # by default, do same padding 46 | pad = network.find_hyperparameter(["conv_pad", "pad"], "same") 47 | total_pad = tn.conv.conv_parse_pad(total_filter_size, pad) 48 | second_pad = tuple([p // 2 for p in total_pad]) 49 | first_pad = tuple([p - p2 for p, p2 in zip(total_pad, second_pad)]) 50 | conv_node_name = self.raw_children()["conv"].name 51 | network.set_hyperparameter(conv_node_name + "_1", 52 | "pad", 53 | first_pad) 54 | network.set_hyperparameter(conv_node_name + "_2", 55 | "pad", 56 | second_pad) 57 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/partition_axis.py: -------------------------------------------------------------------------------- 1 | """ 2 | NOTE: concatenation seems very slow 3 | """ 4 | 5 | import treeano 6 | import treeano.nodes as tn 7 | 8 | 9 | @treeano.register_node("partition_axis") 10 | class PartitionAxisNode(treeano.NodeImpl): 11 | 12 | """ 13 | node that returns a fraction of the input tensor 14 | 15 | rough explanation: 16 | x.shape == (4, 8, 12, 16, 20) 17 | y = partition_axis(x, split_idx=2, num_splits=4, channel_axis=3) 18 | => 19 | y == x[:, :, :, 8:12, :] 20 | """ 21 | 22 | hyperparameter_names = ("split_idx", 23 | "num_splits", 24 | "channel_axis") 25 | 26 | def compute_output(self, network, in_vw): 27 | # FIXME make default in terms of batch axis 28 | channel_axis = network.find_hyperparameter(["channel_axis"], 1) 29 | split_idx = network.find_hyperparameter(["split_idx"]) 30 | num_splits = network.find_hyperparameter(["num_splits"]) 31 | 32 | var = in_vw.variable 33 | shape = in_vw.shape 34 | 35 | num_channels = shape[channel_axis] 36 | start_idx = (num_channels * split_idx) // num_splits 37 | end_idx = num_channels * (split_idx + 1) // num_splits 38 | 39 | new_shape = list(shape) 40 | new_shape[channel_axis] = end_idx - start_idx 41 | new_shape = tuple(new_shape) 42 | 43 | idx = tuple([slice(None) for _ in range(channel_axis)] 44 | + [slice(start_idx, end_idx)]) 45 | network.create_vw( 46 | "default", 47 | variable=var[idx], 48 | shape=new_shape, 49 | tags={"output"}, 50 | ) 51 | 52 | 53 | def MultiPool2DNode(name, **kwargs): 54 | # TODO tests 55 | # TODO make a node that verifies hyperparameters 56 | return tn.HyperparameterNode( 57 | name, 58 | tn.ConcatenateNode( 59 | name + "_concat", 60 | [tn.SequentialNode(name + "_seq0", 61 | [PartitionAxisNode(name + "_part0", 62 | split_idx=0, 63 | num_splits=2), 64 | tn.MaxPool2DNode(name + "_max", 65 | ignore_border=True)]), 66 | tn.SequentialNode(name + "_seq1", 67 | [PartitionAxisNode(name + "_part1", 68 | split_idx=1, 69 | num_splits=2), 70 | tn.MeanPool2DNode(name + "_mean")])]), 71 | **kwargs) 72 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/quickprop.py: -------------------------------------------------------------------------------- 1 | """ 2 | based on https://en.wikipedia.org/wiki/Quickprop 3 | 4 | standard quickprop has some issues: 5 | - numerical stability when dividing by (prev_grad - grad) 6 | - multiplying by the previous update 7 | - can be an issue if a previous update is 0 8 | 9 | possible (partial) solutions: 10 | - use momentum on the update 11 | - add noise to the gradient (similar to "Adding Gradient Noise Improves 12 | Learning for Very Deep Networks" http://arxiv.org/abs/1511.06807) 13 | """ 14 | 15 | 
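# NOTE: for reference, the classic quickprop step treats the cost as
# locally quadratic in each weight and takes a secant step,
#
#   delta_w(t) = delta_w(t - 1) * g(t) / (g(t - 1) - g(t))
#
# the epsilon added to the denominator below guards against the
# numerical-stability issue mentioned above.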
import numpy as np 16 | import theano 17 | import theano.tensor as T 18 | from theano.sandbox.rng_mrg import MRG_RandomStreams 19 | import treeano 20 | import treeano.nodes as tn 21 | from treeano.sandbox import update_utils 22 | 23 | fX = theano.config.floatX 24 | 25 | 26 | @treeano.register_node("quickprop") 27 | class QuickpropNode(tn.StandardUpdatesNode): 28 | 29 | def _new_update_deltas(self, network, parameter_vws, grads): 30 | update_deltas = treeano.UpdateDeltas() 31 | for parameter_vw, grad in zip(parameter_vws, grads): 32 | prev_grad, _ = update_utils.update_previous( 33 | network, 34 | update_deltas, 35 | grad, 36 | "grad(%s)" % parameter_vw.name, 37 | parameter_vw.shape) 38 | 39 | prev_update = network.create_vw( 40 | "quickprop_prev_update(%s)" % parameter_vw.name, 41 | shape=parameter_vw.shape, 42 | is_shared=True, 43 | tags={"state"}, 44 | default_inits=[treeano.inits.ConstantInit(1)], 45 | ).variable 46 | 47 | denom = prev_grad - grad 48 | # TODO parameterize 49 | epsilon = 1e-6 50 | denom = denom + treeano.utils.sign_non_zero(denom) * epsilon 51 | parameter_delta = prev_update * grad / denom 52 | 53 | parameter = parameter_vw.variable 54 | update_deltas[parameter] = parameter_delta 55 | update_deltas[prev_update] = parameter_delta - prev_update 56 | return update_deltas 57 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/recurrent_convolution.py: -------------------------------------------------------------------------------- 1 | """ 2 | from "Recurrent Convolutional Neural Network for Object Recognition" 3 | http://www.xlhu.cn/papers/Liang15-cvpr.pdf 4 | """ 5 | 6 | import toolz 7 | import numpy as np 8 | import theano 9 | import theano.tensor as T 10 | import treeano 11 | import treeano.nodes as tn 12 | from treeano.sandbox.nodes import lrn 13 | 14 | 15 | fX = theano.config.floatX 16 | 17 | 18 | @treeano.register_node("default_recurrent_conv_2d") 19 | class DefaultRecurrentConv2DNode(treeano.Wrapper0NodeImpl): 20 | 21 | hyperparameter_names = ("inits", 22 | "num_filters", 23 | "filter_size", 24 | "conv_pad", 25 | "pad") 26 | # TODO parameterize 27 | steps = 3 28 | 29 | def architecture_children(self): 30 | # TODO set LRN n = num_filters / 8 + 1 31 | nodes = [ 32 | # NOTE: not explicitly giving the first conv a pad of "same", 33 | # since the first conv can have any output shape 34 | tn.DnnConv2DWithBiasNode(self.name + "_conv0"), 35 | tn.IdentityNode(self.name + "_z0"), 36 | tn.ReLUNode(self.name + "_z0_relu"), 37 | lrn.LocalResponseNormalizationNode(self.name + "_z0_lrn"), 38 | tn.IdentityNode(self.name + "_x0"), 39 | ] 40 | for t in range(1, self.steps + 1): 41 | nodes += [ 42 | tn.DnnConv2DWithBiasNode(self.name + "_conv%d" % t, 43 | stride=(1, 1), 44 | pad="same"), 45 | tn.ElementwiseSumNode( 46 | self.name + "_sum%d" % t, 47 | [tn.ReferenceNode(self.name + "_sum%d_curr" % t, 48 | reference=self.name + "_conv%d" % t), 49 | tn.ReferenceNode(self.name + "_sum%d_prev" % t, 50 | reference=self.name + "_z0")]), 51 | tn.IdentityNode(self.name + "_z%d" % t), 52 | tn.ReLUNode(self.name + "_z%d_relu" % t), 53 | lrn.LocalResponseNormalizationNode(self.name + "_z%d_lrn" % t), 54 | tn.IdentityNode(self.name + "_x%d" % t), 55 | ] 56 | return [tn.SequentialNode(self.name + "_sequential", nodes)] 57 | 58 | def init_state(self, network): 59 | super(DefaultRecurrentConv2DNode, self).init_state(network) 60 | target_root_node_name = self.name + "_conv1" 61 | for t in range(2, self.steps + 1): 62 | root_node_name = self.name +
"_conv%d" % t 63 | inits = [treeano.inits.TiedInit(root_node_name, 64 | target_root_node_name)] 65 | network.set_hyperparameter(root_node_name, 66 | "inits", 67 | inits) 68 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/rms_normalization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | import treeano 5 | import treeano.nodes as tn 6 | 7 | fX = theano.config.floatX 8 | 9 | 10 | @treeano.register_node("rms_normalization") 11 | class RMSNormalizationNode(treeano.NodeImpl): 12 | 13 | hyperparameter_names = ("epsilon",) 14 | 15 | def compute_output(self, network, in_vw): 16 | x = in_vw.variable 17 | epsilon = 1e-5 18 | 19 | kwargs = dict(axis=[dim for dim in range(x.ndim) 20 | if dim != 0], 21 | keepdims=True) 22 | gamma = network.create_vw( 23 | name="gamma", 24 | is_shared=True, 25 | shape=(in_vw.shape[1],), 26 | tags={"parameter"}, 27 | default_inits=[], 28 | ).variable.dimshuffle("x", 0, *(["x"] * (in_vw.ndim - 2))) 29 | z = x * (T.exp(gamma) / T.sqrt(T.sqr(x).mean(**kwargs) + epsilon)) 30 | network.create_vw( 31 | name="default", 32 | variable=z, 33 | shape=in_vw.shape, 34 | tags={"output"}, 35 | ) 36 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/sample_variance_penalization.py: -------------------------------------------------------------------------------- 1 | """ 2 | based on "Empirical Bernstein Bounds and Sample Variance Penalization" 3 | http://arxiv.org/abs/0907.3740 4 | and http://www.machinedlearnings.com/2015/11/sample-variance-penalization.html 5 | """ 6 | import numpy as np 7 | import theano 8 | import theano.tensor as T 9 | import treeano 10 | import treeano.nodes as tn 11 | 12 | fX = theano.config.floatX 13 | 14 | 15 | def sample_variance_penalty_aggregator(costs, 16 | kappa=0.25, 17 | penalty_type="per_sample"): 18 | if costs.ndim < 1: 19 | assert False 20 | 21 | if penalty_type == "per_sample": 22 | # convert to 1 cost per sample 23 | if costs.ndim > 1: 24 | if costs.ndim > 2: 25 | costs = T.flatten(costs, 2) 26 | costs = costs.mean(axis=1) 27 | elif penalty_type == "per_element": 28 | # leave it as it is 29 | pass 30 | else: 31 | raise ValueError("incorrect penalty_type: {}".format(penalty_type)) 32 | 33 | return costs.mean() + kappa * costs.std() 34 | 35 | 36 | def SampleVariancePenalizationNode(*args, **kwargs): 37 | # TODO convert to node that takes in appropriate hyperparameters 38 | assert "aggregator" not in kwargs 39 | kwargs["aggregator"] = sample_variance_penalty_aggregator 40 | return tn.TotalCostNode(*args, **kwargs) 41 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/segmentation.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import theano 3 | import theano.tensor as T 4 | import treeano 5 | 6 | 7 | def approximate_negative_iou(pred, target, epsilon=1e-3): 8 | """ 9 | differentiable approximation to negative IOU 10 | """ 11 | intersection = (pred * target).sum(axis=(1, 2, 3)) 12 | intersection = T.maximum(intersection, epsilon) 13 | union = T.maximum(pred, target).sum(axis=(1, 2, 3)) 14 | union = T.maximum(union, epsilon) 15 | return -1.0 * (intersection / union).mean() 16 | 17 | 18 | def hard_bootstrapping_binary_crossentropy(pred, 19 | target, 20 | num_taken, 21 | num_skipped=0, 22 | weight=1): 23 | """ 24 | from 25 
| High-performance Semantic Segmentation Using Very Deep Fully Convolutional Networks 26 | http://arxiv.org/abs/1604.04339 27 | """ 28 | pixel_loss = treeano.utils.weighted_binary_crossentropy(pred, 29 | target, 30 | weight=weight) 31 | flat_loss = pixel_loss.flatten(2) 32 | sorted_flat_loss = T.sort(flat_loss) 33 | chosen_loss = sorted_flat_loss[:, -(num_taken + num_skipped):-num_skipped or None]  # "or None": a slice end of -0 would select nothing 34 | return chosen_loss 35 | 36 | 37 | def hard_bootstrapping_binary_crossentropy_fn(num_taken, 38 | num_skipped=0, 39 | weight=1): 40 | return functools.partial(hard_bootstrapping_binary_crossentropy, 41 | num_taken=num_taken, 42 | num_skipped=num_skipped, 43 | weight=weight) 44 | 45 | 46 | def hard_bootstrap_aggregator(loss, num_taken, num_skipped=0): 47 | flat_loss = loss.flatten(2) 48 | sorted_flat_loss = T.sort(flat_loss, axis=1) 49 | chosen_loss = sorted_flat_loss[:, -(num_taken + num_skipped):-num_skipped or None] 50 | return chosen_loss.mean() 51 | 52 | 53 | def hard_bootstrap_aggregator_fn(num_taken, num_skipped=0): 54 | return functools.partial(hard_bootstrap_aggregator, 55 | num_taken=num_taken, 56 | num_skipped=num_skipped) 57 | 58 | 59 | def mixed_hard_bootstrap_aggregator(loss, mix_rate, num_taken, num_skipped=0): 60 | flat_loss = loss.flatten(2) 61 | sorted_flat_loss = T.sort(flat_loss, axis=1) 62 | chosen_loss = sorted_flat_loss[:, -(num_taken + num_skipped):-num_skipped or None] 63 | return mix_rate * chosen_loss.mean() + (1 - mix_rate) * loss.mean() 64 | 65 | 66 | def mixed_hard_bootstrap_aggregator_fn(mix_rate, num_taken, num_skipped=0): 67 | return functools.partial(mixed_hard_bootstrap_aggregator, 68 | mix_rate=mix_rate, 69 | num_taken=num_taken, 70 | num_skipped=num_skipped) 71 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/smorms3.py: -------------------------------------------------------------------------------- 1 | """ 2 | SMORMS3 algorithm (squared mean over root mean squared cubed) 3 | based on http://sifter.org/~simon/journal/20150420.html 4 | """ 5 | 6 | import numpy as np 7 | import theano 8 | import theano.tensor as T 9 | import treeano 10 | import treeano.nodes as tn 11 | 12 | fX = theano.config.floatX 13 | 14 | 15 | @treeano.register_node("smorms3") 16 | class SMORMS3Node(tn.StandardUpdatesNode): 17 | 18 | hyperparameter_names = ("learning_rate", 19 | "epsilon") 20 | 21 | def _new_update_deltas(self, network, parameter_vws, grads): 22 | learning_rate = network.find_hyperparameter(["learning_rate"], 0.001) 23 | epsilon = network.find_hyperparameter(["epsilon"], 1e-16) 24 | update_deltas = treeano.UpdateDeltas() 25 | for parameter_vw, grad in zip(parameter_vws, grads): 26 | mem_vw = network.create_vw( 27 | "smorms3_mem(%s)" % parameter_vw.name, 28 | shape=parameter_vw.shape, 29 | is_shared=True, 30 | tags={"state"}, 31 | default_inits=[treeano.inits.ConstantInit(1)], 32 | ) 33 | g_vw = network.create_vw( 34 | "smorms3_g(%s)" % parameter_vw.name, 35 | shape=parameter_vw.shape, 36 | is_shared=True, 37 | tags={"state"}, 38 | default_inits=[], 39 | ) 40 | g2_vw = network.create_vw( 41 | "smorms3_g2(%s)" % parameter_vw.name, 42 | shape=parameter_vw.shape, 43 | is_shared=True, 44 | tags={"state"}, 45 | default_inits=[], 46 | ) 47 | parameter = parameter_vw.variable 48 | mem = mem_vw.variable 49 | g = g_vw.variable 50 | g2 = g2_vw.variable 51 | r = 1 / (mem + 1) 52 | new_g = (1 - r) * g + r * grad 53 | new_g2 = (1 - r) * g2 + r * grad ** 2 54 | term1 = (new_g ** 2) / (new_g2 + epsilon) 55 | term2 = T.sqrt(new_g2) + epsilon
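# NOTE: term1 = new_g ** 2 / new_g2 lies in [0, 1] and roughly measures
# how consistent recent gradients are (near 1 when they all point the
# same way); it caps the effective step size via T.minimum below and
# also shrinks the memory so that consistent directions adapt faster.
56 |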
parameter_delta = -grad * T.minimum(learning_rate, term1) / term2 57 | new_mem = 1 + mem * (1 - term1) 58 | update_deltas[parameter] = parameter_delta 59 | update_deltas[mem] = new_mem - mem 60 | update_deltas[g] = new_g - g 61 | update_deltas[g2] = new_g2 - g2 62 | return update_deltas 63 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/stochastic_pooling.py: -------------------------------------------------------------------------------- 1 | """ 2 | from 3 | "Stochastic Pooling for Regularization of Deep Convolutional Neural Networks" 4 | http://arxiv.org/abs/1301.3557 5 | 6 | NOTE: very slow 7 | """ 8 | 9 | import functools 10 | import theano 11 | import theano.tensor as T 12 | from theano.sandbox.rng_mrg import MRG_RandomStreams 13 | 14 | import treeano 15 | import treeano.nodes as tn 16 | 17 | fX = theano.config.floatX 18 | 19 | 20 | def stochastic_pool(neibs, axis, deterministic): 21 | """ 22 | NOTE: assumes that inputs are >= 0 23 | """ 24 | assert axis == 1 25 | # TODO parameterize 26 | epsilon = 1e-6 27 | as_p = neibs / (neibs.sum(axis=axis, keepdims=True) + epsilon) 28 | if deterministic: 29 | mask = as_p 30 | else: 31 | # FIXME save state in network 32 | srng = MRG_RandomStreams() 33 | mask = srng.multinomial(pvals=as_p).astype(fX) 34 | return (neibs * mask).sum(axis=axis) 35 | 36 | 37 | @treeano.register_node("stochastic_pool_2d") 38 | class StochasticPool2DNode(treeano.Wrapper0NodeImpl): 39 | 40 | hyperparameter_names = ( 41 | tuple([x 42 | for x in tn.CustomPool2DNode.hyperparameter_names 43 | if x != "pool_function"]) 44 | + ("deterministic",)) 45 | 46 | def architecture_children(self): 47 | return [tn.CustomPool2DNode(self.name + "_pool2d")] 48 | 49 | def init_state(self, network): 50 | super(StochasticPool2DNode, self).init_state(network) 51 | deterministic = network.find_hyperparameter(["deterministic"]) 52 | pool_fn = functools.partial(stochastic_pool, 53 | deterministic=deterministic) 54 | network.set_hyperparameter(self.name + "_pool2d", 55 | "pool_function", 56 | pool_fn) 57 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/activation_transformation_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes import activation_transformation 10 | 11 | 12 | fX = theano.config.floatX 13 | 14 | 15 | def test_concatenate_negation_node_serialization(): 16 | tn.check_serialization( 17 | activation_transformation.ConcatenateNegationNode("a")) 18 | 19 | 20 | def test_concatenate_negation_node(): 21 | # just testing that it runs 22 | network = tn.SequentialNode( 23 | "s", 24 | [tn.InputNode("i", shape=(10, 10)), 25 | activation_transformation.ConcatenateNegationNode("a")]).network() 26 | fn = network.function(["i"], ["s"]) 27 | x = np.random.randn(10, 10).astype(fX) 28 | ans = np.concatenate([x, -x], axis=1) 29 | np.testing.assert_allclose(ans, fn(x)[0]) 30 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/anrat_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes 
import anrat 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | def test_anrat_node(): 15 | network = tn.AdamNode( 16 | "adam", 17 | {"subtree": tn.InputNode("x", shape=(None, 1)), 18 | "cost": anrat.ANRATNode("cost", { 19 | "target": tn.InputNode("y", shape=(None, 1)), 20 | "pred": tn.ReferenceNode("pred_ref", reference="x"), 21 | })}).network() 22 | 23 | fn = network.function(["x", "y"], ["cost"], include_updates=True) 24 | 25 | for x_raw, y_raw in [(3.4, 2), 26 | (4.2, 4.2)]: 27 | x = np.array([[x_raw]], dtype=fX) 28 | y = np.array([[y_raw]], dtype=fX) 29 | prev_cost = fn(x, y)[0] 30 | for _ in range(3): 31 | cost = fn(x, y)[0] 32 | assert cost < prev_cost 33 | prev_cost = cost 34 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/auxiliary_costs_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | import treeano.sandbox.nodes.auxiliary_costs as auxiliary_costs 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_auxiliary_dense_softmax_cce_node_serialization(): 14 | tn.check_serialization( 15 | auxiliary_costs.AuxiliaryDenseSoftmaxCCENode("a", {})) 16 | tn.check_serialization( 17 | auxiliary_costs.AuxiliaryDenseSoftmaxCCENode("a", {}, num_units=100)) 18 | 19 | 20 | def test_auxiliary_dense_softmax_cce_node(): 21 | network = tn.SequentialNode( 22 | "seq", 23 | [tn.InputNode("in", shape=(3, 5)), 24 | auxiliary_costs.AuxiliaryDenseSoftmaxCCENode( 25 | "aux", 26 | {"target": tn.ConstantNode("target", value=np.eye(3).astype(fX))}, 27 | num_units=3, 28 | cost_reference="foo"), 29 | tn.IdentityNode("i"), 30 | tn.InputElementwiseSumNode("foo", ignore_default_input=True)] 31 | ).network() 32 | x = np.random.randn(3, 5).astype(fX) 33 | fn = network.function(["in"], ["i", "foo", "aux_dense"]) 34 | res = fn(x) 35 | np.testing.assert_equal(res[0], x) 36 | loss = T.nnet.categorical_crossentropy( 37 | np.ones((3, 3), dtype=fX) / 3.0, 38 | np.eye(3).astype(fX), 39 | ).mean().eval() 40 | np.testing.assert_allclose(res[1], loss) 41 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/bttf_mean_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | import treeano 10 | from treeano.sandbox.nodes import bttf_mean 11 | 12 | fX = theano.config.floatX 13 | 14 | 15 | def test_backprop_to_the_future_mean_with_updates1(): 16 | x = T.constant(treeano.utils.as_fX(0.)) 17 | m = theano.shared(treeano.utils.as_fX(1.)) 18 | g = theano.shared(treeano.utils.as_fX(2.)) 19 | 20 | bm = bttf_mean.backprop_to_the_future_mean_with_updates(x, m, g, 0.5) 21 | fn = theano.function([], T.grad(bm, x)) 22 | x_grad = fn() 23 | 24 | np.testing.assert_allclose(2.0, x_grad) 25 | np.testing.assert_allclose(0.5, m.get_value()) 26 | np.testing.assert_allclose(1.5, g.get_value()) 27 | 28 | 29 | def test_backprop_to_the_future_mean_with_updates2(): 30 | x = T.constant(treeano.utils.as_fX(0.)) 31 | m = theano.shared(treeano.utils.as_fX(1.)) 32 | g = theano.shared(treeano.utils.as_fX(2.)) 33 | 34 | bm = bttf_mean.backprop_to_the_future_mean_with_updates(x, m, g, 0.7) 35 | fn = theano.function([], T.grad(10 * bm, x)) 36 | x_grad = fn() 
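# NOTE: with alpha = 0.7, the gradient w.r.t. x is the *old* rolling
# grad (2.0), while the shared state is updated in place:
#   m <- 0.7 * 1.0 + 0.3 * 0.0 = 0.7
#   g <- 0.7 * 2.0 + 0.3 * 10.0  (out_grad is 10 because of 10 * bm)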
37 | 38 | np.testing.assert_allclose(2.0, x_grad) 39 | np.testing.assert_allclose(0.7, m.get_value()) 40 | np.testing.assert_allclose(10 * 0.3 + 2 * 0.7, g.get_value()) 41 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/channel_out_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes import channel_out 10 | 11 | 12 | fX = theano.config.floatX 13 | 14 | 15 | def test_channel_out_node_serialization(): 16 | tn.check_serialization(channel_out.ChannelOutNode("a")) 17 | 18 | 19 | def test_channel_out_node(): 20 | network = tn.SequentialNode( 21 | "s", 22 | [tn.InputNode("i", shape=(1, 15)), 23 | channel_out.ChannelOutNode("m", num_pieces=5)]).network() 24 | 25 | fn = network.function(["i"], ["m"]) 26 | x = np.arange(15).astype(fX).reshape(1, 15) 27 | ans = np.zeros_like(x) 28 | ans[:, [4, 9, 14]] = [4, 9, 14] 29 | np.testing.assert_equal(fn(x)[0], 30 | ans) 31 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/contraction_penalty_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import contraction_penalty as cp 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_elementwise_contraction_penalty_node_serialization(): 14 | tn.check_serialization(cp.ElementwiseContractionPenaltyNode("a")) 15 | 16 | 17 | def test_auxiliary_contraction_penalty_node_serialization(): 18 | tn.check_serialization(cp.AuxiliaryContractionPenaltyNode( 19 | "a", tn.IdentityNode("b"))) 20 | 21 | 22 | def test_elementwise_contraction_penalty_node1(): 23 | network = tn.SequentialNode( 24 | "s", 25 | [tn.InputNode("i", shape=(10, 3)), 26 | cp.ElementwiseContractionPenaltyNode("cp", input_reference="i")] 27 | ).network() 28 | fn = network.function(["i"], ["s"]) 29 | x = np.random.rand(10, 3).astype(fX) 30 | # jacobian of each location is 1 31 | # squared jacobian is 1 32 | # mean squared jacobian is 1/3 33 | np.testing.assert_equal(fn(x)[0], np.ones(10, dtype=fX) / 3) 34 | 35 | 36 | def test_elementwise_contraction_penalty_node2(): 37 | # just testing that it runs 38 | network = tn.SequentialNode( 39 | "s", 40 | [tn.InputNode("i", shape=(10, 3)), 41 | tn.DenseNode("d", num_units=9), 42 | cp.ElementwiseContractionPenaltyNode("cp", input_reference="i")] 43 | ).network() 44 | fn = network.function(["i"], ["s"]) 45 | x = np.random.rand(10, 3).astype(fX) 46 | nt.assert_equal(fn(x)[0].shape, (10,)) 47 | 48 | 49 | def test_auxiliary_contraction_penalty_node(): 50 | # testing that both contraction penalty versions return the same thing 51 | network = tn.SequentialNode( 52 | "s", 53 | [tn.InputNode("i", shape=(10, 3)), 54 | cp.AuxiliaryContractionPenaltyNode( 55 | "acp", 56 | tn.DenseNode("d", num_units=9), 57 | cost_reference="sum"), 58 | cp.ElementwiseContractionPenaltyNode("cp", input_reference="i"), 59 | tn.AggregatorNode("a"), 60 | # zero out rest of network, so that value of sum is just value from 61 | # auxiliary contraction penalty node 62 | tn.ConstantNode("foo", value=0), 63 | tn.InputElementwiseSumNode("sum")] 64 | ).network() 65 | fn = 
network.function(["i"], ["sum", "a"]) 66 | x = np.random.rand(10, 3).astype(fX) 67 | res = fn(x) 68 | np.testing.assert_equal(res[0], res[1]) 69 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/dNDF_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes import dNDF 10 | 11 | 12 | fX = theano.config.floatX 13 | 14 | 15 | def test_split_probabilities_to_leaf_probabilities_node_serialization(): 16 | for node in [dNDF.TheanoSplitProbabilitiesToLeafProbabilitiesNode, 17 | dNDF.NumpySplitProbabilitiesToLeafProbabilitiesNode]: 18 | tn.check_serialization(node("a")) 19 | 20 | 21 | def test_split_probabilities_to_leaf_probabilities_node(): 22 | x = np.array([[[0.9, 0.2], 23 | [0.7, 0.6], 24 | [0.4, 0.3]]], 25 | dtype=fX) 26 | ans = np.array([[[0.9 * 0.7, 0.2 * 0.6], 27 | [0.9 * (1 - 0.7), 0.2 * (1 - 0.6)], 28 | [(1 - 0.9) * 0.4, (1 - 0.2) * 0.3], 29 | [(1 - 0.9) * (1 - 0.4), (1 - 0.2) * (1 - 0.3)]]], 30 | dtype=fX) 31 | 32 | for node in [dNDF.TheanoSplitProbabilitiesToLeafProbabilitiesNode, 33 | dNDF.NumpySplitProbabilitiesToLeafProbabilitiesNode]: 34 | network = tn.SequentialNode( 35 | "s", 36 | [tn.InputNode("i", shape=(1, 3, 2)), 37 | node("p")] 38 | ).network() 39 | 40 | fn = network.function(["i"], ["s"]) 41 | 42 | np.testing.assert_allclose(ans, 43 | fn(x)[0], 44 | rtol=1e-5) 45 | 46 | 47 | def test_split_probabilities_to_leaf_probabilities_node_grad(): 48 | x = np.array([[[0.9, 0.2], 49 | [0.7, 0.6], 50 | [0.4, 0.3]]], 51 | dtype=fX) 52 | 53 | def node_to_grad(node): 54 | network = tn.SequentialNode( 55 | "s", 56 | [tn.InputNode("i", shape=(1, 3, 2)), 57 | node("p")] 58 | ).network() 59 | 60 | in_ = network["i"].get_vw("default").variable 61 | out = network["s"].get_vw("default").variable 62 | g = T.grad(out[:, 0].sum(), in_) 63 | fn = network.function(["i"], [g]) 64 | return fn(x)[0] 65 | 66 | n1, n2 = [dNDF.TheanoSplitProbabilitiesToLeafProbabilitiesNode, 67 | dNDF.NumpySplitProbabilitiesToLeafProbabilitiesNode] 68 | 69 | np.testing.assert_allclose(node_to_grad(n1), 70 | node_to_grad(n2), 71 | rtol=1e-5) 72 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/deconv_upsample_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import deconv_upsample 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_deconv_upsample_2d_node_serialization(): 14 | tn.check_serialization(deconv_upsample.DeconvUpsample2DNode("a")) 15 | 16 | 17 | if "gpu" in theano.config.device: 18 | 19 | def test_default_recurrent_conv_2d_node(): 20 | network = tn.SequentialNode( 21 | "s", 22 | [tn.InputNode("i", shape=(3, 4, 5, 6)), 23 | deconv_upsample.DeconvUpsample2DNode( 24 | "a", 25 | num_filters=7, 26 | upsample_factor=(2, 2), 27 | filter_size=(3, 3), 28 | )] 29 | ).network() 30 | fn = network.function(["i"], ["s"]) 31 | res = fn(np.random.randn(3, 4, 5, 6).astype(fX))[0] 32 | np.testing.assert_equal((3, 7, 10, 12), res.shape) 33 | np.testing.assert_equal((3, 7, 10, 12), 34 | network['a'].get_vw('default').shape) 35 | 
-------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/dropout_max_pool_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import dropout_max_pool as dmp 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_dropout_max_pool_2d_node_serialization(): 14 | tn.check_serialization(dmp.DropoutMaxPool2DNode("a")) 15 | 16 | 17 | def test_dropout_max_pool_2d_node1(): 18 | network = tn.SequentialNode( 19 | "s", 20 | [tn.InputNode("i", shape=(1, 1, 2, 2)), 21 | dmp.DropoutMaxPool2DNode("a", 22 | pool_size=(2, 2), 23 | dropout_probability=0.3, 24 | deterministic=True)] 25 | ).network() 26 | fn = network.function(["i"], ["s"]) 27 | x = np.arange(4).astype(fX).reshape(1, 1, 2, 2) 28 | ans = np.array([[[[1 * 0.7 * 0.3 ** 2 + 2 * 0.7 * 0.3 + 3 * 0.7]]]], 29 | dtype=fX) 30 | np.testing.assert_allclose(fn(x)[0], 31 | ans, 32 | rtol=1e-5) 33 | nt.assert_equal(network["s"].get_vw("default").shape, 34 | ans.shape) 35 | 36 | 37 | def test_dropout_max_pool_2d_node2(): 38 | # testing that stochastic version works 39 | network = tn.SequentialNode( 40 | "s", 41 | [tn.InputNode("i", shape=(1, 1, 4, 4)), 42 | dmp.DropoutMaxPool2DNode("a", pool_size=(2, 2))] 43 | ).network() 44 | fn = network.function(["i"], ["s"]) 45 | x = np.arange(16).astype(fX).reshape(1, 1, 4, 4) 46 | fn(x) 47 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/equilibrated_sgd_test.py: -------------------------------------------------------------------------------- 1 | import treeano.nodes as tn 2 | from treeano.sandbox.nodes import equilibrated_sgd 3 | 4 | 5 | def test_equilibrated_sgd_node_serialization(): 6 | tn.check_serialization(equilibrated_sgd.EquilibratedSGDNode("a")) 7 | 8 | 9 | def test_equilibrated_sgd_node(): 10 | tn.test_utils.check_updates_node(equilibrated_sgd.EquilibratedSGDNode, 11 | activation="sigmoid") 12 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/expected_batches_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import expected_batches as eb 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_scale_hyperparameter(): 14 | network = tn.HyperparameterNode( 15 | "hp", 16 | eb.ScaleHyperparameterNode( 17 | "scale", 18 | tn.ConstantNode("c")), 19 | value=42.0, 20 | hyperparameter="value", 21 | start_percent=0., 22 | end_percent=1.0, 23 | start_scale=1.0, 24 | end_scale=0.1, 25 | expected_batches=2, 26 | ).network() 27 | 28 | fn = network.function([], ["c"], include_updates=True) 29 | 30 | np.testing.assert_allclose(42.0, 31 | fn()[0], 32 | rtol=1e-5) 33 | np.testing.assert_allclose(42.0 * 0.55, 34 | fn()[0], 35 | rtol=1e-5) 36 | np.testing.assert_allclose(42.0 * 0.1, 37 | fn()[0], 38 | rtol=1e-5) 39 | np.testing.assert_allclose(42.0 * 0.1, 40 | fn()[0], 41 | rtol=1e-5) 42 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/gradient_normalization_test.py: -------------------------------------------------------------------------------- 1 | 
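# NOTE: GradientBatchNormalizationOp leaves the forward value unchanged
# and standardizes the *gradient* across the batch axis, i.e. the
# backward pass returns (g - g.mean(axis=0)) / (g.std(axis=0) + eps),
# which is what the assertions below check.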
import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes import gradient_normalization as gn 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | def test_gradient_batch_normalization_op(): 15 | epsilon = 1e-8 16 | op = gn.GradientBatchNormalizationOp(subtract_mean=True, 17 | keep_mean=False, 18 | epsilon=epsilon) 19 | 20 | X = np.random.randn(3, 4).astype(fX) 21 | W = np.random.randn(2, 3).astype(fX) 22 | 23 | x = T.matrix("x") 24 | w = T.matrix("w") 25 | 26 | orig_grad = T.grad(w.dot(x).sum(), x).eval({x: X, w: W}) 27 | new_grad = T.grad(w.dot(op(x)).sum(), x).eval({x: X, w: W}) 28 | mu = orig_grad.mean(axis=0, keepdims=True) 29 | sigma = orig_grad.std(axis=0, keepdims=True) + epsilon 30 | ans = (orig_grad - mu) / sigma 31 | np.testing.assert_allclose(ans, 32 | new_grad, 33 | rtol=1e-5) 34 | np.testing.assert_allclose(np.zeros(4), 35 | new_grad.mean(axis=0), 36 | atol=1e-5) 37 | np.testing.assert_allclose(np.ones(4), 38 | new_grad.std(axis=0), 39 | rtol=1e-5) 40 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/gradnet_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import nose.tools as nt 5 | import numpy as np 6 | import theano 7 | import theano.tensor as T 8 | 9 | import treeano 10 | import treeano.nodes as tn 11 | 12 | from treeano.sandbox.nodes import gradnet 13 | 14 | fX = theano.config.floatX 15 | 16 | 17 | def test_grad_net_interpolation_node(): 18 | network = tn.SequentialNode( 19 | "s", 20 | [tn.InputNode("i", shape=(1, 10)), 21 | gradnet.GradNetInterpolationNode( 22 | "gradnet", 23 | {"early": tn.ReLUNode("r"), 24 | "late": tn.TanhNode("t")}, 25 | late_gate=0.5)] 26 | ).network() 27 | 28 | fn = network.function(["i"], ["s"]) 29 | x = np.random.randn(1, 10).astype(fX) 30 | ans = 0.5 * np.clip(x, 0, np.inf) + 0.5 * np.tanh(x) 31 | np.testing.assert_allclose(ans, fn(x)[0], rtol=1e-5) 32 | 33 | 34 | def test_grad_net_optimizer_interpolation_node(): 35 | 36 | class StateNode(treeano.NodeImpl): 37 | input_keys = () 38 | 39 | def compute_output(self, network): 40 | network.create_vw( 41 | name="default", 42 | shape=(), 43 | is_shared=True, 44 | tags=["parameter"], 45 | inits=[], 46 | ) 47 | 48 | def updater(const): 49 | class UpdaterNode(treeano.nodes.updates.StandardUpdatesNode): 50 | 51 | def _new_update_deltas(self, network, vws, grads): 52 | return treeano.UpdateDeltas({vw.variable: const for vw in vws}) 53 | 54 | return UpdaterNode 55 | 56 | network = tn.SharedHyperparameterNode( 57 | "n", 58 | gradnet.GradNetOptimizerInterpolationNode( 59 | "g", 60 | {"subtree": StateNode("s"), 61 | "cost": tn.ReferenceNode("r", reference="s")}, 62 | early=updater(-1), 63 | late=updater(1)), 64 | hyperparameter="late_gate" 65 | ).network() 66 | 67 | fn1 = network.function([("n", "hyperparameter")], 68 | [], 69 | include_updates=True) 70 | fn2 = network.function([], ["n"]) 71 | gates_and_answers = [(0, -1), 72 | (0.25, -1.5), 73 | (1, -0.5), 74 | (1, 0.5)] 75 | for gate, ans in gates_and_answers: 76 | fn1(gate) 77 | np.testing.assert_allclose(ans, fn2()[0], rtol=1e-1) 78 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/input_scaling_test.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import nose.tools as nt 5 | import numpy as np 6 | import theano 7 | import theano.tensor as T 8 | 9 | import treeano 10 | import treeano.nodes as tn 11 | 12 | from treeano.sandbox.nodes import input_scaling 13 | 14 | 15 | fX = theano.config.floatX 16 | 17 | 18 | def test_clip_scaling_node_serialization(): 19 | tn.check_serialization(input_scaling.ClipScalingNode("a")) 20 | 21 | 22 | def test_clip_scaling_node(): 23 | network = tn.SequentialNode( 24 | "s", 25 | [tn.InputNode("i", shape=(None, 2)), 26 | input_scaling.ClipScalingNode("c", 27 | mins=np.array([0, 1]), 28 | maxs=np.array([2, 3]))] 29 | ).network() 30 | fn = network.function(["i"], ["s"]) 31 | x = np.arange(6).reshape(3, 2).astype(fX) 32 | res = fn(x)[0] 33 | ans = np.array([[0, 0, 0.5, 0], 34 | [1, 0.5, 1, 1], 35 | [1, 1, 1, 1]],) 36 | np.testing.assert_allclose(ans, res) 37 | 38 | 39 | def test_clip_scaling_node_learnable(): 40 | # just testing that it runs, not that it learns 41 | network = tn.SequentialNode( 42 | "s", 43 | [tn.InputNode("i", shape=(None, 2)), 44 | input_scaling.ClipScalingNode("c", 45 | mins=np.array([0, 1]), 46 | maxs=np.array([2, 3]), 47 | learnable=True)] 48 | ).network() 49 | fn = network.function(["i"], ["s"]) 50 | x = np.arange(6).reshape(3, 2).astype(fX) 51 | res = fn(x)[0] 52 | ans = np.array([[0, 0, 0.5, 0], 53 | [1, 0.5, 1, 1], 54 | [1, 1, 1, 1]],) 55 | np.testing.assert_allclose(ans, res) 56 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/interval_relu_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import nose.tools as nt 5 | import numpy as np 6 | import theano 7 | import theano.tensor as T 8 | 9 | import treeano 10 | import treeano.nodes as tn 11 | 12 | from treeano.sandbox.nodes import interval_relu as irelu 13 | 14 | 15 | fX = theano.config.floatX 16 | 17 | 18 | def test_interval_relu_node_serialization(): 19 | tn.check_serialization(irelu.IntervalReLUNode("a")) 20 | 21 | 22 | def test_interval_relu_node(): 23 | network = tn.SequentialNode( 24 | "s", 25 | [tn.InputNode("i", shape=(1, 5)), 26 | irelu.IntervalReLUNode("a")] 27 | ).network() 28 | 29 | fn = network.function(["i"], ["s"]) 30 | x = -1 * np.ones((1, 5), dtype=fX) 31 | ans = np.array([[0, -0.25, -0.5, -0.75, -1]], dtype=fX) 32 | np.testing.assert_allclose(ans, fn(x)[0], rtol=1e-5) 33 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/invariant_dropout_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes import invariant_dropout as ido 10 | 11 | 12 | fX = theano.config.floatX 13 | 14 | 15 | def test_invariant_dropout_node_serialization(): 16 | tn.check_serialization(ido.InvariantDropoutNode("a")) 17 | 18 | 19 | def test_invariant_dropout_node(): 20 | # just testing that it runs 21 | network = tn.SequentialNode( 22 | "s", 23 | [tn.InputNode("i", shape=(10, 10)), 24 | ido.InvariantDropoutNode("ido", p=0.5)]).network() 25 | 26 | fn = 
network.function(["i"], ["s"]) 27 | x = np.random.rand(10, 10).astype(fX) 28 | fn(x) 29 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/inverse_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import inverse 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_inverse_node_serialization(): 14 | tn.check_serialization(inverse.InverseNode("a")) 15 | 16 | 17 | def test_inverse_node(): 18 | network = tn.SequentialNode( 19 | "s", 20 | [tn.InputNode("i", shape=(1, 1, 2, 2)), 21 | tn.MaxPool2DNode("m", pool_size=(2, 2)), 22 | tn.InputNode("i2", shape=(1, 1, 1, 1)), 23 | inverse.InverseNode("in", reference="m")] 24 | ).network() 25 | fn = network.function(["i", "i2"], ["in"]) 26 | x = np.array([[[[1, 2], 27 | [3, 4]]]], 28 | dtype=fX) 29 | x2 = np.array(np.random.randn(), dtype=fX) 30 | ans = x2 * np.array([[[[0, 0], 31 | [0, 1]]]], 32 | dtype=fX) 33 | 34 | np.testing.assert_equal(ans, fn(x, x2.reshape(1, 1, 1, 1))[0]) 35 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/irregular_length_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import irregular_length 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_irregular_length_attention_softmax_node(): 14 | network = tn.SequentialNode( 15 | "s", 16 | [tn.InputNode("l", shape=(None,)), 17 | tn.InputNode("i", shape=(None, None, 3)), 18 | irregular_length._IrregularLengthAttentionSoftmaxNode( 19 | "foo", 20 | lengths_reference="l")] 21 | ).network() 22 | 23 | fn = network.function(["i", "l"], ["s"]) 24 | x = np.random.randn(4, 7, 3).astype(fX) 25 | l = np.array([2, 3, 7, 3], dtype=fX) 26 | for idx, l_ in enumerate(l): 27 | x[idx, l_:] = 0 28 | res = fn(x, l)[0] 29 | nt.assert_equal((4, 7, 3), res.shape) 30 | for idx, l_ in enumerate(l): 31 | np.testing.assert_almost_equal(res[idx][:l_, 0].sum(), 32 | desired=1.0, 33 | decimal=5) 34 | 35 | 36 | def test_irregular_length_attention_node(): 37 | network = tn.SequentialNode( 38 | "s", 39 | [tn.InputNode("l", shape=(None,)), 40 | tn.InputNode("i", shape=(None, 3)), 41 | irregular_length.irregular_length_attention_node( 42 | "foo", 43 | lengths_reference="l", 44 | num_units=3, 45 | output_units=None)] 46 | ).network() 47 | nt.assert_equal((None, 3), network["foo"].get_vw("default").shape) 48 | 49 | fn = network.function(["i", "l"], ["s"]) 50 | x = np.random.randn(15, 3).astype(fX) 51 | l = np.array([2, 3, 7, 3], dtype=fX) 52 | res = fn(x, l)[0].shape 53 | ans = (4, 3) 54 | nt.assert_equal(ans, res) 55 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/kl_sparsity_penalty_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import kl_sparsity_penalty as sp 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def 
test_elementwise_kl_sparsity_penalty_node_serialization(): 14 | tn.check_serialization(sp.ElementwiseKLSparsityPenaltyNode("a")) 15 | 16 | 17 | def test_auxiliary_kl_sparsity_penalty_node_serialization(): 18 | tn.check_serialization(sp.AuxiliaryKLSparsityPenaltyNode("a")) 19 | 20 | 21 | def test_elementwise_kl_sparsity_penalty_node1(): 22 | network = tn.SequentialNode( 23 | "s", 24 | [tn.InputNode("i", shape=(5, 3)), 25 | sp.ElementwiseKLSparsityPenaltyNode("sp", sparsity=0.1)] 26 | ).network() 27 | fn = network.function(["i"], ["s"]) 28 | x = np.ones((5, 3), dtype=fX) * 0.1 29 | np.testing.assert_allclose(np.zeros((5, 3), dtype=fX), 30 | fn(x)[0], 31 | rtol=1e-5, 32 | atol=1e-7) 33 | 34 | 35 | def test_elementwise_kl_sparsity_penalty_node2(): 36 | # just testing that it runs 37 | network = tn.SequentialNode( 38 | "s", 39 | [tn.InputNode("i", shape=(10, 3)), 40 | tn.DenseNode("d", num_units=9), 41 | sp.ElementwiseKLSparsityPenaltyNode("sp", sparsity=0.1)] 42 | ).network() 43 | fn = network.function(["i"], ["s"]) 44 | x = np.random.rand(10, 3).astype(fX) 45 | nt.assert_equal(fn(x)[0].shape, (10, 9)) 46 | 47 | 48 | def test_auxiliary_kl_sparsity_penalty_node(): 49 | # testing that both sparsity penalty versions return the same thing 50 | network = tn.HyperparameterNode( 51 | "hp", 52 | tn.SequentialNode( 53 | "s", 54 | [tn.InputNode("i", shape=(10, 3)), 55 | tn.DenseNode("d", num_units=9), 56 | sp.AuxiliaryKLSparsityPenaltyNode("scp", cost_reference="sum"), 57 | sp.ElementwiseKLSparsityPenaltyNode("sp"), 58 | tn.AggregatorNode("a"), 59 | # zero out rest of network, so that value of sum is just the value 60 | # from auxiliary sparsity penalty node 61 | tn.ConstantNode("foo", value=0), 62 | tn.InputElementwiseSumNode("sum")]), 63 | sparsity=0.1, 64 | ).network() 65 | fn = network.function(["i"], ["sum", "a"]) 66 | x = np.random.rand(10, 3).astype(fX) 67 | res = fn(x) 68 | np.testing.assert_equal(res[0], res[1]) 69 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/kumaraswamy_unit_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes import kumaraswamy_unit as ku 10 | 11 | 12 | fX = theano.config.floatX 13 | 14 | 15 | def test_kumaraswamy_unit_node_serialization(): 16 | tn.check_serialization(ku.KumaraswamyUnitNode("a")) 17 | 18 | 19 | def test_kumaraswamy_unit_node(): 20 | # just testing that it runs 21 | network = tn.SequentialNode( 22 | "s", 23 | [tn.InputNode("i", shape=(100,)), 24 | ku.KumaraswamyUnitNode("k")]).network() 25 | fn = network.function(["i"], ["s"]) 26 | x = np.random.randn(100).astype(fX) 27 | fn(x) 28 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/l2_pool_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import l2_pool 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_l2_pool_2d_node_serialization(): 14 | # NOTE: serialization converts pool_size to list, so must be a list 15 | tn.check_serialization(l2_pool.L2Pool2DNode("a")) 16 | 17 | 18 | def test_l2_pool_2d_node(): 19 | network = tn.SequentialNode(
20 | "s", 21 | [tn.InputNode("i", shape=(1, 1, 4, 4)), 22 | l2_pool.L2Pool2DNode("p", pool_size=(2, 2))]).network() 23 | fn = network.function(["i"], ["s"]) 24 | x = np.array([[[[3, 4, 1, 2], 25 | [0, 0, 3, 4], 26 | [1, 1, -1, 1], 27 | [1, 1, 1, -1]]]], dtype=fX) 28 | ans = np.array([[[[5, np.linalg.norm([1, 2, 3, 4])], 29 | [2, 2]]]], dtype=fX) 30 | np.testing.assert_allclose(ans, fn(x)[0]) 31 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/mixed_pooling_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import mixed_pooling 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_mixed_pool_node_serialization(): 14 | tn.check_serialization(mixed_pooling.MixedPoolNode("a")) 15 | 16 | 17 | def test_gated_pool_2d_node_serialization(): 18 | tn.check_serialization(mixed_pooling.GatedPool2DNode("a")) 19 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/monitor_update_ratio_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import re 3 | import numpy as np 4 | import theano 5 | import theano.tensor as T 6 | 7 | import treeano 8 | import treeano.nodes as tn 9 | 10 | from treeano.sandbox.nodes import monitor_update_ratio 11 | 12 | 13 | fX = theano.config.floatX 14 | 15 | 16 | def test_monitor_update_ratio_node(): 17 | network = tn.WeightDecayNode( 18 | "decay", 19 | monitor_update_ratio.MonitorUpdateRatioNode( 20 | "mur", 21 | tn.SequentialNode( 22 | "s", 23 | [tn.InputNode("i", shape=(None, 3)), 24 | tn.LinearMappingNode("linear", output_dim=10), 25 | tn.AddBiasNode("bias")])), 26 | weight_decay=1 27 | ).network() 28 | network.build() 29 | mur_net = network["mur"] 30 | vws = mur_net.find_vws_in_subtree(tags={"monitor"}) 31 | assert len(vws) == 1 32 | vw, = vws 33 | assert re.match(".*_2-norm$", vw.name) 34 | assert re.match(".*linear.*", vw.name) 35 | assert not re.match(".*bias.*", vw.name) 36 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/paired_conv_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import paired_conv 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_paired_conv_2d_with_bias_node_serialization(): 14 | tn.check_serialization(paired_conv.PairedConvNode("a", {})) 15 | 16 | 17 | def test_paired_conv_2d_with_bias_node(): 18 | network = tn.SequentialNode( 19 | "s", 20 | [tn.InputNode("i", shape=(3, 4, 5, 6)), 21 | paired_conv.PairedConvNode( 22 | "c", 23 | {"conv": tn.Conv2DWithBiasNode("c_conv"), 24 | "separator": tn.IdentityNode("sep")}, 25 | filter_size=(2, 2), 26 | num_filters=7, 27 | pad="same")] 28 | ).network() 29 | fn = network.function(["i"], ["s"]) 30 | res = fn(np.random.randn(3, 4, 5, 6).astype(fX))[0] 31 | np.testing.assert_equal((3, 7, 5, 6), res.shape) 32 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/partition_axis_test.py: 
-------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes import partition_axis 10 | 11 | 12 | fX = theano.config.floatX 13 | 14 | 15 | def test_partition_axis_node_serialization(): 16 | tn.check_serialization(partition_axis.PartitionAxisNode("a")) 17 | 18 | 19 | def test_partition_axis_node(): 20 | # tests both the output values and the inferred shape 21 | network = tn.SequentialNode( 22 | "s", 23 | [tn.InputNode("i", shape=(4, 8, 12, 16, 20)), 24 | partition_axis.PartitionAxisNode("pa", 25 | split_idx=2, 26 | num_splits=4, 27 | channel_axis=3)] 28 | ).network() 29 | fn = network.function(["i"], ["s"]) 30 | x = np.random.randn(4, 8, 12, 16, 20).astype(fX) 31 | ans = x[:, :, :, 8:12, :] 32 | res = fn(x)[0] 33 | nt.assert_equal(ans.shape, network["pa"].get_vw("default").shape) 34 | np.testing.assert_equal(res, ans) 35 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/prelu_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes import prelu 10 | 11 | 12 | fX = theano.config.floatX 13 | 14 | 15 | def test_prelu_node_serialization(): 16 | tn.check_serialization(prelu.PReLUNode("a")) 17 | 18 | 19 | def test_prelu_node(): 20 | network = tn.SequentialNode( 21 | "s", 22 | [tn.InputNode("i", shape=(1, 4)), 23 | prelu.PReLUNode("p")]).network() 24 | 25 | fn = network.function(["i"], ["p"]) 26 | x = np.array([[-1.0, -0.2, 0.2, 1.0]], dtype=fX) 27 | ans = np.array([[-0.25, -0.05, 0.2, 1.0]], dtype=fX) 28 | np.testing.assert_allclose(fn(x)[0], 29 | ans) 30 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/randomized_relu_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import nose.tools as nt 5 | import numpy as np 6 | import theano 7 | import theano.tensor as T 8 | 9 | import treeano 10 | import treeano.nodes as tn 11 | 12 | from treeano.sandbox.nodes import randomized_relu as rrelu 13 | 14 | 15 | fX = theano.config.floatX 16 | 17 | 18 | def test_rrelu_node_serialization(): 19 | tn.check_serialization(rrelu.RandomizedReLUNode("a")) 20 | 21 | 22 | def test_rrelu_node1(): 23 | network = tn.SequentialNode( 24 | "s", 25 | [tn.InputNode("i", shape=(1, 4)), 26 | rrelu.RandomizedReLUNode("p", deterministic=True)]).network() 27 | 28 | fn = network.function(["i"], ["p"]) 29 | x = np.array([[-1.0, -0.2, 0.2, 1.0]], dtype=fX) 30 | ans = np.array([[-1.0 * 2 / 11, -0.2 * 2 / 11, 0.2, 1.0]], dtype=fX) 31 | np.testing.assert_allclose(fn(x)[0], 32 | ans, 33 | rtol=1e-5) 34 | 35 | 36 | def test_rrelu_node2(): 37 | network = tn.SequentialNode( 38 | "s", 39 | [tn.InputNode("i", shape=(100, 100)), 40 | rrelu.RandomizedReLUNode("p")]).network() 41 | 42 | fn = network.function(["i"], ["p"]) 43 | x = -np.random.rand(100, 100).astype(fX) 44 | res = fn(x)[0] 45 | assert res.min() > -1 / 3.
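# assuming alpha is drawn uniformly from [3, 8] and negative inputs are divided
# by alpha (as in the RReLU paper), negative slopes lie in [1/8, 1/3], so for
# inputs in (-1, 0) the outputs must stay above -1/3; the deterministic 2 / 11
# factor in test_rrelu_node1 above is division by the mean alpha, (3 + 8) / 2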
46 | assert res.max() < 0 47 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/recurrent_convolution_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import recurrent_convolution as rcl 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_default_recurrent_conv_2d_node_serialization(): 14 | tn.check_serialization(rcl.DefaultRecurrentConv2DNode("a")) 15 | 16 | 17 | if "gpu" in theano.config.device: 18 | 19 | def test_default_recurrent_conv_2d_node(): 20 | network = tn.SequentialNode( 21 | "s", 22 | [tn.InputNode("i", shape=(3, 4, 5, 6)), 23 | rcl.DefaultRecurrentConv2DNode("a", 24 | num_filters=7, 25 | filter_size=(3, 3), 26 | pad="same")] 27 | ).network() 28 | fn = network.function(["i"], ["s"]) 29 | res = fn(np.random.randn(3, 4, 5, 6).astype(fX))[0] 30 | np.testing.assert_equal((3, 7, 5, 6), res.shape) 31 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/resnet_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import resnet 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_zero_last_axis_partition_node(): 14 | network = tn.SequentialNode( 15 | "s", 16 | [tn.InputNode("i", shape=(None,)), 17 | resnet._ZeroLastAxisPartitionNode("z", zero_ratio=0.5, axis=0)] 18 | ).network() 19 | fn = network.function(["i"], ["s"]) 20 | x = np.arange(10).astype(fX) 21 | ans = x.copy() 22 | ans[5:] = 0 23 | np.testing.assert_allclose(ans, fn(x)[0]) 24 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/smorms3_test.py: -------------------------------------------------------------------------------- 1 | import treeano.nodes as tn 2 | from treeano.sandbox.nodes import smorms3 3 | 4 | 5 | def test_smorms3_node_serialization(): 6 | tn.check_serialization(smorms3.SMORMS3Node("a")) 7 | 8 | 9 | def test_smorms3_node(): 10 | tn.test_utils.check_updates_node(smorms3.SMORMS3Node) 11 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/spatial_attention_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import spatial_attention 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_spatial_feature_point_node(): 14 | network = tn.SequentialNode( 15 | "s", 16 | [tn.InputNode("i", shape=(2, 2, 2, 3)), 17 | spatial_attention.SpatialFeaturePointNode("fp")] 18 | ).network() 19 | 20 | fn = network.function(["i"], ["s"]) 21 | 22 | x = np.zeros((2, 2, 2, 3), dtype=fX) 23 | idxs = np.array([[[0, 0], 24 | [1, 0]], 25 | [[0, 1], 26 | [1, 2]]], 27 | dtype=fX) 28 | ans = idxs / np.array([1, 2], dtype=fX)[None, None] 29 | for batch in range(2): 30 | for channel in range(2): 31 | i, j = idxs[batch, channel] 32 | x[batch, channel, i, j] = 1 33 | 34 | np.testing.assert_allclose(ans, 35 | fn(x)[0], 36 | rtol=1e-5, 37 | 
atol=1e-8) 38 | 39 | 40 | def test_pairwise_distance_node(): 41 | # NOTE: only tests shape calculation 42 | network = tn.SequentialNode( 43 | "s", 44 | [tn.InputNode("i", shape=(2, 2, 2, 3)), 45 | spatial_attention.SpatialFeaturePointNode("fp"), 46 | spatial_attention.PairwiseDistanceNode("pd")] 47 | ).network() 48 | 49 | fn = network.function(["i"], ["s"]) 50 | x = np.zeros((2, 2, 2, 3), dtype=fX) 51 | 52 | nt.assert_equal((2, 4), 53 | fn(x)[0].shape) 54 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/spatial_transformer_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import theano 5 | import theano.tensor as T 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes import spatial_transformer 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | def test_affine_spatial_transformer_node_build(): 15 | localization_network = tn.HyperparameterNode( 16 | "loc", 17 | tn.SequentialNode( 18 | "loc_seq", 19 | [tn.DenseNode("loc_fc1", num_units=50), 20 | tn.ReLUNode("loc_relu3"), 21 | tn.DenseNode("loc_fc2", 22 | num_units=6, 23 | inits=[treeano.inits.ZeroInit()])]), 24 | num_filters=32, 25 | filter_size=(5, 5), 26 | pool_size=(2, 2), 27 | ) 28 | 29 | model = tn.HyperparameterNode( 30 | "model", 31 | tn.SequentialNode( 32 | "seq", 33 | [tn.InputNode("x", shape=(None, 1, 60, 60)), 34 | spatial_transformer.AffineSpatialTransformerNode( 35 | "st", 36 | localization_network, 37 | output_shape=(20, 20)), 38 | tn.DenseNode("fc1"), 39 | tn.ReLUNode("relu1"), 40 | tn.DropoutNode("do1"), 41 | tn.DenseNode("fc2", num_units=10), 42 | tn.SoftmaxNode("pred"), 43 | ]), 44 | num_filters=32, 45 | filter_size=(3, 3), 46 | pool_size=(2, 2), 47 | num_units=256, 48 | dropout_probability=0.5, 49 | inits=[treeano.inits.HeNormalInit()], 50 | ) 51 | 52 | with_updates = tn.HyperparameterNode( 53 | "with_updates", 54 | tn.AdamNode( 55 | "adam", 56 | {"subtree": model, 57 | "cost": tn.TotalCostNode("cost", { 58 | "pred": tn.ReferenceNode("pred_ref", reference="model"), 59 | "target": tn.InputNode("y", shape=(None,), dtype="int32")}, 60 | )}), 61 | cost_function=treeano.utils.categorical_crossentropy_i32, 62 | ) 63 | network = with_updates.network() 64 | network.build() # build eagerly to share weights 65 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/spp_net_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes import spp_net 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | def test_spatial_pyramid_pooling_node_serialization(): 15 | tn.check_serialization(spp_net.SpatialPyramidPoolingNode("a")) 16 | 17 | 18 | def test_spatial_pyramid_pooling_node(): 19 | # only testing size 20 | network = tn.SequentialNode( 21 | "s", 22 | [tn.InputNode("i", shape=(3, 2, 17, 12)), 23 | spp_net.SpatialPyramidPoolingNode("spp", spp_levels=[(1, 1), 24 | (2, 2), 25 | (3, 4), 26 | (5, 5), 27 | (17, 12)])] 28 | ).network() 29 | 30 | ans_shape = (3, 2 * (1 * 1 + 2 * 2 + 3 * 4 + 5 * 5 + 17 * 12)) 31 | fn = network.function(["i"], ["s"]) 32 | x = np.random.randn(3, 2, 17, 12).astype(fX) 33 | res = fn(x)[0] 34 | 
nt.assert_equal(network["s"].get_vw("default").shape, 35 | ans_shape) 36 | nt.assert_equal(res.shape, 37 | ans_shape) 38 | 39 | 40 | # this currently doesn't work, presumably because pool sizes cannot be computed from symbolic (None) spatial dimensions 41 | @nt.raises(AssertionError) 42 | def test_spatial_pyramid_pooling_node_symbolic(): 43 | # only testing size 44 | network = tn.SequentialNode( 45 | "s", 46 | [tn.InputNode("i", shape=(None, 2, None, None)), 47 | spp_net.SpatialPyramidPoolingNode("spp", spp_levels=[(1, 1), 48 | (2, 2), 49 | (3, 4), 50 | (5, 5), 51 | (17, 12)])] 52 | ).network() 53 | 54 | fn = network.function(["i"], ["s"]) 55 | ans_shape = (3, 2 * (1 * 1 + 2 * 2 + 3 * 4 + 5 * 5 + 17 * 12)) 56 | x1 = np.random.randn(3, 2, 17, 12).astype(fX) 57 | nt.assert_equal(ans_shape, 58 | fn(x1)[0].shape) 59 | x2 = np.random.randn(100, 2, 177, 123).astype(fX) 60 | nt.assert_equal(ans_shape, 61 | fn(x2)[0].shape) 62 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/stochastic_pooling_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox.nodes import stochastic_pooling 9 | 10 | fX = theano.config.floatX 11 | 12 | 13 | def test_stochastic_pool_2d_node_serialization(): 14 | tn.check_serialization(stochastic_pooling.StochasticPool2DNode("a")) 15 | 16 | 17 | def test_stochastic_pool_2d_node1(): 18 | network = tn.SequentialNode( 19 | "s", 20 | [tn.InputNode("i", shape=(1, 1, 4, 4)), 21 | stochastic_pooling.StochasticPool2DNode("m", 22 | pool_size=(2, 2), 23 | deterministic=True)] 24 | ).network() 25 | fn = network.function(["i"], ["m"]) 26 | x = np.arange(16).astype(fX).reshape(1, 1, 4, 4) 27 | pre_pool = np.array([[[[[0, 1, 4, 5], [2, 3, 6, 7]], 28 | [[8, 9, 12, 13], [10, 11, 14, 15]]]]], dtype=fX) 29 | ans = ((pre_pool ** 2) / pre_pool.sum(axis=-1)[..., None]).sum(axis=-1) 30 | np.testing.assert_allclose(fn(x)[0], 31 | ans, 32 | rtol=1e-5) 33 | nt.assert_equal(network["m"].get_vw("default").shape, 34 | ans.shape) 35 | 36 | 37 | def test_stochastic_pool_2d_node2(): 38 | # testing that stochastic version works 39 | network = tn.SequentialNode( 40 | "s", 41 | [tn.InputNode("i", shape=(1, 1, 4, 4)), 42 | stochastic_pooling.StochasticPool2DNode("m", 43 | pool_size=(2, 2))] 44 | ).network() 45 | fn = network.function(["i"], ["m"]) 46 | x = np.arange(16).astype(fX).reshape(1, 1, 4, 4) 47 | fn(x) 48 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/unbiased_nesterov_momentum_test.py: -------------------------------------------------------------------------------- 1 | import treeano.nodes as tn 2 | from treeano.sandbox.nodes import unbiased_nesterov_momentum as unm 3 | 4 | 5 | def test_unbiased_nesterov_momentum_node_serialization(): 6 | tn.check_serialization( 7 | unm.UnbiasedNesterovMomentumNode("a", tn.IdentityNode("i"))) 8 | 9 | 10 | def test_unbiased_nesterov_momentum_node(): 11 | def unbiased_nag(name, children): 12 | return tn.SGDNode(name, 13 | {"cost": children["cost"], 14 | "subtree": unm.UnbiasedNesterovMomentumNode( 15 | name + "_momentum", 16 | children["subtree"])}, 17 | learning_rate=0.01) 18 | tn.test_utils.check_updates_node(unbiased_nag) 19 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/tests/wta_sparsity_test.py:
-------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | 9 | from treeano.sandbox.nodes import wta_sparisty as wta 10 | 11 | 12 | fX = theano.config.floatX 13 | 14 | 15 | def test_wta_spatial_sparsity_node_serialization(): 16 | tn.check_serialization(wta.WTASpatialSparsityNode("a")) 17 | 18 | 19 | def test_wta_sparsity_node_serialization(): 20 | tn.check_serialization(wta.WTASparsityNode("a")) 21 | 22 | 23 | def test_wta_spatial_sparsity_node(): 24 | network = tn.SequentialNode( 25 | "s", 26 | [tn.InputNode("i", shape=(2, 2, 2, 2)), 27 | wta.WTASpatialSparsityNode("a")] 28 | ).network() 29 | 30 | fn = network.function(["i"], ["s"]) 31 | x = np.arange(16).reshape(2, 2, 2, 2).astype(fX) 32 | ans = x.copy() 33 | ans[..., 0] = 0 34 | ans[..., 0, :] = 0 35 | np.testing.assert_allclose(fn(x)[0], 36 | ans) 37 | 38 | 39 | def test_wta_sparsity_node(): 40 | network = tn.SequentialNode( 41 | "s", 42 | [tn.InputNode("i", shape=(2, 2, 2, 2)), 43 | wta.WTASparsityNode("a", percentile=0.5)] 44 | ).network() 45 | 46 | fn = network.function(["i"], ["s"]) 47 | x = np.arange(16).reshape(2, 2, 2, 2).astype(fX) 48 | ans = x.copy() 49 | ans[..., 0] = 0 50 | ans[..., 0, :] = 0 51 | ans[0] = 0 52 | res = fn(x)[0] 53 | np.testing.assert_allclose(res, ans) 54 | -------------------------------------------------------------------------------- /treeano/sandbox/nodes/unbiased_nesterov_momentum.py: -------------------------------------------------------------------------------- 1 | import treeano 2 | 3 | 4 | @treeano.register_node("unbiased_nesterov_momentum") 5 | class UnbiasedNesterovMomentumNode(treeano.Wrapper1NodeImpl): 6 | 7 | """ 8 | similar to NesterovMomentumNode, but includes a term to unbias the 9 | momentum update (similar to adam's unbias term) 10 | """ 11 | 12 | # TODO add way to filter parameters and only apply to a subset 13 | hyperparameter_names = ("momentum",) 14 | 15 | def mutate_update_deltas(self, network, update_deltas): 16 | momentum = network.find_hyperparameter(["momentum"], 0.9) 17 | shared_vws = network.find_vws_in_subtree(is_shared=True) 18 | 19 | # keep count state only once 20 | t_vw = network.create_vw( 21 | "nesterov_momentum_count", 22 | shape=(), 23 | is_shared=True, 24 | tags={"state"}, 25 | default_inits=[], 26 | ) 27 | t = t_vw.variable 28 | new_t = t + 1 29 | update_deltas[t] = new_t - t 30 | # NOTE: assumes constant momentum 31 | unbias_factor = (1 - momentum) / (1 - momentum ** (new_t + 1)) 32 | 33 | for vw in shared_vws: 34 | var = vw.variable 35 | if var in update_deltas: 36 | velocity_vw = network.create_vw( 37 | "nesterov_momentum_velocity(%s)" % vw.name, 38 | shape=vw.shape, 39 | is_shared=True, 40 | tags={"state"}, 41 | default_inits=[], 42 | ) 43 | velocity = velocity_vw.variable 44 | delta = update_deltas[var] 45 | new_velocity = momentum * velocity + delta 46 | update_deltas[velocity] = new_velocity - velocity 47 | update_deltas[var] \ 48 | = (delta + momentum * new_velocity) * unbias_factor 49 | -------------------------------------------------------------------------------- /treeano/sandbox/tests/utils_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano 7 | import treeano.nodes as tn 8 | from treeano.sandbox import utils 9 | 10 | fX 
= theano.config.floatX 11 | 12 | 13 | def test_overwrite_grad_multiple_args(): 14 | class Foo(utils.OverwriteGrad): 15 | 16 | def __init__(self): 17 | def fn(a, b): 18 | return a + 2 * b 19 | 20 | super(Foo, self).__init__(fn) 21 | 22 | def grad(self, inputs, out_grads): 23 | a, b = inputs 24 | grd, = out_grads 25 | return a, a * b * grd 26 | 27 | foo_op = Foo() 28 | 29 | a = T.scalar() 30 | b = T.scalar() 31 | ga, gb = T.grad(3 * foo_op(a, b), [a, b]) 32 | fn = theano.function([a, b], [ga, gb]) 33 | res1, res2 = fn(2.7, 11.4) 34 | np.testing.assert_allclose(res1, 2.7) 35 | np.testing.assert_allclose(res2, 2.7 * 11.4 * 3) 36 | -------------------------------------------------------------------------------- /treeano/theano_extensions/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = """ 2 | fractional_max_pooling 3 | tree_probability 4 | """.split() 5 | 6 | 7 | from . import gradient 8 | from . import padding 9 | from . import tensor 10 | from .meshgrid import mgrid, ogrid 11 | -------------------------------------------------------------------------------- /treeano/theano_extensions/gradient.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | from theano.compile import ViewOp 4 | 5 | 6 | class GradientReversalOp(ViewOp): 7 | 8 | def grad(self, inputs, output_gradients): 9 | return [-output_gradients[0]] 10 | 11 | gradient_reversal = GradientReversalOp() 12 | -------------------------------------------------------------------------------- /treeano/theano_extensions/meshgrid.py: -------------------------------------------------------------------------------- 1 | """ 2 | from: https://gist.github.com/eickenberg/f1a0e368961ef6d05b5b 3 | by Michael Eickenberg 4 | 5 | TODO fix float64 warnings 6 | """ 7 | 8 | import theano 9 | import theano.tensor as T 10 | 11 | fX = theano.config.floatX 12 | 13 | 14 | class _nd_grid(object): 15 | 16 | """Implements the mgrid and ogrid functionality for theano tensor 17 | variables. 18 | 19 | Parameters 20 | ========== 21 | sparse : boolean, optional, default=False 22 | Specifying False leads to the equivalent of numpy's mgrid 23 | functionality. Specifying True leads to the equivalent of ogrid.
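    Example (a small sketch; the returned grids are symbolic and can be
    evaluated with .eval()):
        mgrid[0:3:1, 0:2:1] -> two dense index grids, each of shape (3, 2)
        ogrid[0:3:1, 0:2:1] -> broadcastable ranges of shapes (3, 1) and (1, 2)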
24 | """ 25 | 26 | def __init__(self, sparse=False): 27 | self.sparse = sparse 28 | 29 | def __getitem__(self, slices): 30 | 31 | ndim = len(slices) 32 | ranges = [T.arange(sl.start, sl.stop, sl.step or 1) 33 | for sl in slices] 34 | shapes = [tuple([1] * j + [r.shape[0]] + [1] * (ndim - 1 - j)) 35 | for j, r in enumerate(ranges)] 36 | ranges = [r.reshape(shape) for r, shape in zip(ranges, shapes)] 37 | ones = [T.ones_like(r) for r in ranges] 38 | if self.sparse: 39 | grids = ranges 40 | else: 41 | grids = [] 42 | for i in range(ndim): 43 | grid = 1 44 | for j in range(ndim): 45 | if j == i: 46 | grid = grid * ranges[j] 47 | else: 48 | grid = grid * ones[j] 49 | grids.append(grid) 50 | return grids 51 | 52 | 53 | mgrid = _nd_grid() 54 | ogrid = _nd_grid(sparse=True) 55 | -------------------------------------------------------------------------------- /treeano/theano_extensions/padding.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | 4 | 5 | def pad(x, padding, fill_value=0): 6 | """ 7 | applies padding to tensor 8 | """ 9 | input_shape = x.shape 10 | output_shape = [] 11 | indices = [] 12 | 13 | for dim, pad in enumerate(padding): 14 | try: 15 | left_pad, right_pad = pad 16 | except TypeError: 17 | left_pad = right_pad = pad 18 | output_shape.append(left_pad + input_shape[dim] + right_pad) 19 | indices.append(slice(left_pad, left_pad + input_shape[dim])) 20 | 21 | if fill_value: 22 | out = T.ones(output_shape) * fill_value 23 | else: 24 | out = T.zeros(output_shape) 25 | return T.set_subtensor(out[tuple(indices)], x) 26 | -------------------------------------------------------------------------------- /treeano/theano_extensions/tensor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import theano 3 | import theano.tensor as T 4 | 5 | from .. import utils 6 | 7 | 8 | class PercentileOp(theano.Op): 9 | 10 | """ 11 | like numpy.percentile 12 | returns q-th percentile of the data for q in [0, 100] 13 | """ 14 | 15 | # TODO can implement gradient w.r.t. 
q 16 | 17 | __props__ = ("axis", "keepdims") 18 | 19 | def __init__(self, axis, keepdims): 20 | if isinstance(axis, list): 21 | axis = tuple(axis) 22 | assert axis is None or isinstance(axis, (int, tuple)) 23 | self.axis = axis 24 | self.keepdims = keepdims 25 | 26 | def make_node(self, a, q): 27 | # cast q to theano variable 28 | if isinstance(q, (int, float)): 29 | scalar_type = T.scalar().type 30 | q = T.Constant(scalar_type, q) 31 | 32 | # set to all axes if none specified 33 | if self.axis is None: 34 | axis = range(a.ndim) 35 | elif isinstance(self.axis, int): 36 | axis = [self.axis] 37 | else: 38 | axis = self.axis 39 | 40 | # calculate broadcastable 41 | if self.keepdims: 42 | broadcastable = [b or (ax in axis) 43 | for ax, b in enumerate(a.broadcastable)] 44 | else: 45 | broadcastable = [b 46 | for ax, b in enumerate(a.broadcastable) 47 | if ax not in axis] 48 | 49 | out = T.TensorType(a.dtype, broadcastable)() 50 | return theano.gof.Apply(self, [a, q], [out]) 51 | 52 | def perform(self, node, inputs, output_storage): 53 | a, q = inputs 54 | z, = output_storage 55 | res = np.percentile(a, q, axis=self.axis, keepdims=self.keepdims) 56 | z[0] = utils.as_fX(res) 57 | 58 | 59 | def percentile(a, q, axis=None, keepdims=False): 60 | return PercentileOp(axis, keepdims)(a, q) 61 | -------------------------------------------------------------------------------- /treeano/theano_extensions/tests/fractional_max_pooling_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from __future__ import print_function, unicode_literals 3 | 4 | import nose.tools as nt 5 | import numpy as np 6 | import theano 7 | import theano.tensor as T 8 | 9 | fX = theano.config.floatX 10 | 11 | if "gpu" in theano.config.device: 12 | import treeano.theano_extensions.fractional_max_pooling as fmp 13 | 14 | def test_fractional_max_pooling_numeric_gradient(): 15 | def fun(x): 16 | return fmp.DisjointPseudorandomFractionalMaxPooling2DOp( 17 | alpha=1.414, 18 | u=0.5 19 | )(x) 20 | 21 | T.verify_grad(fun, 22 | [np.arange(25).reshape(1, 1, 5, 5).astype(fX)], 23 | rng=np.random) 24 | 25 | def test_fractional_max_pooling_shape(): 26 | def fmp_shape(x, op): 27 | return fmp.DisjointPseudorandomFractionalMaxPooling2DOp( 28 | alpha=alpha, 29 | u=u 30 | )(T.constant(x)).eval().shape 31 | 32 | for _ in range(10): 33 | in_dim = np.random.randint(2, 100) 34 | x = np.random.randn(1, 1, in_dim, in_dim).astype(fX) 35 | alpha = np.random.rand() + 1 36 | u = np.random.rand() 37 | op = fmp.DisjointPseudorandomFractionalMaxPooling2DOp( 38 | alpha=alpha, 39 | u=u 40 | ) 41 | res = fmp_shape(x, op) 42 | new_dim = op.output_length(in_dim) 43 | print(in_dim, res, new_dim, alpha, u) 44 | nt.assert_equal((1, 1, new_dim, new_dim), 45 | res) 46 | -------------------------------------------------------------------------------- /treeano/theano_extensions/tests/gradient_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | import treeano.theano_extensions.gradient as ttg 7 | 8 | fX = theano.config.floatX 9 | 10 | 11 | def test_gradient_reversal(): 12 | v = np.random.randn(3, 4).astype(fX) 13 | m = T.matrix() 14 | s1 = m.sum() 15 | g1 = T.grad(s1, m) 16 | s2 = ttg.gradient_reversal(s1) 17 | g2 = T.grad(s2, m) 18 | g1_res, g2_res, s1_res, s2_res = theano.function([m], [g1, g2, s1, s2])(v) 19 | 
np.testing.assert_allclose(v.sum(), s1_res, rtol=1e-5) 20 | np.testing.assert_equal(s1_res, s2_res) 21 | np.testing.assert_equal(np.ones((3, 4), dtype=fX), g1_res) 22 | np.testing.assert_equal(g1_res, -g2_res) 23 | -------------------------------------------------------------------------------- /treeano/theano_extensions/tests/irregular_length_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | from treeano.theano_extensions.irregular_length import ungroup_irregular_length_tensors 7 | 8 | fX = theano.config.floatX 9 | 10 | 11 | def test_ungroup_irregular_length_tensors(): 12 | x = np.array([[0, 1], 13 | [2, 3], 14 | [4, 5], 15 | [6, 7], 16 | [8, 9]], 17 | dtype=fX) 18 | lengths = np.array([2, 1, 0, 2]) 19 | ans = np.array([[[0, 1], 20 | [2, 3]], 21 | [[4, 5], 22 | [0, 0]], 23 | [[0, 0], 24 | [0, 0]], 25 | [[6, 7], 26 | [8, 9]]]) 27 | res = ungroup_irregular_length_tensors(x, lengths).eval() 28 | np.testing.assert_equal(ans, res) 29 | 30 | 31 | def test_ungroup_irregular_length_tensors_grad(): 32 | x = np.array([[0, 1], 33 | [2, 3], 34 | [4, 5], 35 | [6, 7], 36 | [8, 9]], 37 | dtype=fX) 38 | x = T.constant(x) 39 | lengths = np.array([2, 1, 0, 2]) 40 | ans = np.array([[1, 1], 41 | [1, 1], 42 | [2, 2], 43 | [0, 0], 44 | [0, 0]], 45 | dtype=fX) 46 | ungrouped = ungroup_irregular_length_tensors(x, lengths) 47 | out = ungrouped[0].sum() + 2 * ungrouped[1].sum() 48 | grad = T.grad(out, x).eval() 49 | np.testing.assert_equal(ans, grad) 50 | 51 | 52 | def test_ungroup_irregular_length_tensors_numeric_gradient(): 53 | lengths = np.array([2, 3, 4, 5, 7, 2], dtype=fX) 54 | T.verify_grad(lambda x: ungroup_irregular_length_tensors(x, lengths), 55 | [np.random.randn(23, 10).astype(fX)], 56 | rng=np.random) 57 | -------------------------------------------------------------------------------- /treeano/theano_extensions/tests/meshgrid_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | from: https://gist.github.com/eickenberg/f1a0e368961ef6d05b5b 3 | by Michael Eickenberg 4 | """ 5 | 6 | import numpy as np 7 | from treeano.theano_extensions import mgrid, ogrid 8 | 9 | 10 | def test_mgrid_ogrid(): 11 | fmgrid = np.mgrid[0:1:.1, 1:10:1., 10:100:10.] 12 | imgrid = np.mgrid[0:2:1, 1:10:1, 10:100:10] 13 | 14 | fogrid = np.ogrid[0:1:.1, 1:10:1., 10:100:10.] 15 | iogrid = np.ogrid[0:2:1, 1:10:1, 10:100:10] 16 | 17 | tfmgrid = mgrid[0:1:.1, 1:10:1., 10:100:10.] 18 | timgrid = mgrid[0:2:1, 1:10:1, 10:100:10] 19 | 20 | tfogrid = ogrid[0:1:.1, 1:10:1., 10:100:10.] 
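# each theano grid should match its numpy counterpart entrywise; the ogrid
# results are broadcastable row/column ranges, which is why the loop below
# compares grid by grid via .eval()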
21 | tiogrid = ogrid[0:2:1, 1:10:1, 10:100:10] 22 | 23 | for g1, g2 in zip([fmgrid, imgrid, fogrid, iogrid], 24 | [tfmgrid, timgrid, tfogrid, tiogrid]): 25 | for v1, v2 in zip(g1, g2): 26 | np.testing.assert_almost_equal(v1, v2.eval(), decimal=6) 27 | -------------------------------------------------------------------------------- /treeano/theano_extensions/tests/nanguardmode_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | from theano.sandbox.rng_mrg import MRG_RandomStreams 6 | 7 | import theano.compile.nanguardmode 8 | import treeano.theano_extensions.nanguardmode 9 | 10 | 11 | if "gpu" in theano.config.device: 12 | def test_nanguardmode(): 13 | # this is the case which requires a custom nanguardmode 14 | srng = MRG_RandomStreams() 15 | x = srng.uniform((3, 4, 5)) 16 | 17 | def random_number(mode): 18 | return theano.function([], [x], mode=mode)() 19 | 20 | @nt.raises(AssertionError) 21 | def fails(): 22 | random_number(theano.compile.nanguardmode.NanGuardMode( 23 | nan_is_error=True, 24 | inf_is_error=True, 25 | big_is_error=True 26 | )) 27 | 28 | fails() 29 | 30 | random_number(treeano.theano_extensions.nanguardmode.NanGuardMode( 31 | nan_is_error=True, 32 | inf_is_error=True, 33 | big_is_error=True 34 | )) 35 | -------------------------------------------------------------------------------- /treeano/theano_extensions/tests/padding_test.py: -------------------------------------------------------------------------------- 1 | import nose.tools as nt 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | from treeano.theano_extensions.padding import pad 7 | 8 | fX = theano.config.floatX 9 | 10 | 11 | def test_pad(): 12 | x = T.constant(np.array([[1, 2]], dtype=fX)) 13 | res = pad(x, [1, 1]).eval() 14 | ans = np.array([[0, 0, 0, 0], 15 | [0, 1, 2, 0], 16 | [0, 0, 0, 0]], dtype=fX) 17 | np.testing.assert_equal(ans, res) 18 | -------------------------------------------------------------------------------- /treeano/visualization.py: -------------------------------------------------------------------------------- 1 | import pylab 2 | import networkx as nx 3 | import theano 4 | import theano.tensor as T 5 | 6 | 7 | def _plot_graph(graph, filename=None, node_size=500): 8 | nx.draw_networkx( 9 | graph, 10 | nx.graphviz_layout(graph), 11 | node_size=node_size) 12 | if filename is None: 13 | pylab.show() 14 | else: 15 | pylab.savefig(filename) 16 | 17 | 18 | def plot_architectural_tree(network, *args, **kwargs): 19 | return _plot_graph(network.graph.architectural_tree, *args, **kwargs) 20 | 21 | 22 | def plot_computation_graph(network, *args, **kwargs): 23 | return _plot_graph(network.graph.computation_graph, *args, **kwargs) 24 | 25 | 26 | def pydotprint_network(network, 27 | outfile=None, 28 | variables=None, 29 | include_updates=True, 30 | *args, 31 | **kwargs): 32 | network.build() 33 | if variables is None: 34 | vws = network.relative_network( 35 | network.root_node 36 | ).find_vws_in_subtree() 37 | variables = [vw.variable for vw in vws] 38 | if include_updates: 39 | variables += [v for _, v in network.update_deltas.to_updates()] 40 | else: 41 | # TODO search through update deltas for which ones apply to the 42 | # given variables 43 | assert not include_updates, ("include_updates is currently only " 44 | "for showing all variables") 45 | variables = [network.network_variable(v) for v in variables] 46 | 47 | 
theano.printing.pydotprint(fct=variables, 48 | outfile=outfile, 49 | *args, 50 | **kwargs) 51 | --------------------------------------------------------------------------------
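# minimal usage sketch for the helpers in treeano/visualization.py above
# (assumptions: `model` is a hypothetical, already-constructed root node, and
# pydot/graphviz are installed for theano.printing.pydotprint):
#
#   import treeano.visualization as viz
#   network = model.network()
#   viz.plot_architectural_tree(network, filename="architecture.png")
#   viz.pydotprint_network(network, outfile="computation.png")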