├── examples
    ├── __init__.py
    ├── mixture_density_net.py
    ├── nist_lre
    │   └── __init__.py
    ├── nist_sre
    │   ├── __init__.py
    │   └── run.sh
    ├── tidigits
    │   └── __init__.py
    ├── voxceleb
    │   ├── __init__.py
    │   ├── speech_features_extraction.py
    │   └── train_ivec.py
    ├── bayesian_neural_network.py
    ├── ladder_network.py
    ├── models
    │   ├── model_tidigits.py
    │   ├── models_cifar10.py
    │   └── models_ladder.py
    ├── kaggle
    │   └── covid19_cases.py
    ├── interpolation_figures.py
    ├── machine_learning
    │   ├── pca_tsne_umap.py
    │   └── gmm_fitting.py
    ├── discretizing_features.py
    ├── logistic_regression.py
    ├── cifar10_ivec.py
    ├── features
    │   ├── speech_features_visualization.py
    │   └── speech_pipeline.py
    ├── vae
    │   ├── stl10_self_supervised.py
    │   ├── plotting_results.py
    │   ├── two_stage_vae_test.py
    │   └── rate_distortion_onehot.py
    └── mnist.py
├── tests
    ├── ml
    │   ├── __init__.py
    │   └── test_clustering.py
    ├── backend
    │   ├── __init__.py
    │   ├── utils.py
    │   └── test_maths.py
    ├── bayesian
    │   ├── __init__.py
    │   ├── test_random_variable.py
    │   ├── test_losses.py
    │   ├── test_negative_binomial_disp.py
    │   └── test_mixture_distributions.py
    ├── networks
    │   ├── __init__.py
    │   ├── test_mixture_density_network.py
    │   └── test_keras_torch.py
    ├── utilities
    │   ├── __init__.py
    │   └── test_orderedflag.py
    ├── vae_test
    │   ├── __init__.py
    │   └── test_m2_vae.py
    ├── preprocessing
    │   ├── __init__.py
    │   └── test_kaldi_io.py
    ├── __init__.py
    ├── test_search.py
    ├── test_time_delay_networks.py
    └── test_datasets.py
├── odin
    ├── preprocessing
    │   ├── audio
    │   │   └── __init__.py
    │   ├── __init__.py
    │   ├── video.py
    │   ├── confs
    │   │   ├── prosodyAcf.cfg
    │   │   ├── prosodyShs.cfg
    │   │   ├── openSMILEpitch.cfg
    │   │   ├── smileF0.cfg
    │   │   └── openSMILEloudness.cfg
    │   └── sequence.py
    ├── bay
    │   ├── vi
    │   │   ├── autoencoder
    │   │   │   ├── autoregressive_vae.py
    │   │   │   ├── moe_vae.py
    │   │   │   ├── cycle_vae.py
    │   │   │   ├── sequential_vae.py
    │   │   │   ├── __init__.py
    │   │   │   ├── self_supervised_vae.py
    │   │   │   ├── dip_vae.py
    │   │   │   └── hyperbolic_vae.py
    │   │   └── __init__.py
    │   ├── mixed_membership
    │   │   └── __init__.py
    │   ├── __init__.py
    │   ├── distributions
    │   │   ├── __init__.py
    │   │   ├── joint_distributions_addons.py
    │   │   └── logarizmed.py
    │   ├── layers
    │   │   ├── __init__.py
    │   │   └── latents.py
    │   └── stochastic_initializers.py
    ├── explain
    │   ├── __init__.py
    │   ├── helpers.py
    │   └── adversarial_attack.py
    ├── ml
    │   ├── poincare_embedding.py
    │   ├── gmm_thresholding.py
    │   ├── linear_model.py
    │   └── tree.py
    ├── search
    │   ├── __init__.py
    │   ├── beam_search.py
    │   └── assignment.py
    ├── networks_torch
    │   ├── __init__.py
    │   └── util_modules.py
    ├── visual
    │   ├── __init__.py
    │   ├── base.py
    │   └── animation.py
    ├── fuel
    │   ├── image_data
    │   │   ├── kaokore.py
    │   │   ├── __init__.py
    │   │   ├── omniglot.py
    │   │   ├── cifar.py
    │   │   └── synthesize.py
    │   ├── nlp_data
    │   │   ├── __init__.py
    │   │   └── newsgroup.py
    │   ├── bio_data
    │   │   ├── __init__.py
    │   │   ├── human_embryos.py
    │   │   ├── pbmc.py
    │   │   └── cortex.py
    │   └── __init__.py
    ├── training
    │   └── __init__.py
    ├── __init__.py
    ├── networks
    │   ├── __init__.py
    │   ├── skip_connection.py
    │   └── positional_encoder.py
    ├── backend
    │   ├── __init__.py
    │   └── types_helpers.py
    └── utils
    │   └── path_utils.py
├── docs
    ├── docs.rst
    ├── _imgs
    │   └── odin_scheme.jpg
    ├── blogs
    │   └── catastrophic_forgetting.rst
    └── principle.rst
├── setup.cfg
├── benchmarks
    ├── multi_memory_multiprocessing.py
    ├── multiple_inherit_python.py
    ├── multiprocess_vs_single.py
    ├── PCA_multiprocessing_transform.py
    ├── cPickle_preserve_ref.py
    ├── should_concat_input_tf.py
    ├── fast_stacking_numba.py
    ├── queue_vs_zmq.py
    ├── strict_vs_non_strict_scan.py
    ├── tf_factorvae_permute_dims.py
    └── single_vs_multi_feeders.py
├── odin_jax.yml
├── LICENSE
├── odin.yml
├── .gitignore
└── setup.py


/examples/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/ml/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/backend/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/bayesian/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/networks/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/utilities/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/vae_test/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/examples/mixture_density_net.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/examples/nist_lre/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/examples/nist_sre/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/examples/tidigits/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/examples/voxceleb/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/odin/preprocessing/audio/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 


--------------------------------------------------------------------------------
/docs/docs.rst:
--------------------------------------------------------------------------------
1 | Documentation
2 | =============


--------------------------------------------------------------------------------
/odin/bay/vi/autoencoder/autoregressive_vae.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.rst


--------------------------------------------------------------------------------
/odin/explain/__init__.py:
--------------------------------------------------------------------------------
# Expose the explanation submodules at package level. The previous package
# path (`odin.traininglain`) does not exist; this file is `odin/explain`.
from odin.explain import adversarial_attack, deep_dream
2 | 


--------------------------------------------------------------------------------
/odin/ml/poincare_embedding.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def poincare_embedding():
5 |   pass
6 | 


--------------------------------------------------------------------------------
/docs/_imgs/odin_scheme.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trungnt13/odin-ai/HEAD/docs/_imgs/odin_scheme.jpg


--------------------------------------------------------------------------------
/odin/search/__init__.py:
--------------------------------------------------------------------------------
1 | from odin.search.beam_search import *
2 | from odin.search.diag_search import *
3 | from odin.search.assignment import *
4 | 


--------------------------------------------------------------------------------
/odin/bay/mixed_membership/__init__.py:
--------------------------------------------------------------------------------
1 | from odin.bay.mixed_membership.latent_dirichlet_allocation import *
2 | from odin.bay.mixed_membership.grade_membership_model import *
3 | 


--------------------------------------------------------------------------------
/odin/networks_torch/__init__.py:
--------------------------------------------------------------------------------
1 | from odin.networks_torch.keras_torch import *
2 | from odin.networks_torch.time_delay import *
3 | from odin.networks_torch.util_modules import *
4 | 


--------------------------------------------------------------------------------
/odin/visual/__init__.py:
--------------------------------------------------------------------------------
1 | from odin.visual.animation import *
2 | from odin.visual.base import Visualizer
3 | from odin.visual.bashplot import *
4 | from odin.visual.figures import *
5 | 


--------------------------------------------------------------------------------
/odin/fuel/image_data/kaokore.py:
--------------------------------------------------------------------------------
1 | class Kaokore:
2 |   r""" Dataset for the Collection of Facial Expressions from Japanese Artwork
3 | 
4 |   https://github.com/rois-codh/kaokore
5 |   """
6 | 


--------------------------------------------------------------------------------
/odin/search/beam_search.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | 
3 | 
4 | def beam_search(matrix, beam_size=2, n_best=4):
5 |   pass
6 | 
7 | def greedy_search():
8 |   pass
9 | 


--------------------------------------------------------------------------------
/odin/fuel/nlp_data/__init__.py:
--------------------------------------------------------------------------------
1 | from odin.fuel.nlp_data._base import NLPDataset
2 | from odin.fuel.nlp_data.newsgroup import Newsgroup5, Newsgroup20
3 | from odin.fuel.nlp_data.newsgroup20_clean import Newsgroup20_clean
4 | 


--------------------------------------------------------------------------------
/odin/bay/vi/__init__.py:
--------------------------------------------------------------------------------
1 | from odin.bay.vi.autoencoder import *
2 | from odin.bay.vi.losses import *
3 | from odin.bay.vi.metrics import *
4 | from odin.bay.vi.utils import *
5 | from odin.bay.vi._base import *
6 | from odin.bay.vi.disentanglement_gym import *
7 | 


--------------------------------------------------------------------------------
/odin/training/__init__.py:
--------------------------------------------------------------------------------
1 | from odin.training.experimenter import *
2 | from odin.training.scores import ScoreBoard
3 | from odin.training.trainer import (Callback, Trainer, get_current_trainer,
4 |                               read_tensorboard)
5 | from odin.training.early_stopping import *
6 | 


--------------------------------------------------------------------------------
/odin/fuel/bio_data/__init__.py:
--------------------------------------------------------------------------------
1 | from odin.fuel.bio_data._base import GeneDataset
2 | from odin.fuel.bio_data.atac_datasets import *
3 | from odin.fuel.bio_data.cortex import Cortex
4 | from odin.fuel.bio_data.human_embryos import HumanEmbryos
5 | from odin.fuel.bio_data.human_genome import HumanGenome
6 | from odin.fuel.bio_data.pbmc import PBMC
7 | 


--------------------------------------------------------------------------------
/examples/bayesian_neural_network.py:
--------------------------------------------------------------------------------
1 | # ===========================================================================
2 | # Original example:
3 | # https://gist.github.com/anonymous/96b998304de1eb4306738543170788ca
4 | # ===========================================================================
5 | from __future__ import print_function, division, absolute_import
6 | 


--------------------------------------------------------------------------------
/tests/vae_test/test_m2_vae.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | import unittest
 5 | from tempfile import mkstemp
 6 | 
 7 | import numpy as np
 8 | 
 9 | np.random.seed(8)
10 | 
11 | class Test(unittest.TestCase):
12 |   def test_(self):
13 |     pass
14 | 
15 | if __name__ == '__main__':
16 |   unittest.main()
17 | 


--------------------------------------------------------------------------------
/odin/bay/vi/autoencoder/moe_vae.py:
--------------------------------------------------------------------------------
 1 | class MoeVAE():
 2 |   r""" Multimodal Mixture-of-Experts VAE
 3 | 
 4 |   Reference:
 5 |     Shi, Y., Siddharth, N., Paige, B., H.S. Torr, P., 2019. "Variational
 6 |       Mixture-of-Experts Autoencoders for Multi-Modal Deep Generative
 7 |       Models". NeurIPS.
 8 |     Pytorch implementation: https://github.com/iffsid/mmvae
 9 |   """
10 |   pass
11 | 


--------------------------------------------------------------------------------
/odin/bay/__init__.py:
--------------------------------------------------------------------------------
1 | from odin.bay import distributions, layers, mixed_membership
2 | from odin.bay import stochastic_initializers as initializers
3 | from odin.bay import vi
4 | from odin.bay.vi.disentanglement_gym import *
5 | from odin.bay.vi.autoencoder import *
6 | from odin.bay.distribution_alias import parse_distribution
7 | from odin.bay.helpers import *
8 | from odin.bay.random_variable import RVconf
9 | 


--------------------------------------------------------------------------------
/tests/preprocessing/test_kaldi_io.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import unittest
 4 | 
 5 | 
 6 | def _check_pykaldi():
 7 |   try:
 8 |     import kaldi
 9 |     return True
10 |   except ImportError:
11 |     return False
12 | 
13 | 
14 | class KaldiIOTest(unittest.TestCase):
15 | 
16 |   def test_feature_loader(self):
17 |     if not _check_pykaldi():
18 |       return
19 | 


--------------------------------------------------------------------------------
/odin/__init__.py:
--------------------------------------------------------------------------------
 1 | __version__ = "1.0.0"
 2 | 
 3 | import os
 4 | 
 5 | # This should always be 'true': the performance gain from
 6 | # pre-allocating the whole GPU memory is marginal (less memory
 7 | # fragmentation), while it prevents you from running multiple
 8 | # experiments on one GPU, because the memory is taken away from other
 9 | # processes even though not all computational resources are used.
10 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
11 | 


--------------------------------------------------------------------------------
/tests/bayesian/test_random_variable.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | import unittest
 5 | from tempfile import mkstemp
 6 | 
 7 | import numpy as np
 8 | 
 9 | from odin.bay import RVconf
10 | 
11 | np.random.seed(8)
12 | 
13 | 
14 | class RVmetaTest(unittest.TestCase):
15 | 
16 |   def test_posterior(self):
17 |     pass
18 | 
19 | 
20 | if __name__ == '__main__':
21 |   unittest.main()
22 | 


--------------------------------------------------------------------------------
/odin/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | from odin.preprocessing import (base, sequence, signal, speech, textgrid)
2 | from odin.preprocessing.base import Pipeline, make_pipeline, set_extractor_debug
3 | from odin.preprocessing.processor import (FeatureProcessor, calculate_pca,
4 |                                           validate_features)
5 | 
6 | # from odin.preprocessing import image
7 | # from odin.preprocessing import video
8 | # from odin.preprocessing import text
9 | 


--------------------------------------------------------------------------------
/odin/bay/vi/autoencoder/cycle_vae.py:
--------------------------------------------------------------------------------
 1 | class CycleConsistentVAE():
 2 |   r"""
 3 | 
 4 |   Reference:
 5 |     Jha, A.H., Anand, S., Singh, M., Veeravasarapu, V.S.R., 2018.
 6 |       "Disentangling Factors of Variation with Cycle-Consistent
 7 |       Variational Auto-Encoders". arXiv:1804.10469 [cs].
 8 |     Implementation: https://github.com/ananyahjha93/cycle-consistent-vae
 9 |   """
10 | 
11 |   def __init__(self, **kwargs):
12 |     super().__init__(**kwargs)
13 | 


--------------------------------------------------------------------------------
/odin/fuel/image_data/__init__.py:
--------------------------------------------------------------------------------
 1 | from odin.fuel.image_data._base import ImageDataset
 2 | from odin.fuel.image_data.all_mnist import *
 3 | from odin.fuel.image_data.celeba import *
 4 | from odin.fuel.image_data.cifar import *
 5 | from odin.fuel.image_data.lego_faces import LegoFaces
 6 | from odin.fuel.image_data.shapes import *
 7 | # from odin.fuel.image_data.synthesize import YDisentanglement
 8 | from odin.fuel.image_data.omniglot import *
 9 | from odin.fuel.image_data.toys import *
10 | 
11 | # TODO: STL10
12 | 


--------------------------------------------------------------------------------
/benchmarks/multi_memory_multiprocessing.py:
--------------------------------------------------------------------------------
 1 | # ===========================================================================
 2 | # Conclusion: each process has its own globals()
 3 | # ===========================================================================
 4 | from multiprocessing import Process
 5 | 
 6 | 
 7 | def check(i, n):
 8 |     globals()['Process_%d' % i] = i
 9 |     print([(('Process_%d' % j) in globals()) for j in range(n)])
10 | 
11 | 
12 | def func(i, n):
13 |     check(i, n)
14 | 
15 | n = 2
16 | p = [Process(target=func, args=(i, n)) for i in range(n)]
17 | [i.start() for i in p]
18 | [i.join() for i in p]
19 | 


--------------------------------------------------------------------------------
/odin/networks_torch/util_modules.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | from itertools import chain
 4 | 
 5 | import numpy as np
 6 | import torch
 7 | 
 8 | from odin.utils import as_tuple
 9 | 
10 | 
class SequentialNetwork(torch.nn.Sequential):
  """`torch.nn.Sequential` whose arguments may be groups of modules.

  Each positional argument is passed through `as_tuple`; the resulting groups
  are concatenated in order and handed to `torch.nn.Sequential`.
  """

  def __init__(self, *args):
    modules = []
    for group in args:
      modules.extend(as_tuple(group))
    super().__init__(*modules)
16 | 
17 | 
# The base used to be the undefined name `Sequential` (NameError on import);
# the sibling container defined in this module is used instead.
class ParallelNetwork(SequentialNetwork):
  """Container that applies every child module to the same input."""

  def forward(self, input):
    """Apply each registered module to `input`.

    Returns a list with one output per module, in registration order.
    """
    return [module(input) for module in self._modules.values()]
25 | 


--------------------------------------------------------------------------------
/odin/fuel/bio_data/human_embryos.py:
--------------------------------------------------------------------------------
 1 | # embryos
 2 | 
 3 | import numpy as np
 4 | from scipy import sparse
 5 | 
 6 | from odin.fuel.bio_data._base import GeneDataset
 7 | from odin.fuel.bio_data.cortex import _load_single_cell_data
 8 | 
 9 | 
class HumanEmbryos(GeneDataset):
  """Gene dataset named "embryos", loaded with `_load_single_cell_data`.

  The loader result is unpacked into `x`, `y`, `xvar` and `yvar`.
  """

  def __init__(self, path="~/tensorflow_datasets/human_embryos"):
    super().__init__()
    # download location, handed to the loader as-is
    # (NOTE(review): looks base64-encoded -- confirm the loader decodes it)
    url = b'aHR0cHM6Ly9haS1kYXRhc2V0cy5zMy5hbWF6b25hd3MuY29tL2VtYnJ5by56aXA=\n'
    loaded = _load_single_cell_data(url=url, path=path)
    self.x, self.y, self.xvar, self.yvar = loaded

  @property
  def name(self):
    return "embryos"
21 | 


--------------------------------------------------------------------------------
/odin/ml/gmm_thresholding.py:
--------------------------------------------------------------------------------
 1 | from numbers import Number
 2 | 
 3 | import numpy as np
 4 | from six import string_types
 5 | from sklearn.mixture import GaussianMixture
 6 | 
 7 | from odin.ml.base import BaseEstimator, TransformerMixin
 8 | from odin.utils import as_tuple
 9 | 
10 | 
class GMMThreshold(BaseEstimator, TransformerMixin):
  """Estimator skeleton; only the constructor is implemented so far.

  Parameters
  ----------
  independent : bool
    stored as `self.independent` after conversion with `bool`
  n_components : object
    stored unchanged as `self.n_components` (default 'auto')
  random_state : int or numpy.random.RandomState
    a `RandomState` is used as-is, anything else is used as the seed of a
    new `numpy.random.RandomState`
  """

  def __init__(self, independent=True, n_components='auto', random_state=1):
    super().__init__()
    if not isinstance(random_state, np.random.RandomState):
      random_state = np.random.RandomState(seed=random_state)
    # The attribute now carries the same name as the constructor parameter.
    self.random_state = random_state
    # Backward-compatible alias for the previously misspelled attribute.
    self.randome_state = random_state
    self.independent = bool(independent)
    self.n_components = n_components

  def fit(self, X, y=None):
    # TODO: not implemented yet, currently a no-op that returns None
    pass
25 | 


--------------------------------------------------------------------------------
/odin/bay/distributions/__init__.py:
--------------------------------------------------------------------------------
 1 | from odin.bay.distributions.batchwise import *
 2 | from odin.bay.distributions.conditional import *
 3 | from odin.bay.distributions.joint_distributions_addons import *
 4 | from odin.bay.distributions.logarizmed import *
 5 | from odin.bay.distributions.mixture import *
 6 | from odin.bay.distributions.negative_binomial_disp import NegativeBinomialDisp
 7 | from odin.bay.distributions.normal_gamma import NormalGamma
 8 | from odin.bay.distributions.quantized import *
 9 | from odin.bay.distributions.vector_quantizer import *
10 | from odin.bay.distributions.zero_inflated import ZeroInflated
11 | from tensorflow_probability.python.distributions import *
12 | from tensorflow_probability.python.distributions import ContinuousBernoulli
13 | 
14 | RelaxedSoftmax = RelaxedOneHotCategorical
15 | 


--------------------------------------------------------------------------------
/benchmarks/multiple_inherit_python.py:
--------------------------------------------------------------------------------
 1 | # class Child(Parent1, Parent2):
 2 | #     def __init__(self):
 3 | #         Parent1.__init__(self)
 4 | #         Parent2.__init__(self)
 5 | 
 6 | from __future__ import print_function, division, absolute_import
 7 | 
 8 | 
 9 | class S1(object):
10 | 
11 |     def __init__(self):
12 |         super(S1, self).__init__()
13 |         print("S1 init")
14 | 
15 |     def test(self):
16 |         print("S1")
17 | 
18 | 
19 | class S2(object):
20 | 
21 |     def __init__(self):
22 |         super(S2, self).__init__()
23 |         print("S2 init")
24 | 
25 |     def test(self):
26 |         print("S2")
27 | 
28 | 
29 | class S3(S2, S1):
30 | 
31 |     def __init__(self):
32 |         super(S3, self).__init__()
33 | 
34 | s = S3() # prints "S1 init" then "S2 init" (the reverse of the inheritance order)
35 | s.test() # S2
36 | 


--------------------------------------------------------------------------------
/odin/bay/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | from odin.bay.layers.continuous import *
 2 | from odin.bay.layers.count_layers import *
 3 | from odin.bay.layers.dense_distribution import *
 4 | from odin.bay.layers.deterministic_layers import *
 5 | from odin.bay.layers.discrete import *
 6 | from odin.bay.layers.distribution_util_layers import *
 7 | from odin.bay.layers.latents import *
 8 | from odin.bay.layers.mixture_layers import *
 9 | from odin.bay.layers.autoregressive_layers import *
10 | 
11 | 
def _register_distribution_layers():
  """Register this module's `DistributionLambda` subclasses with Keras.

  Needed for deserialization. Every class in the module globals that
  subclasses `DistributionLambda` is added to the Keras custom objects under
  its global name, unless that name is already registered.
  """
  import inspect
  import tensorflow as tf

  registry = tf.keras.utils.get_custom_objects()
  for name, obj in globals().items():
    if name in registry:
      continue
    if not inspect.isclass(obj):
      continue
    if issubclass(obj, DistributionLambda):
      registry[name] = obj
23 | 
24 | 
25 | _register_distribution_layers()
26 | 


--------------------------------------------------------------------------------
/docs/blogs/catastrophic_forgetting.rst:
--------------------------------------------------------------------------------
 1 | Catastrophic forgetting
 2 | =======================
 3 | 
 4 | Should you retrain a network using unrecognized samples?
 5 | 
 6 | Identify the issue
 7 | ------------------
 8 | Due to the dense representation used by CNNs, the network will tend to forget previously learned information.
 9 | 
10 | Solutions
11 | ---------
12 | One must either:
13 | 
14 | 1) Retrain on old samples, i.i.d.
15 | 2) Adopt a different, more tabular representation (such as sparse codes) to drastically reduce interference
16 | 3) In this particular instance, since this isn't quite online learning (it integrates new information in batches, not one sample at a time), it may be possible to use a recent technique developed by DeepMind for reducing forgetting between two reinforcement learning tasks: https://arxiv.org/pdf/1612.00796v2.pdf, or https://arxiv.org/pdf/1606.04671.pdf. This may work on this supervised learning task as well.
17 | 


--------------------------------------------------------------------------------
/odin/preprocessing/video.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | 
 3 | import numpy as np
 4 | 
 5 | def read(path, boxes=None):
 6 |   """
 7 |   Return
 8 |   ------
 9 |   Always return 3D images
10 |   (n_frames, channels, width, height)
11 |   """
12 |   import imageio
13 |   vid = imageio.get_reader(path)
14 |   metadata = vid.get_meta_data()
15 |   fps = metadata['fps']
16 |   nb_frames = metadata['nframes']
17 |   if boxes is not None:
18 |     pass
19 |   print(nb_frames)
20 |   exit()
21 |   try:
22 |     frames = []
23 |     for i in vid:
24 |       # it is bizzare why width and height are swapped
25 |       if i.ndim == 3: # swap channel first
26 |         i = i.transpose(2, 1, 0)
27 |       else:
28 |         i = np.expand_dims(i.transpose(1, 0), 1)
29 |       frames.append(i)
30 |   except RuntimeError:
31 |     pass
32 |   frames = np.array(frames, dtype=frames[0].dtype)
33 |   return frames, fps
34 | 


--------------------------------------------------------------------------------
/odin_jax.yml:
--------------------------------------------------------------------------------
 1 | # conda env create -f odin_jax.yml
 2 | # conda activate odinjax
 3 | name: odinjax
 4 | 
 5 | channels:
 6 |   - conda-forge
 7 |   - rapidsai
 8 |   - nvidia
 9 |   - pytorch
10 |   - defaults
11 | 
12 | dependencies:
13 |   - python=3.7
14 |   - pip>=20.3
15 |   - cudatoolkit=11.0
16 |   - cudnn=8.0
17 |   - rapids>=0.17
18 |   - jax>=0.2.12
19 |   - matplotlib
20 |   - ipython
21 |   - scikit-learn
22 |   - pandas
23 |   - seaborn
24 |   - tqdm
25 |   - dill
26 | 
27 |   - pip:
28 |       - tensorflow-probability==0.12.1
29 |       - tensorflow-datasets
30 |       - kaggle
31 |       - numba
32 |       - pycrypto
33 |       - spacy
34 |       - umap-learn
35 |       - arviz # bayesian analysis
36 |       - statsmodels # hypothesis testing and frequentist statistic analysis
37 |       - typeguard>=2.10.0
38 |       - pyro-ppl
39 | 
40 | # Other helpful library:
41 | # pip install git+https://github.com/DmitryUlyanov/Multicore-TSNE.git
42 | 


--------------------------------------------------------------------------------
/examples/ladder_network.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | 
 3 | import os
 4 | os.environ['ODIN'] = 'float32,gpu,tensorflow'
 5 | 
 6 | import numpy as np
 7 | 
 8 | from odin import nnet as N, backend as K, fuel as F
 9 | 
10 | 
11 | # ===========================================================================
12 | # Data and const
13 | # ===========================================================================
14 | ds = F.load_mnist()
15 | print(ds)
16 | 
17 | # ===========================================================================
18 | # Model
19 | # ===========================================================================
20 | input_desc = [
21 |     N.VariableDesc(shape=(None, 28, 28), dtype='float32', name='X'),
22 |     N.VariableDesc(shape=(None,), dtype='float32', name='y')
23 | ]
24 | model = N.get_model_descriptor('ladder1')
25 | K.set_training(True); y_train, cost = model(input_desc)
26 | K.set_training(False); y_score = model()
27 | 


--------------------------------------------------------------------------------
/tests/backend/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | import torch
 6 | 
 7 | np.random.seed(8)
 8 | torch.manual_seed(8)
 9 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
10 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
11 | 
12 | x = np.random.rand(12, 25, 8).astype('float32')
13 | y = torch.Tensor(x)
14 | z = tf.convert_to_tensor(x)
15 | 
16 | 
def assert_equal(self, info, a: np.ndarray, b: torch.Tensor, c: tf.Tensor):
  """Check that three results have identical shapes and close values.

  Parameters
  ----------
  self : object providing `assertTrue` (the calling test case)
  info : object
    description of the operation under test, only used in messages
  a : numpy.ndarray
    reference result
  b, c : tensors exposing `.shape` and `.numpy()`

  Raises
  ------
  AssertionError
    if the shapes differ (including a different number of dimensions)
  """
  shape_a, shape_b, shape_c = (
      tuple(int(dim) for dim in arr.shape) for arr in (a, b, c))
  # Compare whole shape tuples: zipping the dimensions would silently ignore
  # trailing axes when the ranks differ, and `allclose` could then still
  # succeed through broadcasting.
  if not shape_a == shape_b == shape_c:
    # raised explicitly so the check is not stripped under `python -O`
    raise AssertionError(
        "Input shape: %s, info: %s, output shapes mismatch: %s, %s and %s" %
        (str(x.shape), str(info), str(a.shape), str(b.shape), str(c.shape)))
  values_match = np.allclose(a, b.numpy()) and np.allclose(a, c.numpy())
  self.assertTrue(values_match,
                  msg="info: %s, output value mismatch, \n%s\n%s\n%s" %
                  (info, str(a), str(b.numpy()), str(c.numpy())))
26 | 


--------------------------------------------------------------------------------
/tests/bayesian/test_losses.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | import unittest
 5 | from tempfile import mkstemp
 6 | 
 7 | import numpy as np
 8 | import tensorflow as tf
 9 | 
10 | from odin.bay.vi import losses
11 | 
12 | np.random.seed(1)
13 | tf.random.set_seed(1)
14 | 
15 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
16 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
17 | 
18 | 
class BayesianLossesTest(unittest.TestCase):
  """Shape checks for the helpers in `odin.bay.vi.losses`."""

  def test_pairwise_distances(self):
    """`pairwise_distances(x, y)` must produce the expected output shape."""
    # (shape of x, shape of y, expected shape of the result)
    cases = (
        ((2, 5), (3, 5), (2, 3, 5)),
        ((3, 5), (2, 5), (3, 2, 5)),
        ((4, 3, 5), (2, 5), (4, 3, 2, 5)),
        ((4, 3, 5), (1, 2, 5), (4, 3, 1, 2, 5)),
    )
    for x_shape, y_shape, expected_shape in cases:
      lhs = tf.random.uniform(x_shape)
      rhs = tf.random.uniform(y_shape)
      distances = losses.pairwise_distances(lhs, rhs)
      self.assertEqual(expected_shape, distances.shape)
32 | 
33 | 
34 | if __name__ == '__main__':
35 |   unittest.main()
36 | 


--------------------------------------------------------------------------------
/odin/bay/distributions/joint_distributions_addons.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from tensorflow_probability.python.distributions import (
 3 |     JointDistributionCoroutine, JointDistributionNamed,
 4 |     JointDistributionSequential)
 5 | from tensorflow_probability.python.experimental.marginalize import (
 6 |     MarginalizableJointDistributionCoroutine, logeinsumexp)
 7 | from tensorflow_probability.python.experimental.marginalize.marginalizable import \
 8 |     Marginalizable as _Marginalizable
 9 | 
10 | __all__ = [
11 |     'MarginalizableJointDistributionCoroutine',
12 |     'MarginalizableJointDistributionNamed',
13 |     'MarginalizableJointDistributionSequential',
14 |     'logeinsumexp',
15 | ]
16 | 
17 | 
class MarginalizableJointDistributionNamed(JointDistributionNamed,
                                           _Marginalizable):
  """`JointDistributionNamed` combined with the `Marginalizable` mixin.

  No members are added or overridden here; all behavior comes from the two
  base classes.
  """

  ...
22 | 
23 | 
class MarginalizableJointDistributionSequential(JointDistributionSequential,
                                                _Marginalizable):
  """`JointDistributionSequential` combined with the `Marginalizable` mixin.

  No members are added or overridden here; all behavior comes from the two
  base classes.
  """

  ...
28 | 


--------------------------------------------------------------------------------
/odin/networks/__init__.py:
--------------------------------------------------------------------------------
 1 | # TODO: Fix overlap import with odin.bay here
 2 | # from odin.networks import attention_mechanism
 3 | # from odin.networks.attention import *
 4 | from odin.networks.base_networks import *
 5 | from odin.networks.conditional_embedding import *
 6 | from odin.networks.cudnn_rnn import *
 7 | from odin.networks.dropout import *
 8 | from odin.networks.positional_encoder import *
 9 | from odin.networks.skip_connection import SkipConnection, skip_connect
10 | from odin.networks.time_delay import *
11 | from odin.networks.util_layers import *
12 | from odin.networks.image_networks import *
13 | from odin.networks.resnets import *
14 | 
def register_new_keras_layers(extras=None):
  """Register every keras `Layer` subclass visible in this module.

  All classes found in this module's globals (plus the optional `extras`
  mapping of name -> object) that subclass `Layer` are stored, under their
  name, in the dictionary returned by `tf.keras.utils.get_custom_objects()`.

  Parameters
  ----------
  extras : {dict, None}
    additional name -> object entries to consider; entries override module
    globals with the same name.
  """
  import tensorflow as tf
  from tensorflow.python.keras.layers import Layer

  # work on a copy so the module namespace itself is never modified
  candidates = dict(globals())
  if extras is not None:
    candidates.update(extras)

  registry = tf.keras.utils.get_custom_objects()
  registry.update({
      name: obj
      for name, obj in candidates.items()
      if isinstance(obj, type) and issubclass(obj, Layer)
  })
26 | 
27 | 
28 | register_new_keras_layers()
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 imito
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/examples/models/model_tidigits.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | 
 3 | from odin import nnet as N, backend as K
 4 | 
 5 | 
 6 | @N.Model
 7 | def gender(X, f, **kwargs):
 8 |     nb_gender = kwargs.get('nb_gender', 4)
 9 |     if f is None:
10 |         f = N.Sequence([
11 |             N.Dimshuffle(pattern=(0, 1, 2, 'x')),
12 |             N.Conv(num_filters=32, filter_size=3, strides=1, b_init=None, pad='valid'),
13 |             N.BatchNorm(activation=K.relu),
14 |             N.Pool(pool_size=2, mode='avg'),
15 | 
16 |             N.Conv(num_filters=64, filter_size=3, strides=1, b_init=None, pad='valid'),
17 |             N.BatchNorm(activation=K.relu),
18 |             N.Pool(pool_size=2, mode='avg'),
19 | 
20 |             N.Flatten(outdim=3),
21 |             N.Dense(num_units=512, b_init=None),
22 |             N.BatchNorm(axes=(0, 1)),
23 |             N.AutoRNN(num_units=128, rnn_mode='gru', num_layers=2,
24 |                       input_mode='linear', direction_mode='unidirectional'),
25 | 
26 |             N.Flatten(outdim=2),
27 |             N.Dense(num_units=nb_gender, activation=K.softmax)
28 |         ], debug=True)
29 |     return f(X), f
30 | 


--------------------------------------------------------------------------------
/odin.yml:
--------------------------------------------------------------------------------
 1 | # conda env create -f=odin.yml
 2 | # conda activate odin
 3 | name: odin
 4 | 
 5 | channels:
 6 |   - conda-forge
 7 |   - rapidsai
 8 |   - nvidia
 9 |   - pytorch
10 |   - defaults
11 | 
12 | dependencies:
13 |   - python=3.7
14 |   - pip>=20.3
15 |   - cudatoolkit=11.2
16 |   - cudnn=8.1.0
17 |   - rapids=21.06
18 |   - matplotlib
19 |   - ipython
20 |   - scikit-learn
21 |   - pandas
22 |   - seaborn
23 |   - tqdm
24 |   - dill
25 | #  - pytorch=1.9.0
26 | #  - torchvision
27 | #  - torchaudio
28 | 
29 |   - pip:
30 |       - tensorflow==2.5.0
31 |       - tensorflow-probability==0.13.0
32 |       - tensorflow-datasets
33 |       - tensorflow_io
34 |       - tensorflow-addons
35 |       - transformers
36 |       - kaggle
37 |       - hydra-core>=1.0.0
38 |       - hydra-joblib-launcher>=1.1.0
39 |       - bigarray==0.2.1
40 |       - numba
41 |       - pycrypto
42 |       - spacy
43 |       - umap-learn
44 |       - arviz # bayesian analysis
45 |       - statsmodels # hypothesis testing and frequentist statistical analysis
46 |       - typeguard>=2.10.0
47 | #      - pyro-ppl
48 | 
49 | # Other helpful libraries:
50 | # pip install git+https://github.com/DmitryUlyanov/Multicore-TSNE.git
51 | 


--------------------------------------------------------------------------------
/odin/explain/helpers.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | from six import string_types
 6 | from tensorflow import keras
 7 | 
 8 | 
 9 | def get_pretrained_model(model, model_kwargs={}):
10 |   if isinstance(model, keras.Model):
11 |     return model
12 |   if isinstance(model, string_types):
13 |     pretrained_model = {
14 |         name.lower(): obj
15 |         for name, obj in inspect.getmembers(keras.applications)
16 |         if inspect.isfunction(obj)
17 |     }
18 |     model = pretrained_model[model.strip().lower()]
19 |     model = model(**model_kwargs)
20 |     return model
21 |   raise NotImplementedError("No support for model with type: %s" % type(model))
22 | 
23 | 
24 | def _may_add_batch_dim(X, input_shape):
25 |   # add batch dimension if necessary
26 |   if X.ndim == len(input_shape) - 1:
27 |     X = np.expand_dims(X, axis=0) if isinstance(X, np.ndarray) else \
28 |       tf.expand_dims(X, axis=0)
29 |   assert len(input_shape) == X.ndim and all(
30 |       i == j if i is not None else True
31 |       for i, j in zip(input_shape, X.shape)), \
32 |         "Require input_shape=%s but X.shape=%s" % (input_shape, X.shape)
33 |   return X
34 | 


--------------------------------------------------------------------------------
/tests/test_search.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import itertools
 4 | import os
 5 | 
 6 | import numpy as np
 7 | import tensorflow as tf
 8 | 
 9 | from odin.search import (diagonal_beam_search, diagonal_bruteforce_search,
10 |                          diagonal_greedy_search, diagonal_hillclimb_search)
11 | from odin.utils import UnitTimer
12 | 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

tf.random.set_seed(8)
np.random.seed(8)

# random square score matrix shared by all search strategies
shape = (8, 8)
mat = np.random.randint(0, 88, size=shape)
print(mat)

# Time each strategy, then show the column order it found, the matrix with
# its columns reordered accordingly, and the resulting diagonal sum.
for search_fn in (diagonal_beam_search, diagonal_hillclimb_search,
                  diagonal_greedy_search, diagonal_bruteforce_search):
  with UnitTimer():
    ids = search_fn(mat)
  print(ids)
  print(mat[:, ids])
  print(np.sum(np.diag(mat[:, ids])))
46 | 


--------------------------------------------------------------------------------
/examples/kaggle/covid19_cases.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from urllib.request import urlretrieve
 3 | 
 4 | import pandas as pd
 5 | 
 6 | REPO_BASE = r"https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series"
 7 | URLs = dict(
 8 |     recovered=r"time_series_covid19_recovered_global.csv",
 9 |     deaths=r"time_series_covid19_deaths_global.csv",
10 |     confirmed=r"time_series_covid19_confirmed_global.csv",
11 | )
12 | 
13 | 
14 | # ===========================================================================
15 | # Helpers
16 | # ===========================================================================
17 | def download(outdir="/tmp") -> dict:
18 |   if not os.path.isdir(outdir):
19 |     os.makedirs(outdir)
20 |   data = {}
21 |   for key, url in URLs.items():
22 |     url = os.path.join(REPO_BASE, url)
23 |     name = os.path.basename(url)
24 |     outpath = os.path.join(outdir, name)
25 |     data[key] = pd.read_csv(outpath)
26 |   return data
27 | 
28 | 
29 | # ===========================================================================
30 | # Main
31 | # ===========================================================================
if __name__ == "__main__":
  # print every loaded table
  data = download()
  for frame in data.values():
    print(frame)
36 | 


--------------------------------------------------------------------------------
/examples/interpolation_figures.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | 
 5 | import numpy as np
 6 | import seaborn as sns
 7 | from matplotlib import pyplot as plt
 8 | 
 9 | from odin import visual as vs
10 | from odin.backend import interpolation
11 | 
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

sns.set()

all_interpolation = interpolation.get()
n = len(all_interpolation)
n_col = 5
# derive the row count from `n_col` (it used to divide by a hard-coded 5), so
# the grid stays large enough when the number of columns is changed
n_row = int(np.ceil(n / n_col))


def _plot_all(x, **kwargs):
  """Plot every interpolation evaluated at `x` on a new figure.

  One subplot is drawn per class in `all_interpolation`; `kwargs` are passed
  to each class constructor.
  """
  plt.figure(figsize=(int(n_col * 3), int(n_row * 2.5)))
  for idx, cls in enumerate(all_interpolation):
    plt.subplot(n_row, n_col, idx + 1)
    name = str(cls.__name__).split('.')[-1]
    y = cls(**kwargs)(x)
    plt.plot(x, y)
    plt.title(name)
  plt.tight_layout()


# default settings over 250 evenly spaced points in [0, 1]
_plot_all(np.linspace(0., 1., num=250).astype('float32'))

# cyclical settings over the values 0..249
_plot_all(np.arange(0, 250).astype('float32'),
          cyclical=True,
          norm=50,
          delayIn=20,
          delayOut=10,
          vmin=1.,
          vmax=2.)

vs.plot_save(log=True)
43 | 


--------------------------------------------------------------------------------
/benchmarks/multiprocess_vs_single.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | 
 3 | import multiprocessing as mpi
 4 | from itertools import chain
 5 | 
 6 | import numpy as np
 7 | 
 8 | from odin import utils
 9 | 
ncpu = 2
ntasks = 8
# one list of `ntasks` job sizes per process
jobs = [[10**3] * ntasks] * ncpu
# Big memory seems not affect the speed of inter-processes communication
dummy = np.ones((1000, 1000, 120), dtype='float64')
print('Size:', dummy.nbytes / 1024. / 1024., ' MB')

# ====== single process: run every job sequentially ====== #
if True:
    with utils.UnitTimer():
        totals = []
        for i in chain(*jobs):
            totals.append(sum(i * j ** i for j in range(i)))

# ====== multiprocessing: one process per job list ====== #
if True:
    def work(jobs, results, dummy):
        """Run all `jobs`, putting the running total on `results` after each.

        `dummy` is only passed along, it is not used by the computation
        (alternative: subtract dummy[12].sum().astype('int32') per step).
        """
        count = 0
        for i in jobs:
            count += sum(i * j ** i for j in range(i))
            results.put(count)

    res = mpi.Queue()
    p = [mpi.Process(target=work, args=(i, res, dummy)) for i in jobs]

    with utils.UnitTimer():
        for proc in p:
            proc.start()
        # every job of every process puts exactly one result on the queue
        for _ in range(ntasks * ncpu):
            c = res.get()

        for proc in p:
            proc.join()
        res.close()
45 | 


--------------------------------------------------------------------------------
/examples/machine_learning/pca_tsne_umap.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | 
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | from sklearn.datasets import load_digits
 8 | from sklearn.model_selection import train_test_split
 9 | 
10 | from odin import ml
11 | from odin import visual as vs
12 | 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

tf.random.set_seed(8)
np.random.seed(8)

# digits data, 70% for training and 30% for testing
X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# each result is indexed as [0] -> train embedding, [1] -> test embedding
X_umap = ml.fast_umap(X_train, X_test)
X_tsne = ml.fast_tsne(X_train, X_test)
X_pca = ml.fast_pca(X_train, X_test, n_components=2)

styles = dict(size=12, alpha=0.6, centroids=True)

# one figure per method: train set on the left, test set on the right
for embedding in (X_pca, X_tsne, X_umap):
  vs.plot_figure(6, 12)
  vs.plot_scatter(x=embedding[0], color=y_train, ax=(1, 2, 1), **styles)
  vs.plot_scatter(x=embedding[1], color=y_test, ax=(1, 2, 2), **styles)

vs.plot_save()
41 | 


--------------------------------------------------------------------------------
/benchmarks/PCA_multiprocessing_transform.py:
--------------------------------------------------------------------------------
 1 | # ===========================================================================
 2 | # Single process:
 3 | # 0.0003s
 4 | # Multiprocessing:
 5 | # ncpu = 1: ~0.16s
 6 | # ncpu = 2: ~0.07s
 7 | # ===========================================================================
 8 | from __future__ import print_function, division, absolute_import
 9 | 
10 | import os
11 | import matplotlib
12 | matplotlib.use('Agg')
13 | from matplotlib import pyplot as plt
14 | 
15 | import numpy as np
16 | from odin import fuel as F, visual
17 | from odin.ml import MiniBatchPCA
18 | from sklearn.manifold import TSNE
19 | from odin.utils import UnitTimer, TemporaryDirectory
20 | 
iris = F.load_iris()
print(iris)
pca = MiniBatchPCA()

X = iris['X'][:]

# fit the PCA incrementally, 20 samples at a time
batch_size = 20
for start in range(0, X.shape[0], batch_size):
    pca.partial_fit(X[start:start + batch_size])
    print("Fitting PCA ...")

# single-process transform
with UnitTimer():
    for _ in range(8):
        x = pca.transform(X)

# multiprocessing transform
with UnitTimer():
    for _ in range(8):
        x = pca.transform_mpi(X, keep_order=True, ncpu=1, n_components=2)
print("Output shape:", x.shape)

# label 0 -> red, label 1 -> blue, anything else -> green
colors = ['r' if label == 0 else ('b' if label == 1 else 'g')
          for label in iris['y'][:]]
visual.plot_scatter(x[:, 0], x[:, 1], color=colors, size=8)
visual.plot_save('/tmp/tmp.pdf')
48 | 


--------------------------------------------------------------------------------
/odin/preprocessing/confs/prosodyAcf.cfg:
--------------------------------------------------------------------------------
 1 | ///////////////////////////////////////////////////////////////////////////////////////
 2 | ///////// > openSMILE configuration file for speech prosody features //////////////////
 3 | /////////   pitch (ACF) and intensity                                //////////////////
 4 | /////////                                                            //////////////////
 5 | ///////// (c) 2013-2016 audEERING.                                   //////////////////
 6 | /////////     All rights reserved. See file COPYING for details.     //////////////////
 7 | ///////////////////////////////////////////////////////////////////////////////////////
 8 | 
 9 | 
10 | ;;;;;;; component list ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11 | [componentInstances:cComponentManager]
12 | instance[acf].type=cAcf
13 | instance[cep].type=cAcf
14 | instance[pitch].type=cPitchACF
15 | 
16 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;; main section ;;;;;;;;;;;;;;;;;;;;;;;;;;;
17 | [acf:cAcf]
18 | reader.dmLevel=fftmag
19 | writer.dmLevel=acf
20 | 
21 | [cep:cAcf]
22 | reader.dmLevel=fftmag
23 | writer.dmLevel=cepstrum
24 | cepstrum=1
25 | 
26 | [pitch:cPitchACF]
27 | reader.dmLevel = acf;cepstrum
28 | writer.dmLevel = pitch
29 | copyInputName = 1
30 | processArrayFields = 0
31 | ; 500
32 | maxPitch = {fmax}
33 | voiceProb = 1
34 | voiceQual = 0
35 | HNR = 0
36 | F0 = 1
37 | F0raw = 0
38 | F0env = 0
39 | voicingCutoff = {voicingCutoff}
40 | 


--------------------------------------------------------------------------------
/odin/fuel/__init__.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | from typing import Type
 3 | 
 4 | from odin.fuel.audio_data import *
 5 | from odin.fuel.bio_data import *
 6 | from odin.fuel.databases import *
 7 | from odin.fuel.dataset_base import *
 8 | from odin.fuel.image_data import *
 9 | from odin.fuel.nlp_data import *
10 | 
11 | 
def get_dataset(
    name: str,
    **kwargs,
) -> Union[IterableDataset, ImageDataset, GeneDataset, NLPDataset]:
  """Create the dataset whose class name matches `name`.

  The lookup is case-insensitive and ignores surrounding whitespace; only
  subclasses of `IterableDataset` visible in this module are considered.
  `kwargs` are forwarded to the dataset constructor.

  Raises
  ------
  ValueError
    if no matching dataset class is found.
  """
  name = str(name).strip().lower()
  for key, dataset_cls in globals().items():
    if str(key).lower() != name:
      continue
    if not inspect.isclass(dataset_cls):
      continue
    if not issubclass(dataset_cls, IterableDataset):
      continue
    return dataset_cls(**kwargs)
  raise ValueError(f"Cannot find dataset with name: {name}")
26 | 
27 | 
def get_all_dataset(
    data_type: Literal['image', 'audio', 'text', 'gene']
) -> List[Union[Type[IterableDataset],
                Type[ImageDataset],
                Type[GeneDataset],
                Type[NLPDataset]]]:
  """List every concrete dataset class whose `data_type()` equals `data_type`.

  The four base classes (`IterableDataset`, `ImageDataset`, `NLPDataset`,
  `GeneDataset`) are never returned.
  """
  base_classes = (IterableDataset, ImageDataset, NLPDataset, GeneDataset)
  return [
      candidate for candidate in globals().values()
      if inspect.isclass(candidate) and
      issubclass(candidate, IterableDataset) and
      candidate not in base_classes and
      candidate.data_type() == data_type
  ]
43 | 


--------------------------------------------------------------------------------
/benchmarks/cPickle_preserve_ref.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | 
 3 | import cPickle
 4 | 
 5 | 
class Shit1(object):
    """Empty class; its instance is the object referenced from `Shit2` below."""

    def __init__(self):
        super(Shit1, self).__init__()
11 | 
12 | 
class Shit2(object):
    """Empty class; the experiments attach a `Shit1` instance as `.shit`."""

    def __init__(self):
        super(Shit2, self).__init__()
18 | 
# NOTE: unpickling executes whatever the pickle describes; only load files
# that were written by this script itself.
# Files are opened in binary mode for both dump and load, and closed by `with`.
# ===========================================================================
# This will preserve the reference
# ===========================================================================
# Both objects are pickled together in a single call.
# Flip the condition to True once to write the file, then back to read it.
if False:
    s1 = Shit1()
    s2 = Shit2()
    s2.shit = s1
    print(s1, s2, s2.shit, s1 == s2.shit)
    with open('/tmp/s1', 'wb') as f:
        cPickle.dump((s1, s2), f)
else:
    with open('/tmp/s1', 'rb') as f:
        s1, s2 = cPickle.load(f)
    print(s1, s2, s2.shit, s1 == s2.shit) # True

# ===========================================================================
# This will remove the reference
# ===========================================================================
# The objects are pickled separately, into /tmp/s1 and /tmp/s2 (/tmp/s1 is
# overwritten).
if False:
    s1 = Shit1()
    s2 = Shit2()
    s2.shit = s1
    print(s1, s2, s2.shit, s1 == s2.shit)
    with open('/tmp/s1', 'wb') as f:
        cPickle.dump(s1, f)
    with open('/tmp/s2', 'wb') as f:
        cPickle.dump(s2, f)
else:
    with open('/tmp/s1', 'rb') as f:
        s1 = cPickle.load(f)
    with open('/tmp/s2', 'rb') as f:
        s2 = cPickle.load(f)
    print(s1, s2, s2.shit, s1 == s2.shit) # False
46 | 


--------------------------------------------------------------------------------
/odin/networks/skip_connection.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | from numbers import Number
 3 | 
 4 | import tensorflow as tf
 5 | from six import string_types
 6 | from tensorflow.python import keras
 7 | 
 8 | 
 9 | def skip_connect(inputs, outputs, mode):
10 |   ishape = inputs.shape
11 |   oshape = outputs.shape
12 |   if len(ishape) != len(oshape):
13 |     n = abs(len(ishape) - len(oshape))
14 |     # first expand
15 |     for _ in range(n):
16 |       if len(ishape) < len(oshape):
17 |         inputs = tf.expand_dims(inputs, axis=1)
18 |       else:
19 |         outputs = tf.expand_dims(outputs, axis=1)
20 |     # now repeat
21 |     for i in range(1, n + 1):
22 |       if len(ishape) < len(oshape):
23 |         inputs = tf.repeat(inputs, outputs.shape[i], axis=i)
24 |       else:
25 |         outputs = tf.repeat(outputs, inputs.shape[i], axis=i)
26 |   ### Concatenation
27 |   if mode == 'concat':
28 |     return tf.concat([outputs, inputs], axis=-1)
29 |   ### Identity, a.k.a residual connection
30 |   elif mode == 'identity':
31 |     return inputs + outputs
32 |   ### No support
33 |   else:
34 |     raise NotImplementedError("No support for skip connect mode: '%s'" % mode)
35 |   return outputs
36 | 
37 | 
class SkipConnection(keras.Sequential):
  """Sequential container whose output is merged with its own input.

  The wrapped layers are applied as in `keras.Sequential`, then the result
  is combined with the original `inputs` by `skip_connect` using `mode`.
  """

  def __init__(self, layers, mode='concat', name=None):
    super().__init__(layers, name=name)
    # merge strategy handed to `skip_connect` ('concat' or 'identity')
    self.mode = mode

  def call(self, inputs, training=None, mask=None):
    # run the wrapped layers, then merge their output with the raw inputs
    outputs = super().call(inputs, training=training, mask=mask)
    return skip_connect(inputs, outputs, self.mode)
47 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | sftp-config*
 2 | .favorites.json
 3 | .vscode
 4 | .DS_Store
 5 | .sync
 6 | Icon*
 7 | .vscode
 8 | .idea
 9 | 
10 | # Byte-compiled / optimized / DLL files
11 | __pycache__/
12 | *.py[cod]
13 | *$py.class
14 | 
15 | # C extensions
16 | *.so
17 | 
18 | # Distribution / packaging
19 | .Python
20 | env/
21 | build/
22 | develop-eggs/
23 | dist/
24 | downloads/
25 | eggs/
26 | .eggs/
27 | lib/
28 | lib64/
29 | parts/
30 | sdist/
31 | var/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 | 
36 | # PyInstaller
37 | #  Usually these files are written by a python script from a template
38 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 | 
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 | 
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *,cover
55 | .hypothesis/
56 | 
57 | # Translations
58 | *.mo
59 | *.pot
60 | 
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | 
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 | 
69 | # Scrapy stuff:
70 | .scrapy
71 | 
72 | # Sphinx documentation
73 | docs/_build/
74 | 
75 | # PyBuilder
76 | target/
77 | 
78 | # IPython Notebook
79 | .ipynb_checkpoints
80 | 
81 | # pyenv
82 | .python-version
83 | 
84 | # celery beat schedule file
85 | celerybeat-schedule
86 | 
87 | # dotenv
88 | .env
89 | 
90 | # virtualenv
91 | venv/
92 | ENV/
93 | 
94 | # Spyder project settings
95 | .spyderproject
96 | 
97 | # Rope project settings
98 | .ropeproject
99 | 


--------------------------------------------------------------------------------
/odin/preprocessing/sequence.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """This code is collections of sequence processing toolkits
 3 | """
 4 | from __future__ import print_function, division, absolute_import
 5 | 
 6 | import numpy as np
 7 | 
 8 | from odin.preprocessing.base import Extractor
 9 | 
class _SequenceExtractor(Extractor):
  """Common base class of the sequence extractors defined in this module."""
  pass
12 | 
class MaxLength(_SequenceExtractor):
  """ Sequences longer than this will be filtered out.

  Parameters
  ----------
  max_len : int
    maximum sequence length, stored as `int(max_len)`
  input_name : {object, None}
    stored as given; not used anywhere in this class yet

  Note
  ----
  `_transform` is still a stub: it does nothing and returns None.
  """

  def __init__(self, max_len=1234,
               input_name=None):
    super(MaxLength, self).__init__()
    self.max_len = int(max_len)
    self.input_name = input_name

  def _transform(self, X):
    # TODO: not implemented, no filtering happens here yet
    pass
24 | 
class IndexShift(object):
  """ IndexShift

  Placeholder: the constructor accepts `start_index`, `end_index` and
  `index_from` but currently ignores all of them. Unlike the other classes
  of this module it derives from `object`, not `_SequenceExtractor`.
  """

  def __init__(self, start_index=None, end_index=None, index_from=None):
    super(IndexShift, self).__init__()
30 | 
class SkipFrequent(_SequenceExtractor):
  """Placeholder extractor, nothing is implemented yet."""

  def __init__(self, new):
    # NOTE(review): `new` is ignored and the parent constructor is not
    # called here - confirm whether that is intended.
    pass
35 | 
class OOVindex(_SequenceExtractor):
  """ Out-of-vocabulary processing
  Any index that is: < lower or > upper will be replaced
  by given `oov_index`

  Parameters
  ----------
  oov_index : scalar
    replacement index, stored as `int(oov_index)`
  lower : {scalar or None}
    if None, use `min` value of all given sequences
  upper : {scalar or None}
    if None, use `max` value of all given sequences
  input_name : {list of string, None}
    presumably the names of the inputs to process - TODO confirm

  Note
  ----
  Only `oov_index` is stored so far; `lower`, `upper` and `input_name` are
  accepted but not used, and no transformation is implemented in this class.
  """

  def __init__(self, oov_index,
               lower=None, upper=None,
               input_name=None):
    super(OOVindex, self).__init__()
    self.oov_index = int(oov_index)
58 | 


--------------------------------------------------------------------------------
/examples/discretizing_features.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | 
 5 | import numpy as np
 6 | import seaborn as sns
 7 | import tensorflow as tf
 8 | from matplotlib import pyplot as plt
 9 | from odin import visual as vs
10 | from odin.bay.vi import discretizing
11 | 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

tf.random.set_seed(8)
np.random.seed(8)
sns.set()

shape = (1024, 1)
# one histogram of the raw data + (4 strategies x 2 bin settings)
total_figures = 1 + 4 * 2
ncol = nrow = int(np.ceil(np.sqrt(total_figures)))
hist_bins = 120
# every (strategy, n_bins) combination, in plotting order
settings = [(strategy, n_bins)
            for strategy in ('gmm', 'uniform', 'quantile', 'kmeans')
            for n_bins in (5, 10)]
for dist, fn in [('uniform', np.random.rand), ('normal', np.random.randn)]:
  x = fn(*shape)
  vs.plot_figure(nrow=12, ncol=12, dpi=120)
  # first subplot: the raw samples
  ax = vs.subplot(nrow, ncol, 1)
  ax, _, _ = vs.plot_histogram(x, bins=hist_bins, title=dist, ax=ax)
  # remaining subplots: the discretized samples
  for idx, (strategy, n_bins) in enumerate(settings, start=2):
    y = discretizing(x, n_bins=n_bins, strategy=strategy)
    title = '%s-%d' % (strategy, n_bins)
    ax = vs.subplot(nrow, ncol, idx)
    vs.plot_histogram(y, bins=hist_bins, ax=ax, title=title)
  plt.tight_layout()

# ====== special case: GMM discretizing ====== #
# `x` still holds the samples of the last distribution drawn above
vs.plot_figure()
y, gmm = discretizing(x, n_bins=2, strategy='gmm', return_model=True)
gmm = gmm[0]
vs.plot_gaussian_mixture(x,
                         gmm,
                         show_probability=True,
                         show_pdf=True,
                         show_components=True)
# ====== save everything ====== #
vs.plot_save()
49 | 


--------------------------------------------------------------------------------
/examples/logistic_regression.py:
--------------------------------------------------------------------------------
 1 | import matplotlib
 2 | matplotlib.use('Agg')
 3 | 
 4 | import os
 5 | os.environ['ODIN'] = 'gpu,float32'
 6 | import pickle
 7 | 
 8 | import numpy as np
 9 | 
10 | from odin import ml
11 | from odin import fuel as F
12 | from odin.utils import ctext, ArgController
13 | from odin import visual as V
14 | 
15 | from sklearn.metrics import confusion_matrix, accuracy_score
args = ArgController().add(
    '--reset', "re-run the fitting of the model", False).parse()
# ===========================================================================
# Const
# ===========================================================================
ds = F.MNIST.load()
print(ds)
nb_classes = 10
PATH = '/tmp/lore.ai'
# ===========================================================================
# Model
# ===========================================================================
if os.path.exists(PATH) and not args.reset:
  # reuse the model stored by a previous run
  with open(PATH, 'rb') as model_file:
    f = pickle.load(model_file)
else:
  f = ml.LogisticRegression(nb_classes=nb_classes,
                            tol=1e-4,
                            fit_intercept=True,
                            path=PATH,
                            batch_size=256,
                            dtype='float32')
  cross_validation = (ds['X_valid'], ds['y_valid'])
  f.fit(X=ds['X_train'], y=ds['y_train'], cv=cross_validation)
# ===========================================================================
# Evaluation
# ===========================================================================
f.evaluate(ds['X_test'],
           ds['y_test'],
           path='/tmp/tmp.pdf',
           title="MNIST Test Set",
           xlims=(0., 0.88),
           ylims=(0., 0.88))
45 | 


--------------------------------------------------------------------------------
/odin/fuel/bio_data/pbmc.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import os
 3 | from urllib.request import urlretrieve
 4 | 
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | from scipy import sparse
 8 | 
 9 | from odin.fuel.bio_data._base import GeneDataset
10 | from odin.utils.crypto import md5_checksum
11 | 
12 | 
class PBMC(GeneDataset):
  """PBMC dataset loaded from a downloaded `.npz` archive.

  The archive provides two matrices: `x` (referred to as transcriptomic data
  by the checksum message) and `y` (referred to as proteomic data), together
  with `xvar`, `yvar` and `pairs` arrays that are stored as-is.

  Arguments:
    dataset : key of `_URL` selecting the archive ('5k' or '10k'); it is
      lower-cased and stripped before lookup, unknown keys raise `KeyError`.
    path : directory used to cache the downloaded archive (created on demand).

  Raises:
    AssertionError : if the MD5 checksum of `x` or `y` does not match the
      checksum stored in the archive.
  """
  # base64-encoded download URLs, keyed by dataset name
  _URL = {
      '5k':
          b'aHR0cHM6Ly9haS1kYXRhc2V0cy5zMy5hbWF6b25hd3MuY29tL3BibWM1ay5ucHo=\n',
      '10k':
          b'aHR0cHM6Ly9haS1kYXRhc2V0cy5zMy5hbWF6b25hd3MuY29tL3BibWMxMGsubnB6\n'
  }

  def __init__(self, dataset='5k', path="~/tensorflow_datasets/pbmc"):
    super().__init__()
    path = os.path.abspath(os.path.expanduser(path))
    self.dsname = dataset
    os.makedirs(path, exist_ok=True)
    url = str(base64.decodebytes(PBMC._URL[str(dataset).lower().strip()]),
              'utf-8')
    filename = os.path.join(path, os.path.basename(url))
    # Download only once. The archive is fetched to a temporary name and then
    # renamed, so an interrupted download never leaves a truncated file
    # behind under the cached name.
    if not os.path.exists(filename):
      partial = filename + '.part'
      urlretrieve(url, filename=partial)
      os.replace(partial, filename)
    ### load the data
    # NOTE: allow_pickle=True unpickles objects stored in the archive; this is
    # only safe as long as the download source is trusted.
    data = np.load(filename, allow_pickle=True)
    self.x = data['x'].tolist().todense().astype(np.float32)
    self.y = data['y'].tolist().todense().astype(np.float32)
    assert md5_checksum(self.x) == data['xmd5'].tolist(), \
      "MD5 for transcriptomic data mismatch"
    assert md5_checksum(self.y) == data['ymd5'].tolist(), \
      "MD5 for proteomic data mismatch"
    self.xvar = data['xvar']
    self.yvar = data['yvar']
    self.pairs = data['pairs']

  @property
  def name(self):
    """Dataset identifier: 'pbmc' followed by the requested dataset key."""
    return f"pbmc{self.dsname}"
49 | 


--------------------------------------------------------------------------------
/examples/models/models_cifar10.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | 
 3 | from odin import backend as K, nnet as N
 4 | import tensorflow as tf
 5 | 
 6 | 
 7 | @N.Lambda
 8 | def test(X, y):
 9 |   nb_classes = y.shape.as_list()[-1]
10 |   f = N.Sequence([
11 |       N.Flatten(outdim=2),
12 |       N.Dense(512, activation=K.relu),
13 |       N.Dropout(level=0.5),
14 |       N.Dense(nb_classes, activation=K.linear)
15 |   ], debug=2)
16 |   logit = f(X)
17 |   prob = tf.nn.softmax(logit)
18 |   return {'logit': logit, 'prob': prob}
19 | 
20 | 
@N.Lambda
def cnn(X, y):
  """Two convolutional stages (32 then 64 filters) followed by a dense head.

  The number of output units is the size of the last dimension of `y`.
  Returns a dictionary with the raw `logit` and its softmax `prob`.
  """
  n_outputs = y.shape.as_list()[-1]
  conv_defaults = dict(b_init=None, activation=K.linear)
  bnorm_defaults = dict(activation=K.relu)
  # every layer (and the Sequence itself) is created inside the argument scope
  # so the defaults above apply wherever a layer does not override them
  with N.args_scope(['Conv', conv_defaults], ['BatchNorm', bnorm_defaults]):
    first_stage = [
        N.Dimshuffle(pattern=(0, 2, 3, 1)),
        N.Conv(32, (3, 3), pad='same', stride=(1, 1)),
        N.BatchNorm(),
        N.Conv(32, (3, 3), pad='same', stride=(1, 1),
               b_init=0, activation=K.relu),
        N.Pool(pool_size=(2, 2), strides=None, mode='max'),
        N.Dropout(level=0.25),
    ]
    second_stage = [
        N.Conv(64, (3, 3), pad='same', stride=(1, 1)),
        N.BatchNorm(),
        N.Conv(64, (3, 3), pad='same', stride=(1, 1),
               b_init=0., activation=K.relu),
        N.Pool(pool_size=(2, 2), strides=None, mode='max'),
        N.Dropout(level=0.25),
    ]
    head = [
        N.Flatten(outdim=2),
        N.Dense(512, activation=K.relu),
        N.Dropout(level=0.5),
        N.Dense(n_outputs, activation=K.linear),
    ]
    network = N.Sequence(first_stage + second_stage + head, debug=1)
  logit = network(X)
  return dict(logit=logit, prob=tf.nn.softmax(logit))
50 | 


--------------------------------------------------------------------------------
/tests/test_time_delay_networks.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | 
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | import torch
 8 | from tensorflow.python.keras.layers import Dense
 9 | 
10 | from odin import networks_torch as nt
11 | from odin.networks import (TimeDelay, TimeDelayConv, TimeDelayConvTied,
12 |                            TimeDelayDense)
13 | 
# NOTE(review): these variables are assigned after `import tensorflow` has
# already run at the top of the file - confirm they still take effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

# fixed seeds for all three libraries so the random contexts are repeatable
tf.random.set_seed(8)
np.random.seed(8)
torch.manual_seed(8)

# random float32 input of shape (12, 80, 23)
x = np.random.rand(12, 80, 23).astype('float32')

# Smoke test: 20 rounds, each building every time-delay layer variant for both
# backends and printing the output shape. Nothing is asserted.
for _ in range(20):
  # up to 4 distinct integer offsets drawn from [-5, 5), sorted ascending
  ctx = sorted(set(int(i) for i in np.random.randint(-5, 5, size=4)))
  print('\n', ctx)

  # ====== tensorflow ====== #
  tdd = TimeDelay(
      fn_layer_creator=lambda: Dense(units=128),
      delay_context=ctx,  #
  )
  y = tdd(x)
  print(y.shape)

  # the remaining layers are built with their default delay context
  tdd = TimeDelayDense(units=128)
  y = tdd(x)
  print(y.shape)

  tdc = TimeDelayConv(units=128)
  y = tdc(x)
  print(y.shape)

  tdct = TimeDelayConvTied(units=128)
  y = tdct(x)
  print(y.shape)

  # ====== pytorch ====== #
  # add `nt.` to everything and the same code will work for pytorch
  tdd = nt.TimeDelay(
      fn_layer_creator=lambda: nt.Dense(128),
      delay_context=ctx,  #
  )
  y = tdd(x)
  print(y.shape)

  tdd = nt.TimeDelayDense(units=128)
  y = tdd(x)
  print(y.shape)

  tdc = nt.TimeDelayConv(units=128)
  y = tdc(x)
  print(y.shape)

  tdct = nt.TimeDelayConvTied(units=128)
  y = tdct(x)
  print(y.shape)
67 | 


--------------------------------------------------------------------------------
/odin/backend/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import functools
 4 | import inspect
 5 | import os
 6 | from collections import Mapping
 7 | from contextlib import contextmanager
 8 | 
 9 | from odin.backend import (interpolation, keras_callbacks, keras_helpers, losses,
10 |                           metrics)
11 | from odin.backend.alias import *
12 | from odin.backend.maths import *
13 | from odin.backend.tensor import *
14 | from odin.backend.types_helpers import *
15 | from odin.utils import as_tuple, is_path, is_string
16 | from six import add_metaclass
17 | from six.moves import builtins, cPickle
18 | 
19 | 
20 | # ===========================================================================
21 | # Make the layers accessible through backend
22 | # ===========================================================================
class _nn_meta(type):
  """Metaclass that resolves unknown class attributes against the layer
  namespaces of the currently selected framework (torch or tensorflow)."""

  def __getattr__(cls, key):
    """Look `key` up in the namespaces of the active framework.

    Raises:
      NotImplementedError : if the active framework is neither torch nor
        tensorflow.
      AttributeError : if no namespace defines `key`. Raising `AttributeError`
        (rather than `KeyError`) is what `hasattr` and `getattr` with a
        default rely on.
    """
    fw = get_framework()
    import tensorflow as tf
    import torch

    # namespaces listed in increasing priority: a later one shadows the
    # earlier ones when both define the same name
    if fw == torch:
      from odin import networks_torch
      namespaces = (torch.nn, networks_torch)
    elif fw == tf:
      from odin import networks
      from tensorflow.python.keras.engine import sequential, training
      namespaces = (tf.keras.layers, networks, sequential, training)
    else:
      raise NotImplementedError("No neural networks support for framework: " +
                                str(fw))
    for namespace in reversed(namespaces):
      try:
        return namespace.__dict__[key]
      except KeyError:
        continue
    raise AttributeError("'%s' has no attribute '%s' for framework: %s" %
                         (cls.__name__, key, str(fw)))
46 | 
47 | 
@add_metaclass(_nn_meta)
class nn:
  """Empty namespace class; attribute access on the class itself (e.g.
  `nn.Dense`) is answered by the `_nn_meta` metaclass."""
  pass
51 | 


--------------------------------------------------------------------------------
/benchmarks/should_concat_input_tf.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | 
 3 | import os
 4 | os.environ['ODIN'] = 'float32,gpu'
 5 | import timeit
 6 | import random
 7 | 
 8 | import numpy as np
 9 | 
10 | from odin.utils import UnitTimer, Progbar
11 | from odin import backend as K, nnet as N
12 | 
# Benchmark: is it faster to feed two separate inputs (X1, X2) or one
# concatenated input (X3) that is sliced inside the graph?
X1 = K.placeholder(shape=(10000, 1000), name='X1')
X2 = K.placeholder(shape=(10000, 1000), name='X2')

X3 = K.placeholder(shape=(10000, 2000), name='X3')

y1 = K.placeholder(shape=(1000, 2000), name='y1')
y2 = K.placeholder(shape=(2000, 3000), name='y2')
y3 = K.placeholder(shape=(3000, 4000), name='y3')
y4 = K.placeholder(shape=(4000, 5000), name='y4')

# ====== variant 1: two separate inputs ====== #
z = K.dot(X1, y1) + K.dot(X2, y1)
z = K.dot(z, y2)
z = K.dot(z, y3)
z = K.dot(z, y4)
print(z)
f = K.function([X1, X2, y1, y2, y3, y4], outputs=z)

# ====== variant 2: one input, split inside the graph ====== #
# X1 and X2 are rebound to slices of X3 from here on
X1 = X3[:, :1000]
X2 = X3[:, 1000:]
z1 = K.dot(X1, y1) + K.dot(X2, y1)
z1 = K.dot(z1, y2)
z1 = K.dot(z1, y3)
z1 = K.dot(z1, y4)
print(z1)
f1 = K.function([X3, y1, y2, y3, y4], outputs=z1)

# random values matching every placeholder shape, in the order
# [X1, X2, X3, y1, y2, y3, y4]
v = [np.random.rand(*i.shape.as_list()) for i in [X1, X2, X3, y1, y2, y3, y4]]

# warm-up calls, excluded from the measurement
f(v[0], v[1], v[3], v[4], v[5], v[6])
f1(v[2], v[3], v[4], v[5], v[6])

time = 0.
time1 = 0.
n = 100
prog = Progbar(target=n)
# Alternate between both variants so that each one runs n / 2 times.
# `timeit.default_timer()` reads a clock; `timeit.timeit()` (used before) runs
# a dummy statement a million times and returns how long *that* took, so the
# differences computed from it were meaningless.
for step in range(1, n + 1):
    prog.add(1)
    if step % 2 == 0:
        start = timeit.default_timer()
        f(v[0], v[1], v[3], v[4], v[5], v[6])
        time += timeit.default_timer() - start
    else:
        start = timeit.default_timer()
        f1(v[2], v[3], v[4], v[5], v[6])
        time1 += timeit.default_timer() - start

print("Splitted input:", time)
print("Concatenated input:", time1)
61 | 


--------------------------------------------------------------------------------
/odin/backend/types_helpers.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from numbers import Number
 3 | from typing import Callable, List, Union, Sequence, Any
 4 | 
 5 | from numpy import ndarray
 6 | from scipy.sparse import spmatrix
 7 | from tensorflow import Tensor
 8 | from tensorflow.python.keras import Model, Sequential
 9 | from tensorflow.python.keras.layers import Layer
10 | from typing_extensions import Literal
11 | 
12 | from odin.backend.interpolation import Interpolation
13 | from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2
14 | from tensorflow.python.keras.initializers.initializers_v2 import \
15 |   Initializer as InitializerV2
16 | 
# Public type aliases re-exported through `from ... import *`.
__all__ = [
  'Coefficient',
  'NoneType',
  'TensorType',
  'LayerType',
  'BATCH',
  'EVENT',
  'MCMC',
  'CorrelationMethod',
  'Axes',
  'Axis',
  'DataType',
  'LabelType',
  'Scalar',
  'Optimizer',
  'Activation',
  'Initializer'
]

# a plain number or an `Interpolation` object
Coefficient = Union[Number, Interpolation]

# accepted correlation method names
CorrelationMethod = Literal[
  'spearman', 'lasso', 'pearson', 'mutualinfo', 'importance']

NoneType = type(None)
# array-like inputs: scipy sparse matrix, numpy array or tensorflow Tensor
TensorType = Union[spmatrix, ndarray, Tensor]
Scalar = Union[Tensor, ndarray, Number]
# a layer/model instance, a callable creating a layer, or a tensor-to-tensor
# callable
LayerType = Union[Layer, Model, Sequential, Callable[..., Layer],
                  Callable[[Tensor], Tensor]]

# presumably the sizes of the batch / event / MCMC-sample dimensions, where
# None would stand for an unknown size - TODO confirm against usage
BATCH = Union[int, NoneType]
EVENT = int
MCMC = Union[int, NoneType]

# one axis index or a sequence of them
Axes = Union[int, Sequence[int]]
Axis = int

# accepted data modality and label kind names
DataType = Literal['image', 'audio', 'text', 'gene']
LabelType = Literal['binary', 'categorical', 'factor']

Optimizer = OptimizerV2
# activation given as a callable or by name
Activation = Union[Callable[[TensorType], TensorType], str]
# initializer given by name, as a keras initializer object or as a callable
Initializer = Union[str, InitializerV2, Callable[[Any], TensorType]]
60 | 


--------------------------------------------------------------------------------
/odin/fuel/image_data/omniglot.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | 
 6 | import tensorflow_datasets as tfds
 7 | from odin.fuel.image_data._base import ImageDataset
 8 | 
 9 | 
class Omniglot(ImageDataset):
  """ Omniglot dataset

  Loaded through `tensorflow_datasets` as supervised (image, label) pairs.
  The first 90% of the tfds 'train' split is used for training, the remaining
  10% for validation and the tfds 'test' split for testing.

  Arguments:
    image_size : target height and width. `None` or 105 keeps the images
      untouched (no resizing and no label cast is applied in that case).
    seed : shuffle seed passed to `tfds.ReadConfig`.
  """

  def __init__(self, image_size: Optional[int] = 28, seed: int = 1):
    train, valid, test = tfds.load(
        name='omniglot',
        split=['train[:90%]', 'train[90%:]', 'test'],
        read_config=tfds.ReadConfig(shuffle_seed=seed,
                                    shuffle_reshuffle_each_iteration=True),
        as_supervised=True,
    )

    # 105 is treated as the native size: only other sizes trigger resizing
    if image_size is None:
      image_size = 105
    image_size = int(image_size)
    if image_size != 105:

      @tf.function
      def resize(x, y):
        # bilinear, anti-aliased resize to (image_size, image_size)
        x = tf.image.resize(x,
                            size=(image_size, image_size),
                            method=tf.image.ResizeMethod.BILINEAR,
                            preserve_aspect_ratio=True,
                            antialias=True)
        # NOTE(review): labels become float32 only on this resize path;
        # confirm whether the un-resized path should cast them as well.
        y = tf.cast(y, dtype=tf.float32)
        return x, y

      train = train.map(resize, tf.data.AUTOTUNE)
      valid = valid.map(resize, tf.data.AUTOTUNE)
      test = test.map(resize, tf.data.AUTOTUNE)

    self.train = train
    self.valid = valid
    self.test = test
    self._image_size = image_size

  @property
  def binarized(self):
    """Always False."""
    return False

  @property
  def shape(self):
    """Image shape as (image_size, image_size, 3)."""
    return (self._image_size, self._image_size, 3)

  @property
  def labels(self):
    """Array of 1623 string labels '0' ... '1622' (presumably one per
    character class rather than per alphabet - TODO confirm)."""
    return np.array([str(i) for i in range(1623)])
58 | 


--------------------------------------------------------------------------------
/odin/bay/vi/autoencoder/sequential_vae.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | from odin.bay.vi.autoencoder.variational_autoencoder import \
 4 |     VariationalAutoencoder
 5 | 
 6 | 
class SequentialVAE(VariationalAutoencoder):
  r"""Placeholder for a sequential VAE: the class currently adds nothing on
  top of `VariationalAutoencoder` (the constructor only forwards `**kwargs`).

  References:
    Yingzhen Li and Stephan Mandt. "Disentangled Sequential Autoencoder".
      In _International Conference on Machine Learning_, 2018.
      https://arxiv.org/abs/1803.02991
    Fraccaro, M., Sønderby, S.K., Paquet, U., Winther, O., 2016.
      "Sequential Neural Models with Stochastic Layers".
      arXiv:1605.07571 [cs, stat]. (https://github.com/google/vae-seq)
    Zhao, S., Song, J., Ermon, S., 2017. "Towards Deeper Understanding
      of Variational Autoencoding Models". arXiv:1702.08658 [cs, stat].
  """

  def __init__(self, **kwargs):
    # no additional state yet; all arguments go to the base class
    super().__init__(**kwargs)
23 | 
24 | 
class SequentialAttentionVAE(VariationalAutoencoder):
  r"""Placeholder for a VAE with variational attention: the class body is
  empty, so it behaves exactly like `VariationalAutoencoder`.

  Reference:
    Deng, Y., Kim, Y., Chiu, J., Guo, D., Rush, A.M., 2018.
      "Latent Alignment and Variational Attention".
      arXiv:1807.03756 [cs, stat].
    Bahuleyan, H., Mou, L., Vechtomova, O., Poupart, P., 2017.
      "Variational Attention for Sequence-to-Sequence Models".
      arXiv:1712.08207 [cs].
    https://github.com/HareeshBahuleyan/tf-var-attention
    https://github.com/harvardnlp/var-attn/
  """
37 | 
38 | 
class VariationalRNN(VariationalAutoencoder):
  r"""Placeholder for a variational RNN: the class currently adds nothing on
  top of `VariationalAutoencoder` (the constructor only forwards `**kwargs`).

  Reference:
    Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A.C., Bengio, Y.,
      2015. "A Recurrent Latent Variable Model for Sequential Data",
      Advances in Neural Information Processing Systems 28.
  """

  def __init__(self, **kwargs):
    # no additional state yet; all arguments go to the base class
    super().__init__(**kwargs)
50 | 


--------------------------------------------------------------------------------
/benchmarks/fast_stacking_numba.py:
--------------------------------------------------------------------------------
 1 | # ===========================================================================
 2 | # Just for reference, very difficult to apply
 3 | # ===========================================================================
 4 | from __future__ import print_function, division, absolute_import
 5 | import numpy as np
 6 | import numba as nb
 7 | 
 8 | from odin import utils
 9 | 
10 | X = np.random.rand(50000, 123)
11 | 
12 | 
def normal(x):
    """Stack flattened windows of 21 consecutive rows, taken every 5 rows.

    Only windows that fit completely inside `x` are kept. Two or more windows
    are returned as one 2-D array (one flattened window per row); a single
    window is returned on its own as a 1-D array. With no complete window an
    `IndexError` is raised.
    """
    n_rows = x.shape[0]
    windows = []
    for start in range(0, n_rows, 5):
        stop = start + 21
        if stop <= n_rows:
            windows.append(x[start:stop].ravel())
    if len(windows) > 1:
        return np.asarray(windows)
    return windows[0]
20 | 
21 | 
# time 12 runs of the pure numpy implementation
with utils.UnitTimer(12):
    for i in range(12):
        x1 = normal(X)
print(x1.shape)


# pre-allocated output buffer for `fast`: 2583 = 21 rows * 123 columns of X
tmp = np.ones((20000, 2583))
29 | 
30 | 
# Numba version of `normal`: writes every complete window of 21 rows (taken
# every 5 rows, flattened) into the caller-supplied buffer `tmp` and returns
# the filled prefix of `tmp`. The result is a slice of `tmp` itself, so `tmp`
# is modified in place.
@nb.jit('f8[:,:](f8[:,:], f8[:,:])', locals={}, nopython=True, nogil=True, cache=True)
def fast(x, tmp):
    idx = list(range(0, x.shape[0], 5))
    count = 0
    # `_` is the position of the window start `i` within `idx`
    for _, i in enumerate(idx):
        # skip windows that would run past the last row
        if (i + 21) <= x.shape[0]:
            tmp[_] = x[i:i + 21].ravel()
            count += 1
    # idx = np.arange(count)
    # np.random.shuffle(idx)
    return tmp[:count]
42 | 
43 | 
# time 12 runs of the numba implementation
with utils.UnitTimer(12):
    for i in range(12):
        x2 = fast(X, tmp)
print(x2.shape)
# sanity check: both implementations should agree
print(np.sum(x1 - x2)) # must be 0.

# Numpy time: 0.107473 (sec)
# Numba time: 0.037539 (sec) # at least 3 times faster


# allocation cost comparison: np.ones vs. np.empty for the same shape
with utils.UnitTimer(12):
    for i in range(12):
        np.ones((100000, 2583))

with utils.UnitTimer(12):
    for i in range(12):
        np.empty((100000, 2583))
# creating an np.empty array is far faster than creating an np.ones array
# Time: 1.278843 (sec)
# Time: 0.000014 (sec)
64 | 


--------------------------------------------------------------------------------
/examples/cifar10_ivec.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | import os
 3 | os.environ['ODIN'] = 'gpu,float32'
 4 | import shutil
 5 | 
 6 | import numpy as np
 7 | import tensorflow as tf
 8 | 
 9 | from odin import backend as K, nnet as N, visual as V, fuel as F
10 | from odin.utils import minibatch, Progbar, get_exppath, crypto
11 | from odin import ml
12 | 
13 | from sklearn.svm import SVC
14 | from sklearn.metrics import classification_report
15 | 
# experiment directory, also handed to the i-vector model as `path`
EXP_PATH = get_exppath('cifar10_ivec')
# ===========================================================================
# Load the dataset
# ===========================================================================
ds = F.CIFAR10.load()
print(ds)
# every image is flattened to a vector of 3 * 32 * 32 values
X_train, y_train = ds['X_train'][:].reshape(-1, 3 * 32 * 32), ds['y_train'][:]
X_test, y_test = ds['X_test'][:].reshape(-1, 3 * 32 * 32), ds['y_test'][:]
# ====== normalize the data ====== #
# divide by 255 (presumably the pixels are 8-bit values - TODO confirm)
X_train = X_train / 255.
X_test = X_test / 255.
print("Input:", X_train.shape, X_test.shape)
# ===========================================================================
# Training the GMM
# ===========================================================================
ivec = ml.Ivector(path=EXP_PATH, nmix=32, tv_dim=16,
                  niter_gmm=8, niter_tmat=8)
# the model is fitted on the training images only
ivec.fit(X_train)
I_train = ivec.transform(X_train, save_ivecs=True, name='train')[:]
I_test = ivec.transform(X_test, save_ivecs=True, name='test')[:]
print(ivec)
# ===========================================================================
# Classifier
# ===========================================================================
# SVM with default settings trained on the transformed training data and
# scored on the transformed test data
svm = SVC()
svm.fit(I_train, y_train)
print(classification_report(y_true=y_test,
                            y_pred=svm.predict(I_test)))
44 | 


--------------------------------------------------------------------------------
/tests/ml/test_clustering.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | import unittest
 5 | from tempfile import mkstemp
 6 | 
 7 | import numpy as np
 8 | 
 9 | from odin.ml import clustering, fast_dbscan, fast_kmeans, fast_knn
10 | 
11 | np.random.seed(8)
12 | 
13 | try:
14 |   import cuml
15 |   _CUML = True
16 | except ImportError:
17 |   _CUML = False
18 | 
19 | 
def _prepare():
  """Load the iris data set and return `(features, targets, n_classes)`,
  where `n_classes` is the number of distinct target values."""
  from sklearn.datasets import load_iris
  features, targets = load_iris(return_X_y=True)
  n_classes = len(np.unique(targets))
  return features, targets, n_classes
24 | 
25 | 
class ClusteringTest(unittest.TestCase):
  """Checks that the `fast_*` helpers return a fitted model of the expected
  class for each available framework (cuML is covered only when it can be
  imported)."""

  def test_kmeans(self):
    """`fast_kmeans` returns the expected estimator type and its predictions
    use exactly `n` distinct cluster ids."""
    x, y, n = _prepare()
    from sklearn.cluster import MiniBatchKMeans, KMeans

    model = [
        fast_kmeans(x, n_clusters=n, framework='sklearn'),
        fast_kmeans(x, n_clusters=n, batch_size=32)
    ]
    mtype = [KMeans, MiniBatchKMeans]
    if _CUML:
      # rebinding `KMeans` here is harmless: the sklearn class was already
      # stored in `mtype` above
      from cuml.cluster import KMeans
      model.append(fast_kmeans(x, n_clusters=n))
      mtype.append(KMeans)

    for m, t in zip(model, mtype):
      self.assertIsInstance(m, t)
      self.assertEqual(len(np.unique(m.predict(x))), n)

  def test_knn(self):
    """`fast_knn` returns the expected estimator type for each framework.

    A leftover debugging `print(...)` followed by `exit()` used to abort this
    test before any assertion was reached; both were removed.
    """
    x, y, n = _prepare()
    from sklearn.neighbors import NearestNeighbors
    model = [fast_knn(x, n_neighbors=n, framework='sklearn')]
    mtype = [NearestNeighbors]
    if _CUML:
      from cuml.neighbors import NearestNeighbors
      model.append(fast_knn(x, n_neighbors=n))
      mtype.append(NearestNeighbors)

    for m, t in zip(model, mtype):
      self.assertIsInstance(m, t)
      # NOTE(review): assumes the object returned by `fast_knn` exposes
      # `predict`; these assertions were unreachable before - confirm.
      self.assertEqual(len(np.unique(m.predict(x))), n)

  def test_dbscan(self):
    """Not implemented yet: only prepares the data."""
    x, y, n = _prepare()
    # model = fast_kmeans(x)
65 | 
66 | 
67 | if __name__ == '__main__':
68 |   unittest.main()
69 | 


--------------------------------------------------------------------------------
/odin/ml/linear_model.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional, Dict
 2 | from typing_extensions import Literal
 3 | from sklearn.linear_model import LogisticRegression
 4 | from sklearn.svm import SVC
 5 | 
 6 | __all__ = ['fast_logistic_regression', 'fast_svc']
 7 | 
 8 | CUML_SOLVER = set(['qn', 'lbfgs', 'owl'])
 9 | 
10 | 
def _prepare_kw(local_dict, *args, **kwargs):
  """Build keyword arguments from a copy of `local_dict`.

  Every key named in `args` is removed (a missing key raises `KeyError`),
  then `kwargs` is merged on top, overriding entries with the same name.
  `local_dict` itself is left untouched.
  """
  remaining = dict(local_dict)
  for name in args:
    del remaining[name]
  return {**remaining, **kwargs}
17 | 
18 | 
def fast_svc(
    X,
    y,
    *,
    framework: Literal['auto', 'cuml', 'sklearn'] = 'sklearn',
    **kwargs,
) -> SVC:
  """Not implemented yet: the body is empty, so the call returns `None`
  regardless of the arguments (despite the `SVC` return annotation)."""
  pass
27 | 
28 | 
def fast_logistic_regression(
    X,
    y,
    *,
    penalty: Literal['l1', 'l2', 'elasticnet', 'none'] = 'l2',
    C: float = 1.0,
    solver: Literal['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'] = 'lbfgs',
    fit_intercept: bool = True,
    l1_ratio: Optional[float] = None,
    tol: float = 1e-4,
    max_iter: int = 1000,
    class_weight: Optional[Dict[str, float]] = None,
    n_jobs: Optional[int] = None,
    random_state: int = 1,
    framework: Literal['auto', 'cuml', 'sklearn'] = 'sklearn',
    **kwargs,
) -> LogisticRegression:
  """Fit a logistic regression on `(X, y)` and return the fitted model.

  With `framework='sklearn'` the scikit-learn estimator is used. Any other
  value tries the cuML estimator first and falls back to scikit-learn when
  cuML cannot be imported. For cuML, `n_jobs` and `random_state` are dropped
  and the solver is always set to 'qn'. Extra `kwargs` are forwarded to the
  estimator constructor and override the named arguments.

  The cuML LogisticRegression is only faster when n_samples > 100000 given 64
  feature dimensions
  """
  # must stay the first statement: `locals()` has to contain the function
  # arguments only
  kw = _prepare_kw(locals(), 'X', 'y', 'kwargs', 'framework', **kwargs)
  ### pick the estimator class
  if framework == 'sklearn':
    estimator_cls = LogisticRegression
  else:
    try:
      from cuml.linear_model import LogisticRegression as estimator_cls
    except ImportError:
      estimator_cls = LogisticRegression
    else:
      # arguments the cuML estimator is not given
      kw.pop('n_jobs')
      kw.pop('random_state')
      # if solver not in CUML_SOLVER:
      kw['solver'] = 'qn'
  ### train
  fitted = estimator_cls(**kw)
  fitted.fit(X, y)
  return fitted
67 | 


--------------------------------------------------------------------------------
/examples/features/speech_features_visualization.py:
--------------------------------------------------------------------------------
 1 | 
# ====== Visual cluster ====== #
# TODO: fix bug of the scatter method
# labels = the distinct single-character suffixes found after the last '_'
# of each file name
labels = list(set(filter(lambda x: len(x) == 1,
                         [i.split('_')[-1] for i in all_files])))
print("Labels:", ctext(labels, 'cyan'))
# for every feature type present in the dataset: PCA-transform each labelled
# file, average it to a single vector, then plot the PCA and t-SNE projections
for feat in ('bnf', 'mspec', 'spec', 'mfcc'):
  if feat not in ds:
    continue
  from sklearn.manifold import TSNE
  X = []; y = []
  # get right feat and indices
  feat_pca = ds.find_prefix(feat, 'pca')
  indices = ds.find_prefix(feat, 'indices')
  # transform
  prog = Progbar(target=len(indices),
                 print_summary=True, print_report=True,
                 name="PCA transform: %s" % feat)
  for f, (start, end) in indices:
    # only files whose name ends in a single-character label are used
    if len(f.split('_')[-1]) == 1:
      # one averaged PCA vector per file
      X.append(np.mean(
          feat_pca.transform(ds[feat][start:end]),
          axis=0, keepdims=True))
      y.append(f.split('_')[-1])
    prog.add(1)
  X_pca = np.concatenate(X, axis=0)
  y = np.asarray(y)
  with UnitTimer(name="TSNE: feat='%s' N=%d" % (feat, X_pca.shape[0])):
    X_tsne = TSNE(n_components=2).fit_transform(X_pca)
  colors = V.generate_random_colors(len(labels), seed=1234)
  # convert y to the appropriate color
  y = [colors[labels.index(i)] for i in y]
  # NOTE(review): the legend text is the color's position in `colors`, not the
  # label string itself - confirm this is intended.
  legend = {c: str(i) for i, c in enumerate(colors)}
  with V.figure(ncol=1, nrow=5, title='PCA: %s' % feat):
    V.plot_scatter(X_pca[:, 0], X_pca[:, 1], color=y, legend=legend)
  with V.figure(ncol=1, nrow=5, title='TSNE: %s' % feat):
    V.plot_scatter(X_tsne[:, 0], X_tsne[:, 1], color=y, legend=legend)
# ====== save all the figure ====== #
V.plot_save(os.path.join(fig_path, 'pca_tsne.pdf'),
            tight_plot=True)
# ====== print log ====== #
print('Output path:', ctext(output_path, 'cyan'))
print('Figure path:', ctext(fig_path, 'cyan'))
print('Log path:', ctext(log_path, 'cyan'))
45 | 


--------------------------------------------------------------------------------
/examples/vae/stl10_self_supervised.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | 
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | from matplotlib import pyplot as plt
 8 | from tqdm import tqdm
 9 | 
10 | from odin.bay.vi import RVconf as RV
11 | from odin.bay.vi.autoencoder import BetaVAE, MultitaskVAE, SemifactorVAE
12 | from odin.fuel import MNIST, STL10, LegoFaces
13 | 
# fixed seeds for repeatable runs
tf.random.set_seed(1)
np.random.seed(1)

# NOTE: despite the file name, the dataset used here is LegoFaces.
ds = LegoFaces()
# NOTE(review): `train`, `test` and `test_u` are created but never used below.
train = ds.create_dataset(partition='train', label_percent=True)
train_l = ds.create_dataset(partition='train_labelled', label_percent=True)
test = ds.create_dataset(partition='test', label_percent=True)
train_u = ds.create_dataset(partition='train', label_percent=False)
test_u = ds.create_dataset(partition='test', label_percent=False)
save_path = f'/tmp/{ds.name}.w'

# two outputs: the image itself and a 10-way one-hot label
vae = MultitaskVAE(encoder=ds.name,
                   alpha=10.,
                   outputs=RV(ds.shape,
                              'bernoulli',
                              projection=False,
                              name='Image'),
                   labels=RV(10, 'onehot', projection=True, name="Labels"),
                   path=save_path)
# two training phases chained on the same model: first on `train_l`, then on
# `train_u`; the weights are saved once both have finished
vae.fit(
    train_l,
    learning_rate=1e-4,
    max_iter=20000,
).fit(
    train_u,
    learning_rate=1e-4,
    max_iter=80000,
    earlystop_threshold=0.001,
    earlystop_patience=-1,
    compile_graph=True,
).save_weights(vae.save_path)

# decode 64 samples from the prior and show the mean of the first output
# distribution in an 8 x 8 grid
z = vae.sample_prior(64)
img = tf.nest.flatten(vae.decode(z))[0].mean().numpy()
fig = plt.figure(figsize=(8, 8))
for idx, i in enumerate(img):
  ax = plt.subplot(8, 8, idx + 1)
  # drop a trailing singleton channel so imshow receives a 2-D image
  if i.shape[-1] == 1:
    i = np.squeeze(i, axis=-1)
  ax.imshow(i)
  ax.axis('off')
fig.tight_layout()
fig.savefig(f'/tmp/{ds.name}_z.png', dpi=100)
vae.plot_learning_curves(f'/tmp/{ds.name}.png')
58 | 


--------------------------------------------------------------------------------
/odin/fuel/bio_data/cortex.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import os
 3 | import zipfile
 4 | from urllib.request import urlretrieve
 5 | 
 6 | import numpy as np
 7 | from scipy import sparse
 8 | 
 9 | from odin.fuel.bio_data._base import GeneDataset
10 | 
11 | 
def _load_single_cell_data(url, path):
  """Download (once), extract (once) and load a zipped single-cell dataset.

  Arguments:
    url : bytes, base64-encoded URL of the zip archive.
    path : directory used to store the archive and its extracted content;
      it is created when missing.

  The archive is expected to extract into a folder named after the archive
  (without '.zip') containing the files 'X' and 'y' (scipy sparse `.npz`
  format) and 'var_names' and 'labels' (numpy format).

  Returns:
    x, y, xvar, yvar : the two matrices (COO and DOK matrices are converted
      to CSR) followed by the content of 'var_names' and 'labels'.
  """
  path = os.path.abspath(os.path.expanduser(path))
  # `os.makedirs` returns None, so its result must not be assigned to `path`
  os.makedirs(path, exist_ok=True)
  url = str(base64.decodebytes(url), 'utf-8')
  zip_path = os.path.join(path, os.path.basename(url))
  name = os.path.basename(zip_path).replace('.zip', '')
  extracted_path = os.path.join(path, name)
  # download
  if not os.path.exists(zip_path):
    urlretrieve(filename=zip_path, url=url)
  # extract
  if not os.path.isdir(extracted_path):
    # opening by path lets ZipFile own (and close) the underlying file
    with zipfile.ZipFile(zip_path) as archive:
      archive.extractall(path)
  # load data
  with open(os.path.join(extracted_path, 'X'), 'rb') as f:
    x = sparse.load_npz(f)
  with open(os.path.join(extracted_path, 'y'), 'rb') as f:
    y = sparse.load_npz(f)
  # NOTE: allow_pickle=True unpickles objects stored in the files; this is
  # only safe as long as the download source is trusted.
  with open(os.path.join(extracted_path, 'var_names'), 'rb') as f:
    xvar = np.load(f, allow_pickle=True)
  with open(os.path.join(extracted_path, 'labels'), 'rb') as f:
    yvar = np.load(f, allow_pickle=True)
  # convert COO / DOK matrices to CSR
  if isinstance(x, (sparse.coo_matrix, sparse.dok_matrix)):
    x = x.tocsr()
  if isinstance(y, (sparse.coo_matrix, sparse.dok_matrix)):
    y = y.tocsr()
  return x, y, xvar, yvar
46 | 
47 | 
class Cortex(GeneDataset):
  """Cortex dataset: downloads and loads the `cortex` archive and exposes the
  two matrices as `x`, `y` and their names as `xvar`, `yvar`."""

  def __init__(self, path="~/tensorflow_datasets/cortex"):
    super().__init__()
    # base64-encoded URL of the zip archive
    url = b'aHR0cHM6Ly9haS1kYXRhc2V0cy5zMy5hbWF6b25hd3MuY29tL2NvcnRleC56aXA=\n'
    loaded = _load_single_cell_data(url=url, path=path)
    self.x, self.y, self.xvar, self.yvar = loaded

  @property
  def name(self):
    """Dataset identifier."""
    return "cortex"
59 | 


--------------------------------------------------------------------------------
/odin/search/assignment.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from scipy.optimize import linear_sum_assignment
 3 | 
 4 | 
 5 | def search_assignment(matrix,
 6 |                       row_assignment=False,
 7 |                       maximize=True,
 8 |                       inplace=False):
 9 |   r"""Solve the linear sum assignment problem.
10 | 
11 |   This function can also solve a generalization of the classic assignment
12 |   problem where the cost matrix is rectangular. If it has more rows than
13 |   columns, then not every row needs to be assigned to a column, and vice
14 |   versa.
15 | 
16 |   Arguments:
17 |     matrix : an Array.
18 |       The cost matrix of the bipartite graph.
19 |     inplace : a Boolean.
20 |       If True, return a new matrix with the applied assignment
21 | 
22 |   Returns:
23 |     row_ind, col_ind : array
24 |         An array of row indices and one of corresponding column indices giving
25 |         the optimal assignment. The cost of the assignment can be computed
26 |         as ``cost_matrix[row_ind, col_ind].sum()``. The row indices will be
27 |         sorted; in the case of a square cost matrix they will be equal to
28 |         ``numpy.arange(cost_matrix.shape[0])``.
29 |   """
30 |   assert matrix.ndim == 2
31 |   rows, cols = linear_sum_assignment(matrix if row_assignment else matrix.T,
32 |                                      maximize=maximize)
33 |   # select the right assignment and keep the order of other dimension intact
34 |   if not row_assignment:
35 |     rows, cols = (cols, rows)
36 |     ids = np.argsort(rows)
37 |   else:
38 |     ids = np.argsort(cols)
39 |   rows = rows[ids]
40 |   cols = cols[ids]
41 |   # inplace output matrix
42 |   if inplace:
43 |     # make sure all indices appear
44 |     rows = rows.tolist()
45 |     for i in range(matrix.shape[0]):
46 |       if i not in rows:
47 |         rows.append(i)
48 |     cols = cols.tolist()
49 |     for i in range(matrix.shape[0]):
50 |       if i not in cols:
51 |         cols.append(i)
52 |     return matrix[rows] if row_assignment else matrix[:, cols]
53 |   return rows, cols
54 | 


--------------------------------------------------------------------------------
/odin/preprocessing/confs/prosodyShs.cfg:
--------------------------------------------------------------------------------
 1 | ///////////////////////////////////////////////////////////////////////////////////////
 2 | ///////// > openSMILE configuration file for speech prosody features //////////////////
 3 | /////////   pitch and intensity                                      //////////////////
 4 | /////////                                                            //////////////////
 5 | ///////// (c) 2014-2016 audEERING.                                   //////////////////
 6 | /////////     All rights reserverd. See file COPYING for details.    //////////////////
 7 | ///////////////////////////////////////////////////////////////////////////////////////
 8 | 
 9 | ;;;;;;; component list ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10 | [componentInstances:cComponentManager]
11 | instance[scale].type=cSpecScale
12 | instance[shs].type=cPitchShs
13 | instance[smooth].type=cPitchSmoother
14 | 
15 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;; main section ;;;;;;;;;;;;;;;;;;;;;;;;;;;
16 | [scale:cSpecScale]
17 | reader.dmLevel=fftmag
18 | writer.dmLevel=hps
19 | copyInputName = 1
20 | processArrayFields = 0
21 | scale=octave
22 | sourceScale = lin
23 | interpMethod = spline
24 | minF = 25
25 | maxF = -1
26 | nPointsTarget = 0
27 | specSmooth = 1
28 | specEnhance = 1
29 | auditoryWeighting = 1
30 | 
31 | [shs:cPitchShs]
32 | reader.dmLevel=hps
33 | writer.dmLevel=pitchShs
34 | copyInputName = 1
35 | processArrayFields = 0
36 | maxPitch = {fmax}
37 | minPitch = {fmin}
38 | nCandidates = 4
39 | scores = 1
40 | voicing = 1
41 | F0C1 = 0
42 | voicingC1 = 0
43 | F0raw = 1
44 | voicingClip = 1
45 | voicingCutoff = {voicingCutoff}
46 | inputFieldSearch = Mag_logScale
47 | octaveCorrection = 0
48 | nHarmonics = 15
49 | compressionFactor = 0.850000
50 | 
51 | [smooth:cPitchSmoother]
52 | reader.dmLevel=pitchShs
53 | writer.dmLevel=pitch
54 | copyInputName = 1
55 | processArrayFields = 0
56 | medianFilter0 = 0
57 | postSmoothing = 0
58 | postSmoothingMethod = simple
59 | octaveCorrection = 0
60 | F0final = 1
61 | F0finalEnv = 0
62 | no0f0 = 0
63 | voicingFinalClipped = 0
64 | voicingFinalUnclipped = 1
65 | F0raw = 0
66 | voicingC1 = 0
67 | voicingClip = 0
68 | 


--------------------------------------------------------------------------------
/odin/utils/path_utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import inspect
 4 | import os
 5 | import shutil
 6 | import sys
 7 | from typing import Callable
 8 | 
 9 | from six import string_types
10 | 
11 | 
def get_script_path(module=None, return_dir=False):
  r""" Locate the running script or a given module on disk.

  Arguments:
    module : None, a module name, or any object.
      - None: the absolute path of the directory part of `sys.argv[0]`
        is used (i.e. the folder of the running script).
      - a string: looked up in `sys.modules`, the absolute path of the
        module's `__file__` is used.
      - anything else: resolved with `inspect.getmodule`, the absolute path
        of that module's `__file__` is used.
    return_dir : a Boolean. If True, `os.path.dirname` is applied once more
      to the path found above before returning it.

  Example:

    >>> get_script_path(__name__)
    # path to the file of the current module

    >>> get_script_path()
    # folder of the running script, e.g. "python /a/b/train.py" -> /a/b
  """
  if module is None:
    script_dir = os.path.dirname(sys.argv[0])
    location = os.path.abspath(os.path.join('.', script_dir))
  else:
    if isinstance(module, string_types):
      resolved = sys.modules[module]
    else:
      resolved = inspect.getmodule(module)
    location = os.path.abspath(resolved.__file__)
  return os.path.dirname(location) if return_dir else location
36 | 
37 | 
def get_script_name():
  """Return the file name of the running script (`sys.argv[0]`) with the
  directory part and the extension removed"""
  filename = os.path.basename(sys.argv[0])
  stem, _ = os.path.splitext(filename)
  return stem
43 | 
44 | 
def get_folder_size(path):
  r""" Return the total size (in bytes) of all files stored under `path`.

  Arguments:
    path : a String. A directory (walked recursively) or a single file.

  Returns:
    an Integer, the sum of `os.path.getsize` over every regular file found.
    Symbolic links are not followed and not counted, so nothing is counted
    twice and broken links do not raise. A path that does not exist
    yields 0.
  """
  if os.path.isfile(path):
    return os.path.getsize(path)
  total = 0
  # os.walk does not descend into symlinked directories by default
  for root, _, filenames in os.walk(path):
    for name in filenames:
      fpath = os.path.join(root, name)
      if not os.path.islink(fpath):
        total += os.path.getsize(fpath)
  return total
47 | 
48 | 
def clean_folder(path: str,
                 filter: Callable[[str], bool] = None,
                 verbose: bool = False) -> None:
  r""" Delete the direct children (files and sub-folders) of a directory.

  Arguments:
    path : a String. The directory to empty; nothing happens if it does not
      exist or is not a directory.
    filter : a Callable or None. Called with the joined path of each child;
      the child is only removed when the call returns a true value.
      Ignored when None or not callable.
    verbose : a Boolean. If True, print one line per removed entry.
  """
  if not (os.path.exists(path) and os.path.isdir(path)):
    return
  has_filter = filter is not None and callable(filter)
  for entry in os.listdir(path):
    target = os.path.join(path, entry)
    # entries rejected by the filter are kept
    if has_filter and not filter(target):
      continue
    if os.path.isfile(target):
      os.remove(target)
      if verbose:
        print("Remove file  :", target)
    elif os.path.isdir(target):
      shutil.rmtree(target)
      if verbose:
        print("Remove folder:", target)


# alias kept for callers using the other spelling
clear_folder = clean_folder
72 | 


--------------------------------------------------------------------------------
/examples/models/models_ladder.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | 
 3 | from odin import nnet as N, backend as K
 4 | 
 5 | 
@N.Model
def ladder1(X, y, states, **kwargs):
    """Work-in-progress ladder-network model definition.

    Builds (or re-uses from `states`) an encoder and a decoder `N.Sequence`,
    runs the encoder twice on `X` (with `noise=-1` and `noise=1`) and prints
    the number of selected layer outputs.

    NOTE(review): the function currently calls `exit()` right after that
    print, so the final `return` is never reached and `y` as well as
    `denoising_cost` are unused - this looks like leftover debugging code,
    confirm before relying on this model.

    Arguments:
        X : input passed to the encoder.
        y : unused in the visible code.
        states : None to create new networks, otherwise an
            `(f_encoder, f_decoder)` pair to re-use.
        kwargs : `noise` (default 0.3) is used as `noise_level` of the
            encoder's BatchNorm layers.
    """
    noise = kwargs.get('noise', 0.3)
    # hyperparameters that denote the importance of each layer
    denoising_cost = [1000.0, 10.0, 0.10, 0.10, 0.10]

    if states is None:
        # encoder: Flatten, then 4 x (Dense -> noisy BatchNorm + relu),
        # then a 10-unit softmax Dense
        f_encoder = N.Sequence([
            N.Flatten(outdim=2),

            N.Dense(num_units=1024, b_init=None),
            N.BatchNorm(axes=0, noise_level=noise, noise_dims=None,
                activation=K.relu),

            N.Dense(num_units=512, b_init=None),
            N.BatchNorm(axes=0, noise_level=noise, noise_dims=None,
                activation=K.relu),

            N.Dense(num_units=256, b_init=None),
            N.BatchNorm(axes=0, noise_level=noise, noise_dims=None,
                activation=K.relu),

            N.Dense(num_units=128, b_init=None),
            N.BatchNorm(axes=0, noise_level=noise, noise_dims=None,
                activation=K.relu),

            N.Dense(num_units=10, activation=K.softmax),
        ], all_layers=True, debug=True, name='Encoder')
        # decoder: mirrored layer sizes (128 -> 1024), reshaped to
        # (-1, 28, 28) at the end
        f_decoder = N.Sequence([
            N.Dense(num_units=128, b_init=None),
            N.BatchNorm(axes=0, activation=K.relu),

            N.Dense(num_units=256, b_init=None),
            N.BatchNorm(axes=0, activation=K.relu),

            N.Dense(num_units=512, b_init=None),
            N.BatchNorm(axes=0, activation=K.relu),

            N.Dense(num_units=1024, b_init=None),
            N.BatchNorm(axes=0, activation=K.relu),

            N.Reshape(shape=(-1, 28, 28)),
        ], all_layers=True, debug=True, name='Decoder')
    else:
        f_encoder, f_decoder = states
    # every second output starting from index 2; presumably the BatchNorm
    # outputs given `all_layers=True` - TODO confirm against N.Sequence
    y_encoder_clean = f_encoder(X, noise=-1)[2::2]
    y_encoder_corrp = f_encoder(X, noise=1)[2::2]
    print(len(y_encoder_clean), len(y_encoder_corrp))
    # NOTE(review): terminates the interpreter; everything below is dead code
    exit()
    return (None, None), [f_encoder, f_decoder]
58 | 


--------------------------------------------------------------------------------
/odin/bay/vi/autoencoder/__init__.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | from typing import Optional, Type
 3 | 
 4 | from odin.bay.random_variable import RVconf
 5 | from odin.networks import NetConf
 6 | from odin.bay.vi.autoencoder.auxiliary_vae import *
 7 | from odin.bay.vi.autoencoder.beta_vae import *
 8 | from odin.bay.vi.autoencoder.conditional_vae import *
 9 | from odin.bay.vi.autoencoder.deterministic import *
10 | from odin.bay.vi.autoencoder.dip_vae import *
11 | from odin.bay.vi.autoencoder.factor_vae import *
12 | from odin.bay.vi.autoencoder.hierarchical_vae import *
13 | from odin.bay.vi.autoencoder.hyperbolic_vae import *
14 | from odin.bay.vi.autoencoder.info_vae import *
15 | from odin.bay.vi.autoencoder.irm_vae import *
16 | from odin.bay.vi.autoencoder.lda_vae import *
17 | from odin.bay.vi.autoencoder.multitask_vae import *
18 | from odin.bay.vi.autoencoder.self_supervised_vae import *
19 | from odin.bay.vi.autoencoder.stochastic_vae import *
20 | from odin.bay.vi.autoencoder.vamprior import *
21 | from odin.bay.vi.autoencoder.variational_autoencoder import *
22 | from odin.bay.vi.autoencoder.vq_vae import *
23 | from odin.bay.vi.autoencoder.semafo_vae import *
24 | from six import string_types
25 | from odin.bay.vi.autoencoder.two_stage_vae import *
26 | 
27 | 
def get_vae(name: str = None) -> Type[VariationalAutoencoder]:
  """Look up a VAE class of this module by its (case-insensitive) name.

  `name` may also be a class or an instance, in which case the name of the
  class (or of the instance's type) is used. Raises `ValueError` when no
  subclass of `VariationalAutoencoder` in the module globals matches.
  """
  if not isinstance(name, string_types):
    source = name if inspect.isclass(name) else type(name)
    name = source.__name__
  name = str(name).strip().lower()
  for key, candidate in globals().items():
    if not (inspect.isclass(candidate) and
            issubclass(candidate, VariationalAutoencoder)):
      continue
    if name == key.lower():
      return candidate
  raise ValueError(f"Cannot find VAE with name '{name}'")
45 | 
46 | 
def get_all_vae() -> List[Type[VariationalAutoencoder]]:
  """List every `VariationalAutoencoder` subclass found in the module
  globals, ordered by class name"""
  found = [
      obj for obj in globals().values()
      if inspect.isclass(obj) and issubclass(obj, VariationalAutoencoder)
  ]
  found.sort(key=lambda cls: cls.__name__)
  return found
54 | 


--------------------------------------------------------------------------------
/tests/networks/test_mixture_density_network.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | 
 5 | import numpy as np
 6 | import seaborn as sns
 7 | import tensorflow as tf
 8 | from matplotlib import pyplot as plt
 9 | from odin import visual as vis
10 | from odin.networks import MixtureDensityNetwork
11 | from scipy import stats
12 | from sklearn.mixture import GaussianMixture
13 | from tensorflow.python.keras import Sequential
14 | 
# Script: fit a scikit-learn GaussianMixture and a MixtureDensityNetwork on
# the same 1-D synthetic data, then plot data and samples of both models.
# NOTE: these variables are set after tensorflow has been imported above.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

# fixed seeds so the generated data and the fits are repeatable
tf.random.set_seed(8)
np.random.seed(8)

# n samples per component; component i is a normal centred at i * 12 with a
# random integer scale drawn from {1, ..., 5}
n = 1200
n_components = 12
x = []
for i in range(n_components):
  x.append(
      stats.norm.rvs(size=(n, 1), loc=i * 12,
                     scale=np.random.randint(1, 6)).astype('float32'))
# final data shape: (n * n_components, 1)
x = np.concatenate(x, axis=0)

# ====== gmm ====== #
# baseline: scikit-learn GMM; `score` is the average log-likelihood per sample
gmm = GaussianMixture(n_components=n_components,
                      covariance_type='spherical',
                      random_state=8)
gmm.fit(x)
gmm_llk = gmm.score(x)
# NOTE(review): gmm_mean is computed but not used below
gmm_mean = gmm.means_.ravel().astype('float32')


# ====== mdn ====== #
def fn_loss(y_true, y_pred):
  """Mean negative log-likelihood of `y_true` under the distribution
  `y_pred` (anything exposing `log_prob`)."""
  # negative log-likelihood
  nllk = tf.reduce_mean(-y_pred.log_prob(y_true))
  return nllk


# the MDN is trained to model x itself (inputs and targets are both x)
mdn = MixtureDensityNetwork(1,
                            n_components=n_components,
                            covariance_type='none')
model = Sequential([mdn])
model.compile(optimizer='adam', loss=fn_loss)
model.fit(x=x, y=x, epochs=48, batch_size=32, verbose=True)

# the model output is used as a distribution (log_prob / sample below)
y = model(x)
mdn_llk = tf.reduce_mean(y.log_prob(x)).numpy()
# NOTE(review): mdn_mean is computed but not used below
mdn_mean = tf.reduce_mean(y.components_distribution.mean(),
                          axis=(0, -1)).numpy()

# ====== visualizing ====== #
# NOTE(review): `sns.distplot` is deprecated in recent seaborn releases -
# confirm the pinned seaborn version before upgrading.
fig = plt.figure()
sns.distplot(x, bins=80)
plt.title('Data')

fig = plt.figure()
sns.distplot(gmm.sample(n * n_components)[0], bins=80)
plt.title('GMM - llk: %.2f' % gmm_llk)

fig = plt.figure()
sns.distplot(y.sample().numpy(), bins=80)
plt.title('MDN - llk: %.2f' % mdn_llk)

# project helper; presumably writes all open figures to disk - TODO confirm
vis.plot_save()
72 | 


--------------------------------------------------------------------------------
/tests/utilities/test_orderedflag.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | import unittest
 5 | 
 6 | import numpy as np
 7 | 
 8 | from odin.utils.ordered_flag import OrderedFlag, auto
 9 | 
10 | np.random.seed(8)
11 | 
12 | 
# Test fixture: OrderedFlag whose four members get their values from auto().
class Enum1(OrderedFlag):
  T1 = auto()
  T2 = auto()
  T3 = auto()
  T4 = auto()
18 | 
19 | 
# Test fixture: same member names as Enum1 but with explicit integer values;
# used to check that members of different flag classes never compare equal.
class Enum2(OrderedFlag):
  T1 = 1
  T2 = 2
  T3 = 3
  T4 = 4
25 | 
26 | 
class OrderedFlagTest(unittest.TestCase):
  """Checks containment, bitwise operators, iteration, (in)equality and
  member counting of `OrderedFlag` using the `Enum1` / `Enum2` fixtures."""

  def test_contain(self):
    combined = Enum1.T2 | Enum1.T1 | Enum1.T3
    self.assertIn(Enum1.T1, combined)
    self.assertNotIn(Enum1.T4, combined)

  def test_and(self):
    left = Enum1.T1 | Enum1.T4
    right = Enum1.T2 | Enum1.T1 | Enum1.T3
    self.assertEqual(left & right, Enum1.T1)

  def test_or(self):
    combined = Enum1.T1 | Enum1.T2
    self.assertEqual(combined.value, '1_2')

  def test_xor(self):
    left = Enum1.T1 | Enum1.T2
    right = Enum1.T2 | Enum1.T1 | Enum1.T3
    self.assertEqual(left ^ right, Enum1.T3)

  def test_not(self):
    combined = Enum1.T1 | Enum1.T2
    self.assertEqual(~combined, Enum1.T3 | Enum1.T4)

  def test_iter(self):
    combined = Enum1.T1 | Enum1.T2
    expected = [Enum1.T1, Enum1.T2]
    for member, reference in zip(combined, expected):
      self.assertIsInstance(member, Enum1)
      self.assertEqual(member, reference)

  def test_base(self):
    # members of different classes are never equal; both `==` and `!=`
    # are exercised explicitly
    for first, second in zip(Enum1, Enum2):
      self.assertNotEqual(first, second)
      self.assertFalse(first == second)
      self.assertEqual(first, first)
      self.assertEqual(second, second)
      self.assertFalse(first != first)
      self.assertFalse(second != second)

    a = Enum1.T1 | Enum1.T2
    b = Enum2.T1 | Enum2.T2
    c = Enum1.T2 | Enum1.T1
    d = Enum1.T1 | Enum1.T2
    self.assertNotEqual(a, b)
    self.assertEqual(a, c)
    self.assertFalse(a != d)
    self.assertFalse(c != d)

  def test_members(self):
    # building combinations must not change the number of members
    t1 = Enum1.T1 | Enum1.T2
    t2 = Enum1.T2 | Enum1.T1 | Enum1.T3
    self.assertEqual(len(Enum1), 4)
    self.assertEqual(len(list(Enum1)), 4)


if __name__ == '__main__':
  unittest.main()
84 | 


--------------------------------------------------------------------------------
/odin/bay/vi/autoencoder/self_supervised_vae.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | from odin.bay.vi.autoencoder.variational_autoencoder import \
 4 |     VariationalAutoencoder
 5 | 
 6 | 
 7 | class AdaptiveVAE(VariationalAutoencoder):
 8 |   r"""
 9 | 
10 |   Arguments:
11 |     base_method : {'g', 'ml'}. Base method for adapting the self-supervised
12 |       objective:
13 |       - 'group' for group VAE
14 |       - 'multilevel' for multi-level VAE
15 | 
16 |   Reference:
17 |     Locatello, F., et al. 2020. "Weakly-Supervised Disentanglement Without
18 |       Compromises". arXiv:2002.02886 [cs, stat].
19 |   """
20 | 
21 |   def __init__(self, base_method="group"):
22 |     super().__init__()
23 | 
24 | 
class WeaklySupervisedVAE(VariationalAutoencoder):
  r""" Placeholder for the weakly-supervised VAE; no objective is
  implemented yet beyond the base `VariationalAutoencoder`.

  Arguments:
    strategy : {'restricted', 'match', 'rank'}. Strategy for the weakly
      supervised objective
      - 'restricted' labelling
      - 'match' pairing
      - 'rank' pairing
      The value is stored as `self.strategy`.
    kwargs : forwarded unchanged to `VariationalAutoencoder.__init__`.

  Reference:
    Shu, R., Chen, Y., Kumar, A., Ermon, S., Poole, B., 2019.
      "Weakly Supervised Disentanglement with Guarantees".
      arXiv:1910.09772 [cs, stat].
    https://github.com/google-research/google-research/tree/master/weak_disentangle
  """

  def __init__(self, strategy="rank", **kwargs):
    # forward the base-class configuration like the sibling VAE classes do
    super().__init__(**kwargs)
    # keep the argument instead of silently dropping it
    self.strategy = strategy
44 | 
45 | 
class GroupVAE(VariationalAutoencoder):
  r""" Placeholder for the group VAE: currently identical to
  `VariationalAutoencoder`, all keyword arguments are forwarded to it.

  Reference:
    Hosoya, H., 2019. "Group-based Learning of Disentangled Representations
      with Generalizability for Novel Contents", in: Proceedings of the
      Twenty-Eighth International Joint Conference on Artificial Intelligence.
    https://github.com/HaruoHosoya/gvae
  """

  def __init__(self, **kwargs):
    super().__init__(**kwargs)
57 | 
58 | 
class MultiLevelVAE(VariationalAutoencoder):
  r""" Placeholder for the multi-level VAE: currently identical to
  `VariationalAutoencoder`, all keyword arguments are forwarded to it.

  Reference:
    Bouchacourt, D., Tomioka, R., Nowozin, S., 2017. "Multi-Level Variational
      Autoencoder: Learning Disentangled Representations from Grouped
      Observations". arXiv:1705.08841 [cs, stat].
    Code: https://github.com/ananyahjha93/multi-level-vae/blob/master/training.py
  """

  def __init__(self, **kwargs):
    super().__init__(**kwargs)
70 | 


--------------------------------------------------------------------------------
/odin/bay/vi/autoencoder/dip_vae.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, List, Optional, Tuple, Union
 2 | 
 3 | import tensorflow as tf
 4 | from odin.bay.vi.autoencoder.beta_vae import BetaVAE
 5 | from odin.bay.vi.losses import disentangled_inferred_prior_loss
 6 | from odin.utils import as_tuple
 7 | 
 8 | 
 9 | class DIPVAE(BetaVAE):
10 |   """ Implementation of disentangled infered prior VAE
11 | 
12 |   Parameters
13 |   ----------
14 |   only_mean : A Boolean. If `True`, applying DIP constraint only on the
15 |       mean of latents `Cov[E(z)]` (i.e. type 'i'), otherwise,
16 |       `E[Cov(z)] + Cov[E(z)]` (i.e. type 'ii')
17 |   lambda_offdiag : A Scalar. Weight for penalizing the off-diagonal part of
18 |       covariance matrix.
19 |   lambda_diag : A Scalar.
20 |       Weight for penalizing the diagonal.
21 | 
22 |   References
23 |   ----------
24 |   Kumar, A., Sattigeri, P., Balakrishnan, A., 2018. "Variational Inference
25 |       of Disentangled Latent Concepts from Unlabeled Observations".
26 |       arXiv:1711.00848 [cs, stat].
27 |   """
28 | 
29 |   def __init__(self,
30 |                only_mean: bool = False,
31 |                lambda_diag: float = 1.0,
32 |                lambda_offdiag: float = 2.0,
33 |                beta: float = 1.0,
34 |                **kwargs):
35 |     super().__init__(beta=beta, **kwargs)
36 |     self.only_mean = bool(only_mean)
37 |     self.lambda_diag = tf.convert_to_tensor(lambda_diag,
38 |                                             dtype=self.dtype,
39 |                                             name='lambda_diag')
40 |     self.lambda_offdiag = tf.convert_to_tensor(lambda_offdiag,
41 |                                                dtype=self.dtype,
42 |                                                name='lambda_offdiag')
43 | 
44 |   def elbo_components(self, inputs, training=None, mask=None):
45 |     llk, kl = super().elbo_components(inputs, mask=mask, training=training)
46 |     px_z, qz_x = self.last_outputs
47 |     for z, qz in zip(as_tuple(self.latents), as_tuple(qz_x)):
48 |       dip = disentangled_inferred_prior_loss(qz,
49 |                                              only_mean=self.only_mean,
50 |                                              lambda_offdiag=self.lambda_offdiag,
51 |                                              lambda_diag=self.lambda_diag)
52 |       kl[f'dip_{z.name}'] = dip
53 |     return llk, kl
54 | 


--------------------------------------------------------------------------------
/tests/backend/test_maths.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | import unittest
 5 | from tempfile import mkstemp
 6 | 
 7 | import numpy as np
 8 | 
 9 | from odin import backend as bk
10 | from tests.backend.utils import assert_equal, x, y, z
11 | 
12 | np.random.seed(8)
13 | 
14 | FRAMEWORKS = ('numpy', 'torch', 'tensorflow')
15 | 
16 | 
class BackendMathTest(unittest.TestCase):
  """Cross-framework consistency checks for the math helpers of
  `odin.backend` (numpy, torch and tensorflow)."""

  def test_matmul(self):
    # (left shape, right shape, expected output shape)
    cases = [
        [(2, 3), (4, 3, 5), (4, 2, 5)],
        [(2, 3, 4), (4, 5), (2, 3, 5)],
        [(5, 3, 4), (5, 4, 6), (5, 3, 6)],
    ]
    for lhs_shape, rhs_shape, expected_shape in cases:
      lhs = np.random.rand(*lhs_shape)
      rhs = np.random.rand(*rhs_shape)
      for fw in FRAMEWORKS:
        product = bk.matmul(bk.array(lhs, fw), bk.array(rhs, fw))
        self.assertEqual(product.shape, expected_shape, msg=fw)

  def test_norm(self):
    # x, y, z are the shared fixtures imported from tests.backend.utils
    for p in [1, 2, 'fro', np.inf]:
      for axis in [None, 0, 1, (0, 1)]:
        for keepdims in (True, False):
          a = bk.norm(bk.flatten(x, 2), p=p, axis=axis, keepdims=keepdims)
          b = bk.norm(bk.flatten(y, 2), p=p, axis=axis, keepdims=keepdims)
          c = bk.norm(bk.flatten(z, 2), p=p, axis=axis, keepdims=keepdims)
          assert_equal(self, (p, axis), a, b, c)

  def test_countnonzero(self):
    data = np.random.randint(0, 10, size=(25, 12, 8))
    for axis in (None, 0, 1, 2, (1, 2)):
      for keepdims in (True, False):
        for dtype in ('int32', 'float32'):
          counts = []
          for fw in FRAMEWORKS:
            counts.append(
                bk.count_nonzero(bk.array(data, fw),
                                 axis=axis,
                                 keepdims=keepdims,
                                 dtype=dtype))
          assert_equal(self, (axis, keepdims, dtype), *counts)

  def test_clip_by_value(self):
    # one-sided and two-sided clipping
    for minval, maxval in [(None, 1), (1, None), (1, 2)]:
      clipped = [bk.clip(tensor, minval, maxval) for tensor in (x, y, z)]
      assert_equal(self, (minval, maxval), *clipped)


if __name__ == '__main__':
  unittest.main()
69 | 


--------------------------------------------------------------------------------
/benchmarks/queue_vs_zmq.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import zmq
 3 | from multiprocessing import Process, Queue
 4 | import time
 5 | import numpy as np
 6 | import marshal
 7 | from odin.utils import array2bytes, bytes2array
 8 | from odin.utils.mpi import QueueZMQ
 9 | 
# Number of messages each sender pushes and each worker reads per run.
NB_MESSAGE = 800000 * 2


# Fixed float32 payload sent in every message; its sum and sum of squares
# are printed so they can be compared with what the worker prints.
X = np.random.randn(80, 120).astype('float32')
print(np.sum(X), np.sum(X**2))

# ZeroMQ context created at import time and used by both zmq functions.
context = zmq.Context(io_threads=1)
# context = zmq.Context.instance()
18 | 
19 | 
20 | # ===========================================================================
21 | # zmq
22 | # ===========================================================================
def worker_zmq():
    """Receive and decode `NB_MESSAGE` messages from a PULL socket connected
    to tcp://127.0.0.1:5557, print the elapsed time, the message rate and
    the checksums of the last message, then exit the process with status 1.
    """
    receiver = context.socket(zmq.PULL)
    receiver.connect("tcp://127.0.0.1:5557")

    started = time.time()
    for _ in range(NB_MESSAGE):
        message = bytes2array(receiver.recv())
    duration = time.time() - started
    rate = NB_MESSAGE / duration
    print("Zmq Duration: %s" % duration, rate,
          np.sum(message), np.sum(message**2))
    sys.exit(1)
37 | 
38 | 
def main_zmq():
    """Start `worker_zmq` in a child process, then push `NB_MESSAGE`
    serialized copies of `X` through a PUSH socket bound to
    tcp://127.0.0.1:5557."""
    consumer = Process(target=worker_zmq, args=())
    consumer.start()
    sender = context.socket(zmq.PUSH)
    sender.bind("tcp://127.0.0.1:5557")

    for _ in range(NB_MESSAGE):
        sender.send(array2bytes(X))
46 | 
47 | 
48 | # ===========================================================================
49 | # Queue
50 | # ===========================================================================
def worker_queue(q):
    """Read and decode `NB_MESSAGE` messages from the queue `q`, print the
    elapsed time, the message rate and the checksums of the last message,
    then exit the process with status 1."""
    started = time.time()
    for _ in range(NB_MESSAGE):
        message = bytes2array(q.get())
    duration = time.time() - started
    rate = NB_MESSAGE / duration
    print("Queue Duration: %s" % duration, rate,
          np.sum(message), np.sum(message**2))
    sys.exit(1)
62 | 
63 | 
def main_queue():
    """Start `worker_queue` in a child process and feed it `NB_MESSAGE`
    serialized copies of `X` through a `multiprocessing.Queue`."""
    channel = Queue()
    consumer = Process(target=worker_queue, args=(channel,))
    consumer.start()
    for _ in range(NB_MESSAGE):
        channel.put(array2bytes(X))
69 | 
70 | 
71 | # ===========================================================================
72 | # Run the test
73 | # ===========================================================================
if __name__ == "__main__":
    # Only one benchmark runs per invocation; swap the comment to measure
    # the ZeroMQ variant instead of the multiprocessing.Queue variant.
    # main_zmq()
    main_queue()
    # shut down the module-level ZeroMQ context
    context.term()
78 | 


--------------------------------------------------------------------------------
/benchmarks/strict_vs_non_strict_scan.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | 
 3 | import os
 4 | os.environ['ODIN'] = 'theano,gpu,float32'
 5 | 
 6 | from odin import backend
 7 | 
 8 | import numpy as np
 9 | import theano
10 | from theano import tensor as T
11 | 
12 | from odin.utils import UnitTimer
13 | import time
14 | 
# Shared variables referenced from inside both scan step functions.
const1 = theano.shared(np.random.rand(25, 25))
const2 = theano.shared(np.random.rand(18, 18))

# ===========================================================================
# Strict scan
# ===========================================================================


# Step for the strict scan: the shared variables are received explicitly as
# the non-sequences c1 / c2, although the body refers to the module-level
# const1 / const2 (the same objects that are passed in below).
def step_strict(s1, o1, c1, c2):
    return (T.dot(o1, s1) + T.dot(o1.T, s1) + T.dot(o1, s1.T) + T.dot(o1.T, s1.T) +
        T.sum(const1) + T.sum(const2**2) + T.sum(const1**3) +
        T.sum(const2.T) + T.sum(const2.T**2) + T.sum(const2.T**3))

# scan over 12 * 12 * 12 matrices of shape (8, 8), starting from an all-ones
# (8, 8) output
outputs, update = theano.scan(step_strict,
    sequences=theano.shared(np.arange(12 * 12 * 12 * 8 * 8).reshape(12 * 12 * 12, 8, 8)),
    outputs_info=theano.shared(np.ones((8, 8))),
    non_sequences=[const1, const2],
    strict=True)

f_strict = theano.function(inputs=[], outputs=outputs, allow_input_downcast=True)


# ===========================================================================
# Non-strict scan
# ===========================================================================
# Same computation, but the shared variables are only reached through the
# enclosing scope and are not declared as non-sequences.
def step_non(s1, o1):
    return (T.dot(o1, s1) + T.dot(o1.T, s1) + T.dot(o1, s1.T) + T.dot(o1.T, s1.T) +
        T.sum(const1) + T.sum(const2**2) + T.sum(const1**3) +
        T.sum(const2.T) + T.sum(const2.T**2) + T.sum(const2.T**3))

outputs, update = theano.scan(step_non,
    sequences=theano.shared(np.arange(12 * 12 * 12 * 8 * 8).reshape(12 * 12 * 12, 8, 8)),
    outputs_info=theano.shared(np.ones((8, 8))),
    strict=False)
f_non = theano.function(inputs=[], outputs=outputs, allow_input_downcast=True)

time.sleep(0.5)

# Three rounds; in each round both compiled functions are called 8 times
# inside a UnitTimer(8) block (presumably reporting the time per call -
# TODO confirm against odin.utils.UnitTimer). The inner loops reuse the
# name `i`, which does not affect the outer `range(3)` iteration.
for i in range(3):
    print('Non-strict scan:')
    with UnitTimer(8):
        for i in range(8):
            f_non()

    print('Strict scan:')
    with UnitTimer(8):
        for i in range(8):
            f_strict()
63 | 
64 | # Non - strict scan:
65 | # Time: 0.064988 (sec)
66 | # Strict scan:
67 | # Time: 0.058314 (sec)
68 | # Non - strict scan:
69 | # Time: 0.059891 (sec)
70 | # Strict scan:
71 | # Time: 0.067796 (sec)
72 | # Non - strict scan:
73 | # Time: 0.059809 (sec)
74 | # Strict scan:
75 | # Time: 0.065363 (sec)
76 | 


--------------------------------------------------------------------------------
/examples/nist_sre/run.sh:
--------------------------------------------------------------------------------
 1 | # For training: fisher,mx6,sre04,sre05,sre06,sre08,sre10,swb,voxceleb1,voxceleb2
 2 | # For noise   : musan, rirs (listed as `noise` in -exclude to leave them out)
 3 | 
 4 | # Test training on voxceleb1
 5 | python train_xvec.py mfcc_musan_rirs -exclude fisher,mx6,sre04,sre05,sre06,sre08,sre10,swb,voxceleb2 -mindur 1 -minutt 8 --override
 6 | # Test training on voxceleb2
 7 | python train_xvec.py mfcc_musan_rirs -exclude fisher,mx6,sre04,sre05,sre06,sre08,sre10,swb,voxceleb1 -mindur 1 -minutt 8 --override
 8 | # all sre and mixer
 9 | python train_xvec.py mfcc_musan_rirs -exclude fisher,swb,voxceleb1,voxceleb2 -mindur 4 -minutt 8 --override
10 | # only fisher
11 | python train_xvec.py mfcc_musan_rirs -exclude mx6,sre04,sre05,sre06,sre08,sre10,swb,voxceleb1,voxceleb2 -mindur 1 -minutt 8 --override
12 | # everything except voxceleb
13 | python train_xvec.py mfcc_musan_rirs -exclude voxceleb1,voxceleb2 -mindur 3 -minutt 8 --override
14 | 
15 | # test training on only one of the sre (without noise: every run excludes `noise`)
16 | # sre04
17 | python train_xvec.py mfcc_musan_rirs -exclude fisher,mx6,sre05,sre06,sre08,sre10,swb,voxceleb1,voxceleb2,noise
18 | # sre05
19 | python train_xvec.py mfcc_musan_rirs -exclude fisher,mx6,sre04,sre06,sre08,sre10,swb,voxceleb1,voxceleb2,noise
20 | # sre06
21 | python train_xvec.py mfcc_musan_rirs -exclude fisher,mx6,sre04,sre05,sre08,sre10,swb,voxceleb1,voxceleb2,noise
22 | # sre08 (`noise` was missing here, unlike the other runs of this section)
23 | python train_xvec.py mfcc_musan_rirs -exclude fisher,mx6,sre04,sre05,sre06,sre10,swb,voxceleb1,voxceleb2,noise
24 | # sre10
25 | python train_xvec.py mfcc_musan_rirs -exclude fisher,mx6,sre04,sre05,sre06,sre08,swb,voxceleb1,voxceleb2,noise
26 | 
27 | # swb
28 | python train_xvec.py mfcc_musan_rirs -exclude fisher,mx6,sre04,sre05,sre06,sre08,sre10,voxceleb1,voxceleb2,noise
29 | 
30 | # test training on only sre06 and noise
31 | python train_xvec.py mfcc_musan_rirs -exclude fisher,mx6,sre04,sre05,sre08,sre10,swb,voxceleb1,voxceleb2
32 | 
33 | # No Voxceleb datasets: train, then score sre18 with the sre04-sre10 backend
34 | python train_xvec.py mfcc_musan_rirs -exclude voxceleb1,voxceleb2
35 | python make_score.py mfcc_musan_rirs -sys xvec -sysid -1 -score sre18dev,sre18eval -backend sre04,sre05,sre06,sre08,sre10 -exclude voxceleb1,voxceleb2
36 | 
37 | # same but without noise
38 | python train_xvec.py mfcc_musan_rirs -exclude voxceleb1,voxceleb2,noise
39 | python make_score.py mfcc_musan_rirs -sys xvec -sysid -1 -score sre18dev,sre18eval -backend sre04,sre05,sre06,sre08,sre10 -exclude voxceleb1,voxceleb2,noise
40 | 
41 | # No Voxceleb and fisher datasets
42 | python train_xvec.py mfcc_musan_rirs -exclude voxceleb1,voxceleb2,fisher
43 | python make_score.py mfcc_musan_rirs -sys xvec -sysid -1 -score sre18dev,sre18eval -backend sre04,sre05,sre06,sre08,sre10 -exclude voxceleb1,voxceleb2,fisher
44 | 
45 | 


--------------------------------------------------------------------------------
/odin/explain/adversarial_attack.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | import time
 5 | 
 6 | import numpy as np
 7 | import tensorflow as tf
 8 | from sklearn.base import BaseEstimator
 9 | 
10 | from odin.traininglain.helpers import _may_add_batch_dim, get_pretrained_model
11 | 
12 | 
13 | # it seems adding tf.function don't improve much performance
14 | # @tf.function
15 | def _adversarial_optimizing(model, X, y, X_org, loss_function, l2_norm, l1_norm,
16 |                             learning_rate):
17 |   with tf.GradientTape() as tape:
18 |     tape.watch(X)
19 |     y_pred = model(X)
20 |     loss = loss_function(y, y_pred)
21 |     if l2_norm > 0:
22 |       loss += l2_norm * tf.norm(X - X_org, ord=2)
23 |     if l1_norm > 0:
24 |       loss += l1_norm * tf.norm(X - X_org, ord=1)
25 |   gradients = tape.gradient(loss, X)
26 |   # Normalize the gradients.
27 |   gradients /= tf.math.reduce_std(gradients) + 1e-8
28 |   # gradient descent
29 |   X = X - gradients * learning_rate
30 |   return loss, X
31 | 
32 | 
33 | class AdversarialAttack(BaseEstimator):
34 | 
35 |   def __init__(self,
36 |                model,
37 |                loss_function=tf.losses.sparse_categorical_crossentropy,
38 |                model_kwargs={'include_top': True},
39 |                epoch=80,
40 |                l2_norm=0.0,
41 |                l1_norm=0.0,
42 |                learning_rate=0.01,
43 |                verbose=10):
44 |     super().__init__()
45 |     self.model = get_pretrained_model(model, model_kwargs)
46 |     self.input_shape = self.model.input_shape
47 |     self.dtype = self.model.dtype
48 |     self.loss_function = loss_function
49 |     # training settings
50 |     self.learning_rate = learning_rate
51 |     self.epoch = epoch
52 |     self.l2_norm = l2_norm
53 |     self.l1_norm = l1_norm
54 |     self.verbose = int(verbose)
55 | 
56 |   def fit(self, X, y):
57 |     X = _may_add_batch_dim(X, self.input_shape)
58 |     X = tf.convert_to_tensor(X, dtype=self.dtype)
59 |     X_org = X
60 |     y = tf.convert_to_tensor(y, dtype=self.model.output.dtype)
61 |     benchmark = []
62 | 
63 |     for epoch in range(self.epoch):
64 |       start_time = time.time()
65 |       loss, X = _adversarial_optimizing(self.model, X, y, X_org,
66 |                                         self.loss_function, self.l2_norm,
67 |                                         self.l1_norm, self.learning_rate)
68 |       benchmark.append(time.time() - start_time)
69 |       if self.verbose > 0 and (epoch + 1) % self.verbose == 0:
70 |         print("Epoch#%d Loss:%.4f (%.2f sec/epoch)" %
71 |               (epoch + 1, loss, np.mean(benchmark)))
72 |     return X.numpy()
73 | 


--------------------------------------------------------------------------------
/odin/preprocessing/confs/openSMILEpitch.cfg:
--------------------------------------------------------------------------------
 1 | ///////////////////////////////////////////////////////////////////////////////////////
 2 | ///////// > openSMILE configuration file for speech prosody features //////////////////
 3 | /////////   pitch and intensity                                      //////////////////
 4 | /////////                                                            //////////////////
 5 | ///////// (c) 2014-2016 audEERING.                                   //////////////////
 6 | /////////     All rights reserved. See file COPYING for details.     //////////////////
 7 | ///////////////////////////////////////////////////////////////////////////////////////
 8 | 
 9 | ;;;;;;; component list ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10 | [componentInstances:cComponentManager]
11 | instance[dataMemory].type=cDataMemory
12 | ;printLevelStats=6
13 | 
14 | [componentInstances:cComponentManager]
15 | instance[waveIn].type=cWaveSource
16 | instance[frame].type=cFramer
17 | instance[int].type=cIntensity
18 | instance[win].type=cWindower
19 | instance[fft].type=cTransformFFT
20 | instance[fftmp].type=cFFTmagphase
21 | instance[smo].type=cContourSmoother
22 | instance[F0_lldconcat].type=cVectorConcat
23 | instance[lldcsvsink].type=cCsvSink
24 | 
25 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;; ACF or SHS ;;;;;;;;;;;;;;;;;;;;;;;;;;;
26 | {method}
27 | {f0}
28 | 
29 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;; main section ;;;;;;;;;;;;;;;;;;;;;;;;;;;
30 | [waveIn:cWaveSource]
31 | writer.dmLevel=wave
32 | buffersize_sec = 10.0
33 | filename=\cm[inputfile(I):name of input file]
34 | start=0
35 | end=-1
36 | monoMixdown=1
37 | outFieldName = pcm
38 | 
39 | [frame:cFramer]
40 | reader.dmLevel=wave
41 | writer.dmLevel=outp
42 | frameSize = {framesize}
43 | frameStep = {framestep}
44 | frameCenterSpecial = left
45 | 
46 | [int:cIntensity]
47 | reader.dmLevel = outp
48 | writer.dmLevel = intens
49 | copyInputName = 1
50 | processArrayFields = 1
51 | intensity = 0
52 | loudness = 1
53 | 
54 | [win:cWindower]
55 | reader.dmLevel=outp
56 | writer.dmLevel=win
57 | winFunc={window}
58 | gain=1.0
59 | sigma=0.4
60 | 
61 | [fft:cTransformFFT]
62 | reader.dmLevel=win
63 | writer.dmLevel=fftc
64 | zeroPadSymmetric = 0
65 | 
66 | [fftmp:cFFTmagphase]
67 | reader.dmLevel=fftc
68 | writer.dmLevel=fftmag
69 | 
70 | [smo:cContourSmoother]
71 | reader.dmLevel = pitch;intens
72 | writer.dmLevel = lld0
73 | nameAppend = sma
74 | copyInputName = 1
75 | noPostEOIprocessing = 0
76 | smaWin = 3
77 | 
78 | [F0_lldconcat:cVectorConcat]
79 | reader.dmLevel = lld0{f0_flag}
80 | writer.dmLevel = lld
81 | includeSingleElementFields = 1
82 | 
83 | [lldcsvsink:cCsvSink]
84 | reader.dmLevel = lld
85 | filename= \cm[csvoutput(O):name of output file]
86 | append = 0
87 | timestamp = 1
88 | number = 0
89 | printHeader = 0
90 | errorOnNoOutput = 1
91 | delimChar = ,
92 | 


--------------------------------------------------------------------------------
/examples/voxceleb/speech_features_extraction.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from __future__ import print_function, division, absolute_import
 3 | import matplotlib
 4 | matplotlib.use('Agg')
 5 | 
 6 | import os
 7 | os.environ['ODIN'] = 'float32,gpu'
 8 | 
 9 | import numpy as np
10 | 
11 | from odin import backend as K, nnet as N, visual as V
12 | from odin import preprocessing as pp
13 | from odin.utils import (args_parse, stdio,
14 |                         get_module_from_path, get_script_path)
15 | from odin.utils.mpi import cpu_count
16 | 
17 | from utils import (WAV_FILES, SAMPLED_WAV_FILE,
18 |                    PATH_ACOUSTIC_FEAT, PATH_EXP)
19 | # ===========================================================================
20 | # Config
21 | # ===========================================================================
22 | stdio(os.path.join(PATH_EXP, 'features_extraction.log'))
23 | args = args_parse(descriptions=[
24 |     ('recipe', 'the name of function defined in feature_recipes.py', None),
25 |     ('--debug', 'enable debug or not', None, False)
26 | ])
27 | DEBUG = args.debug
28 | # ===========================================================================
29 | # Create the recipes
30 | # ===========================================================================
31 | extractor = get_module_from_path(identifier=str(args.recipe),
32 |                                  prefix='feature_recipes',
33 |                                  path=get_script_path())
34 | assert len(extractor) > 0, \
35 | "Cannot find any recipe with name: '%s' from path: '%s'" % (args.recipe, get_script_path())
36 | recipe = extractor[0](DEBUG)
37 | # ====== debugging ====== #
38 | if DEBUG:
39 |   with np.warnings.catch_warnings():
40 |     np.warnings.filterwarnings('ignore')
41 |     for path, name in SAMPLED_WAV_FILE:
42 |       feat = recipe.transform(path)
43 |       assert feat['bnf'].shape[0] == feat['mspec'].shape[0]
44 |       V.plot_multiple_features(feat, title=feat['name'])
45 |     V.plot_save(os.path.join(PATH_EXP, 'features_%s.pdf' % args.recipe))
46 |     exit()
47 | # ===========================================================================
48 | # Prepare the processor
49 | # ===========================================================================
50 | with np.warnings.catch_warnings():
51 |   np.warnings.filterwarnings('ignore')
52 |   jobs = list(WAV_FILES.keys())
53 |   processor = pp.FeatureProcessor(jobs=jobs,
54 |       path=os.path.join(PATH_ACOUSTIC_FEAT, args.recipe),
55 |       extractor=recipe,
56 |       n_cache=1200,
57 |       ncpu=min(18, cpu_count() - 2),
58 |       override=True,
59 |       identifier='name',
60 |       log_path=os.path.join(PATH_EXP, 'processor_%s.log' % args.recipe),
61 |       stop_on_failure=False)
62 |   processor.run()
63 |   pp.validate_features(processor,
64 |                        nb_samples=12,
65 |                        path=os.path.join(PATH_EXP, args.recipe),
66 |                        override=True)
67 | 


--------------------------------------------------------------------------------
/examples/vae/plotting_results.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | from odin import visual as vs
 4 | import seaborn as sns
 5 | from matplotlib import pyplot as plt
 6 | 
 7 | sns.set()
 8 | 
 9 | # === 1. varying zdim and the active units MNIST
10 | # zdim = [2, 5, 10, 20, 35, 60, 80]
11 | # au = [2, 5, 7, 7, 7, 7, 7]
12 | # llk = [-134.05, -103.22, -97.78, -98.35, -99.04, -96.23, -96.27]
13 | # df = pd.DataFrame({'zdim': zdim, 'active latent units': au, 'llk': llk})
14 | # print(df)
15 | #
16 | # plt.figure(figsize=(5, 4), dpi=200)
17 | # sns.scatterplot(x='zdim', y='llk', data=df,
18 | #                 hue='active latent units',
19 | #                 size='active latent units',
20 | #                 sizes=(80, 150),
21 | #                 alpha=0.9)
22 | # plt.xticks(zdim, [str(i) for i in zdim])
23 | # vs.plot_save(verbose=True)
24 | 
25 | 
26 | # === 2. varying zdim and the active units CIFAR10
27 | # zdim = [32, 64, 128, 256, 512, 512, 1024]
28 | # au = [32, 64, 128, 256, 466, 512, 466]
29 | # llk = [-13605.55, -12715.09, -11767.64, -10701.66, -9662.88, -142.95,
30 | #        -9653.24]
31 | # df = pd.DataFrame({'zdim': zdim, 'active latent units': au, 'llk': llk})
32 | # print(df)
33 | #
34 | # plt.figure(figsize=(5, 4), dpi=200)
35 | # sns.scatterplot(x='zdim', y='llk', data=df,
36 | #                 hue='active latent units',
37 | #                 size='active latent units',
38 | #                 sizes=(80, 200),
39 | #                 alpha=0.8)
40 | # plt.gca().set_xscale('log')
41 | # plt.xticks(zdim, [str(i) for i in zdim])
42 | # vs.plot_save(verbose=True)
43 | 
44 | # === 3. varying py semafoVAE
45 | 
46 | py = [0.002, 0.004, 0.01, 0.05, 0.1, 0.2, 0.5]
47 | llk = [-3456.38, -3460.43, -3457.71, -3456.63, -3457.03, -3456.75, -3456.91]
48 | fid = [28.78, 27.80,  32.11, 26.84, 28.61, 28.43,  25.12]
49 | dci = [60.84, 68.49, 74.22, 81.79, 80.88, 83.72, 85.12]
50 | 
51 | plt.figure(figsize=(10, 3), dpi=200)
52 | 
53 | plt.subplot(1, 3, 1)
54 | plt.plot(py, llk, label='SemafoVAE')
55 | plt.plot([py[0], py[-1]], [-3464.40, -3464.40], label='VAE baseline', color='r')
56 | plt.gca().set_xscale('log')
57 | plt.xticks(py, [str(i) for i in py], rotation=-30)
58 | plt.legend(fontsize=8)
59 | plt.xlabel('Supervision rate')
60 | plt.title('Test log-likelihood')
61 | 
62 | plt.subplot(1, 3, 2)
63 | plt.plot(py, fid, label='SemafoVAE')
64 | plt.plot([py[0], py[-1]], [74.57, 74.57], label='VAE baseline', color='r')
65 | plt.gca().set_xscale('log')
66 | plt.xticks(py, [str(i) for i in py], rotation=-30)
67 | plt.legend(fontsize=8)
68 | plt.xlabel('Supervision rate')
69 | plt.title('FID')
70 | 
71 | plt.subplot(1, 3, 3)
72 | plt.plot(py, dci, label='SemafoVAE')
73 | plt.plot([py[0], py[-1]], [64.82, 64.82], label='VAE baseline', color='r')
74 | plt.gca().set_xscale('log')
75 | plt.xticks(py, [str(i) for i in py], rotation=-30)
76 | plt.legend(fontsize=8)
77 | plt.xlabel('Supervision rate')
78 | plt.title('DCI')
79 | 
80 | plt.tight_layout()
81 | vs.plot_save(verbose=True)
82 | 


--------------------------------------------------------------------------------
/tests/networks/test_keras_torch.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | 
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | import torch
 8 | 
 9 | from odin import backend as K
10 | from odin import networks as net  # tensorflow networks
11 | from odin import networks_torch as nt  # pytorch networks
12 | 
13 | tf.random.set_seed(8)
14 | torch.manual_seed(8)
15 | np.random.seed(8)
16 | 
17 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
18 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
19 | 
20 | x = torch.Tensor(np.random.rand(12, 8))
21 | x1 = torch.Tensor(np.random.rand(12, 25, 8))
22 | # ===========================================================================
23 | # RNN
24 | # ===========================================================================
25 | f = nt.LSTM(units=32,
26 |             go_backwards=True,
27 |             unit_forget_bias=True,
28 |             return_sequences=True,
29 |             return_state=True,
30 |             bidirectional=True)
31 | y = f(x1)
32 | print(x1.shape, [i.shape for i in y])
33 | 
34 | f = nt.SimpleRNN(units=32, go_backwards=True)
35 | y = f(x1)
36 | print(x1.shape, y.shape)
37 | 
38 | f = nt.GRU(units=32, go_backwards=False, return_state=True)
39 | y = f(x1)
40 | print(x1.shape, [i.shape for i in y])
41 | 
42 | # ====== tensorflow ====== #
43 | print()
44 | f = net.LSTM(units=32,
45 |              go_backwards=True,
46 |              unit_forget_bias=True,
47 |              return_sequences=True,
48 |              return_state=True,
49 |              bidirectional=True)
50 | y = f(x1.numpy())
51 | print(x1.shape, [i.shape for i in y])
52 | 
53 | f = net.SimpleRNN(units=32, go_backwards=True)
54 | y = f(x1.numpy())
55 | print(x1.shape, y.shape)
56 | 
57 | f = net.GRU(units=32, go_backwards=False, return_state=True)
58 | y = f(x1.numpy())
59 | print(x1.shape, [i.shape for i in y])
60 | 
61 | print()
62 | # ===========================================================================
63 | # Basics
64 | # ===========================================================================
65 | f = nt.Dense(units=512)
66 | y = f(x)
67 | print(x.shape, y.shape)
68 | 
69 | # ===========================================================================
70 | # CNN
71 | # ===========================================================================
72 | x = torch.Tensor(np.random.rand(12, 25, 8))
73 | f = nt.Conv1D(filters=128, kernel_size=3)
74 | y = f(x)
75 | print(x.shape, y.shape)
76 | 
77 | x = torch.Tensor(np.random.rand(12, 25, 8))
78 | f = nt.ConvCausal(filters=128, kernel_size=3)
79 | y = f(x)
80 | print(x.shape, y.shape)
81 | 
82 | x = torch.Tensor(np.random.rand(12, 25, 8))
83 | f = nt.Conv1D(filters=128, kernel_size=3, data_format='channels_first')
84 | y = f(x)
85 | print(x.shape, y.shape)
86 | 
87 | x = torch.Tensor(np.random.rand(12, 32, 32, 3))
88 | f = nt.Conv2D(filters=128, kernel_size=3, padding='same')
89 | y = f(x)
90 | print(x.shape, y.shape)
91 | 
92 | x = torch.Tensor(np.random.rand(12, 32, 32, 32, 3))
93 | f = nt.Conv3D(filters=128, kernel_size=3)
94 | y = f(x)
95 | print(x.shape, y.shape)
96 | 


--------------------------------------------------------------------------------
/tests/bayesian/test_negative_binomial_disp.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | 
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | import torch
 8 | 
 9 | from odin.bay.distributions import NegativeBinomialDisp, ZeroInflated
10 | from odin.stats import describe
11 | from scvi.models.log_likelihood import log_nb_positive, log_zinb_positive
12 | 
13 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
14 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
15 | tf.random.set_seed(8)
16 | np.random.seed(8)
17 | torch.manual_seed(8)
18 | 
19 | 
20 | def torch_nb(mean, disp):
21 |   px_rate = torch.Tensor(mean)
22 |   px_r = torch.Tensor(disp)
23 | 
24 |   p = px_rate / (px_rate + px_r)
25 |   r = px_r
26 |   l_train = torch.distributions.Gamma(concentration=r,
27 |                                       rate=(1 - p) / p).sample()
28 |   l_train = torch.clamp(l_train, max=1e18)
29 |   X = torch.distributions.Poisson(l_train).sample()
30 |   return X
31 | 
32 | 
33 | shape = (12000, 800)
34 | x = np.random.randint(1, 20, size=shape).astype('float32')
35 | mean = np.random.randint(1, 20, size=shape).astype('float32')
36 | disp = np.random.randint(1, 20, size=shape).astype('float32')
37 | disp_col = np.random.randint(1, 20, size=shape[1]).astype('float32')
38 | disp_row = np.random.randint(1, 20, size=shape[0]).astype('float32')
39 | pi = np.random.rand(*shape).astype('float32')
40 | 
41 | # constant dispersion (only for tensorflow)
42 | nb = NegativeBinomialDisp(loc=mean, disp=2)
43 | llk1 = tf.reduce_sum(nb.log_prob(x), axis=1).numpy()
44 | print(llk1)
45 | 
46 | # broadcast disp in column
47 | nb = NegativeBinomialDisp(loc=mean, disp=disp_col)
48 | llk1 = tf.reduce_sum(nb.log_prob(x), axis=1).numpy()
49 | llk2 = log_nb_positive(x=torch.Tensor(x),
50 |                        mu=torch.Tensor(mean),
51 |                        theta=torch.Tensor(disp_col)).numpy()
52 | print(np.all(np.isclose(llk1, llk2)))
53 | 
54 | # broadcast disp in row
55 | try:
56 |   nb = NegativeBinomialDisp(loc=mean, disp=disp_row)
57 |   llk1 = tf.reduce_sum(nb.log_prob(x), axis=1).numpy()
58 |   llk2 = log_nb_positive(x=torch.Tensor(x),
59 |                          mu=torch.Tensor(mean),
60 |                          theta=torch.Tensor(disp_row)).numpy()
61 |   print(np.all(np.isclose(llk1, llk2)))
62 | except:
63 |   print("NOT POSSIBLE TO BROADCAST the first dimension")
64 | 
65 | # all disp available
66 | nb = NegativeBinomialDisp(loc=mean, disp=disp)
67 | llk1 = tf.reduce_sum(nb.log_prob(x), axis=1).numpy()
68 | llk2 = log_nb_positive(x=torch.Tensor(x),
69 |                        mu=torch.Tensor(mean),
70 |                        theta=torch.Tensor(disp)).numpy()
71 | print(np.all(np.isclose(llk1, llk2)))
72 | 
73 | s1 = nb.sample().numpy()
74 | s2 = torch_nb(mean, disp).numpy()
75 | print(describe(s1))
76 | print(describe(s2))
77 | 
78 | zinb = ZeroInflated(nb, probs=pi)
79 | llk1 = tf.reduce_sum(zinb.log_prob(x), axis=1).numpy()
80 | llk2 = log_zinb_positive(x=torch.Tensor(x),
81 |                          mu=torch.Tensor(mean),
82 |                          theta=torch.Tensor(disp),
83 |                          pi=torch.Tensor(pi)).numpy()
84 | print(llk1)
85 | print(llk2)
86 | 


--------------------------------------------------------------------------------
/examples/machine_learning/gmm_fitting.py:
--------------------------------------------------------------------------------
 1 | # ===========================================================================
 2 | # Conclusion
 3 | # * A higher downsample rate requires more iterations of the E-M algorithm
 4 | # * Enabling stochastic_downsample significantly reduces the
 5 | #   number of iterations
 6 | # ===========================================================================
 7 | import matplotlib as mpl
 8 | mpl.use('Agg')
 9 | import matplotlib.pyplot as plt
10 | 
11 | import numpy as np
12 | from odin.ml import GMM
13 | from odin import visual as V
14 | 
15 | np.random.seed(1234)
16 | nmix = 8
17 | pdf_path = '/tmp/tmp.pdf'
18 | 
19 | # ===========================================================================
20 | # Generate Artificial data
21 | # ===========================================================================
22 | X = []
23 | y = []
24 | stats_mean = []
25 | stats_sigma = []
26 | for i in range(nmix):
27 |   m = (np.random.randint(-18, 18, size=(1, 2)) +
28 |        np.random.randint(-18, 18, size=(1, 2)))
29 |   s = np.random.rand(1, 2) + np.random.rand(1, 2)
30 |   stats_mean.append(m)
31 |   stats_sigma.append(np.diag(s.ravel()))
32 |   dat = m + s * np.random.randn(512, 2)
33 |   X.append(dat)
34 |   y.append(dat)
35 | X = np.concatenate(X, axis=0)
36 | print(X.shape)
37 | 
38 | stats_mean = np.concatenate(stats_mean, axis=0)
39 | 
40 | # ===========================================================================
41 | # Plot
42 | # ===========================================================================
43 | for niter in (8, 16, 128):
44 |   for downsample in (1, 4, 16):
45 |     for stochastic in (True, False):
46 |       gmm = GMM(nmix=nmix, nmix_start=1, niter=niter,
47 |                 allow_rollback=True, exit_on_error=True,
48 |                 downsample=downsample,
49 |                 stochastic_downsample=stochastic,
50 |                 batch_size_cpu=25,
51 |                 batch_size_gpu=25,
52 |                 device='gpu')
53 |       gmm.initialize(X)
54 |       print(gmm)
55 |       gmm.fit(X)
56 |       # ====== match each components to closest mean ====== #
57 |       gmm_mean = [None] * nmix
58 |       gmm_sigma = [None] * nmix
59 |       for mean, sigma in zip(gmm.mean.T, gmm.sigma.T):
60 |         sigma = np.diag(sigma)
61 |         distance = sorted([(i, np.sqrt(np.sum((m - mean)**2)))
62 |                            for i, m in enumerate(stats_mean)],
63 |                           key=lambda x: x[1])
64 |         for i, dist in distance:
65 |           if gmm_mean[i] is None:
66 |             gmm_mean[i] = mean
67 |             gmm_sigma[i] = sigma
68 |             break
69 |       # ====== plot everything ====== #
70 |       plt.figure()
71 |       colors = V.generate_random_colors(n=nmix)
72 |       for i in range(nmix):
73 |         c = colors[i]
74 |         dat = y[i]
75 |         sigma = gmm_sigma[i]
76 |         plt.scatter(dat[:, 0], dat[:, 1], c=c, s=0.5)
77 |         V.plot_ellipses(gmm_mean[i], gmm_sigma[i], alpha=0.5, color=c)
78 |         V.plot_ellipses(stats_mean[i], stats_sigma[i], alpha=0.3, color='red')
79 |       plt.suptitle('#iter:%d stochastic:%s downsample:%d ' %
80 |         (niter, stochastic, downsample))
81 | V.plot_save(pdf_path)
82 | 


--------------------------------------------------------------------------------
/odin/networks/positional_encoder.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import numpy as np
 4 | from tensorflow.python import keras
 5 | 
 6 | from odin import backend as bk
 7 | 
 8 | 
 9 | class PositionalEncoder(keras.layers.Layer):
10 |   r""" Positional encoding follow the approach in (Vaswani et al. 2017)
11 |   For even dimension in the embedding:
12 |     `PE(pos,2i) = sin(pos/10000^(2i/dmodel))`
13 |   and for odd position:
14 |     `PE(pos,2i+1) = cos(pos/10000^(2i/dmodel))`
15 | 
16 |   Reference:
17 |     Vaswani, A., et al., 2017. Attention Is All You Need. arXiv:1706.03762 [cs].
18 | 
19 |   """
20 | 
21 |   def __init__(self,
22 |                output_dim,
23 |                max_len=10000,
24 |                trainable=False,
25 |                mask_zero=False):
26 |     super().__init__()
27 |     self.output_dim = output_dim
28 |     self.mask_zero = bool(mask_zero)
29 |     self.trainable = bool(trainable)
30 |     self.supports_masking = mask_zero
31 |     self.max_len = max_len
32 | 
33 |     # Applying the cosine to even columns and sin to odds.
34 |     # if zero-masked, dont use the 0 position
35 |     # (i - i % 2) create a sequence of (0,0,1,1,2,2,...) which is needed
36 |     # for two running sequence of sin and cos in odd and even position
37 |     position_encoding = np.array([[
38 |         pos / np.power(10000, (i - i % 2) / output_dim)
39 |         for i in range(output_dim)
40 |     ] if pos != 0 or not mask_zero else [0.] * output_dim
41 |                                   for pos in range(max_len)])
42 |     # [max_len, output_dim]
43 |     position_encoding[:, 0::2] = np.sin(position_encoding[:, 0::2])  # dim 2i
44 |     position_encoding[:, 1::2] = np.cos(position_encoding[:, 1::2])  # dim 2i+1
45 |     if not trainable:
46 |       self.position_encoding = bk.array(position_encoding,
47 |                                         dtype='float32',
48 |                                         framework=self)
49 |     else:
50 |       self.position_encoding = bk.variable(initial_value=position_encoding,
51 |                                            dtype='float32',
52 |                                            trainable=True,
53 |                                            framework=self)
54 | 
55 |   def compute_mask(self, inputs, mask=None):
56 |     if not self.mask_zero:
57 |       return None
58 |     return bk.not_equal(inputs, 0)
59 | 
60 |   def call(self, sequence, training=None):
61 |     with bk.framework_(self):
62 |       # [batch_size, time_dim]
63 |       positions = bk.tile(bk.expand_dims(bk.arange(sequence.shape[1]), 0),
64 |                           [sequence.shape[0], 1])
65 |       dtype = bk.dtype_universal(positions.dtype)
66 |       if dtype not in ('int32', 'int64'):
67 |         positions = bk.cast(positions, dtype='int32')
68 |       pe = bk.embedding(indices=positions, weight=self.position_encoding)
69 |       return pe
70 | 
71 |   def get_config(self):
72 |     config = super().get_config()
73 |     config.update({
74 |         'output_dim': self.output_dim,
75 |         'trainable': self.trainable,
76 |         'mask_zero': self.mask_zero,
77 |         'max_len': self.max_len
78 |     })
79 |     return config
80 | 


--------------------------------------------------------------------------------
/odin/preprocessing/confs/smileF0.cfg:
--------------------------------------------------------------------------------
  1 | ///////////////////////////////////////////////////////////////////////////////////////
  2 | ///////// > openSMILE config for SHS viterbi smoothed pitch <        //////////////////
  3 | /////////                                                            //////////////////
  4 | ///////// (c) 2013-2016 audEERING.                                   //////////////////
  5 | /////////     All rights reserved. See file COPYING for details.     //////////////////
  6 | ///////////////////////////////////////////////////////////////////////////////////////
  7 | 
  8 | [componentInstances:cComponentManager]
  9 | instance[energy].type=cEnergy
 10 | {turn_on_specscale}instance[f0scale].type=cSpecScale
 11 | instance[f0shs].type=cPitchShs
 12 | instance[f0Smooth].type=cPitchSmootherViterbi
 13 | instance[smoF0].type=cContourSmoother
 14 | instance[f0Selector].type=cDataSelector
 15 | instance[volmerge].type = cValbasedSelector
 16 | instance[f0Selector2].type=cDataSelector
 17 | 
 18 | [energy:cEnergy]
 19 | reader.dmLevel=win
 20 | writer.dmLevel=ene
 21 | rms=1
 22 | log=0
 23 | writer.levelconf.nT=100
 24 | 
 25 | [f0scale:cSpecScale]
 26 | reader.dmLevel=fftmag
 27 | writer.dmLevel=hps
 28 | copyInputName = 1
 29 | processArrayFields = 0
 30 | scale=octave
 31 | sourceScale = lin
 32 | // logScaleBase = 2
 33 | // logSourceScaleBase = 2
 34 | // firstNote = 55
 35 | interpMethod = spline
 36 | minF = 20
 37 | maxF = -1
 38 | nPointsTarget = 0
 39 | specSmooth = 1
 40 | specEnhance = 1
 41 | auditoryWeighting = 1
 42 | 
 43 | [f0shs:cPitchShs]
 44 | reader.dmLevel=hps
 45 | writer.dmLevel=pitchShsF0
 46 | copyInputName = 1
 47 | processArrayFields = 0
 48 | maxPitch = {fmax}
 49 | minPitch = {fmin}
 50 | nCandidates = {nCandidates}
 51 | scores = 1
 52 | voicing = 1
 53 | F0C1 = 0
 54 | voicingC1 = 0
 55 | F0raw = 1
 56 | voicingClip = 1
 57 | voicingCutoff = {voicingCutoff}
 58 | inputFieldSearch = Mag_octScale
 59 | octaveCorrection = 0
 60 | nHarmonics = 15
 61 | compressionFactor = 0.850000
 62 | greedyPeakAlgo = 1
 63 | 
 64 | [f0Smooth:cPitchSmootherViterbi]
 65 | reader.dmLevel=pitchShsF0
 66 | reader2.dmLevel=pitchShsF0
 67 | writer.dmLevel=pitchF0
 68 | copyInputName = 1
 69 | bufferLength=90
 70 | F0final = 1
 71 | F0finalEnv = 0
 72 | voicingFinalClipped = 0
 73 | voicingFinalUnclipped = 1
 74 | F0raw = 0
 75 | voicingC1 = 0
 76 | voicingClip = 0
 77 | wTvv =10.0
 78 | wTvvd= 5.0
 79 | wTvuv=10.0
 80 | wThr = 4.0
 81 | wTuu = 0.0
 82 | wLocal=2.0
 83 | wRange=1.0
 84 | 
 85 | [smoF0:cContourSmoother]
 86 | reader.dmLevel = pitchF0
 87 | writer.dmLevel = pitchSmoF0
 88 | writer.levelconf.isRb=0
 89 | writer.levelconf.growDyn=1
 90 | nameAppend = smaf0
 91 | copyInputName = 1
 92 | noPostEOIprocessing = 0
 93 | smaWin = 2
 94 | noZeroSma = 1
 95 | 
 96 | [f0Selector:cDataSelector]
 97 | reader.dmLevel = pitchSmoF0
 98 | writer.dmLevel = F0a
 99 | copyInputName = 1
100 | selected[0] = F0final_smaf0
101 | elementMode = 1
102 | 
103 | [volmerge:cValbasedSelector]
104 | reader.dmLevel = ene;F0a
105 | writer.dmLevel = F0cl
106 | idx=0
107 | threshold=0.0008
108 | removeIdx=0
109 | zeroVec=1
110 | outputVal=0.0
111 | 
112 | [f0Selector2:cDataSelector]
113 | reader.dmLevel = F0cl
114 | writer.dmLevel = F0
115 | copyInputName = 1
116 | selected[0] = F0final_smaf0
117 | elementMode = 1
118 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from os import path
 2 | 
 3 | from setuptools import find_packages, setup
 4 | 
 5 | ODIN_VERSION = '1.4.0'
 6 | 
 7 | TENSORFLOW_VERSION = '2.5.0'
 8 | TFP_VERSION = '0.13.0'
 9 | PYTORCH_VERSION = '1.9.0+cu111'
10 | PYTORCH_VISION = '0.10.0+cu111'
11 | PYTORCH_AUDIO = '0.9.0'
12 | 
13 | # ===========================================================================
14 | # Dependencies
15 | # ===========================================================================
16 | dependencies = [
17 |   'numpy',
18 |   'scipy',
19 |   f"tensorflow=={TENSORFLOW_VERSION}",
20 |   f'tensorflow-probability=={TFP_VERSION}',
21 |   # f'torch=={PYTORCH_VERSION}',
22 |   # f'torchvision=={PYTORCH_VISION}',
23 |   # f'torchaudio=={PYTORCH_AUDIO}',
24 |   # 'pyro-ppl',
25 |   'tensorflow-addons',
26 |   'tensorflow-datasets',
27 |   'transformers',
28 |   'hydra-core>=1.0.0',
29 |   'bigarray>=0.2.1',
30 |   'six',
31 |   'scikit-learn',
32 |   'matplotlib',
33 |   'decorator',
34 |   'tqdm',
35 |   'pyyaml',
36 |   'pycrypto',
37 |   'typeguard'  # runtime type check
38 | ]
39 | # ===========================================================================
40 | # Description
41 | # ===========================================================================
# Directory containing this setup.py
here = path.abspath(path.dirname(__file__))

long_description = \
  '''
  An end-to-end framework support multi-modal data processing
  and fast prototyping of machine learning algorithm in form
  of organized networks.
  '''

# ===========================================================================
# Setup
# ===========================================================================
# NOTE: `find_packages` matches *dotted package names*, not paths, and
# excluding a package does not exclude its sub-packages. Hence every
# non-library tree needs both 'name' and 'name.*' (the previous
# 'examples/*' pattern never matched, and 'tests' alone still shipped
# `tests.ml`, `tests.backend`, ...).
_EXCLUDED_PACKAGES = [
  'examples', 'examples.*',
  'docs', 'docs.*',
  'tests', 'tests.*',
]

setup(
  name='odin-ai',
  version=ODIN_VERSION,
  description="Deep learning for research and production",
  long_description=long_description,
  long_description_content_type='text/x-rst',
  url='https://github.com/imito/odin-ai',
  author='Trung Ngo Trong',
  author_email='trungnt13@gmail.com',
  license='MIT',
  classifiers=[
    'Development Status :: 3 - Alpha',
    'Intended Audience :: Developers',
    'Intended Audience :: Education',
    'Intended Audience :: Science/Research',
    'Topic :: Scientific/Engineering :: Artificial Intelligence',
    'Topic :: Scientific/Engineering :: Information Analysis',
    'Topic :: Scientific/Engineering :: Bio-Informatics',
    'Topic :: Multimedia :: Sound/Audio :: Speech',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 3.7',
    'Natural Language :: English',
    'Operating System :: MacOS :: MacOS X',
    'Operating System :: Microsoft :: Windows',
    'Operating System :: POSIX :: Linux',
  ],
  keywords=
  'tensorflow pytorch machine learning neural networks deep learning bayesian',
  packages=find_packages(exclude=_EXCLUDED_PACKAGES),
  # scripts=['bin/speech-augmentation', 'bin/speech-test'],
  setup_requires=['pip>=19.0'],
  install_requires=dependencies,
  extras_require={
    'visualize': ['pydot>=1.2.4', 'colorama', 'seaborn'],
    'tests': ['pytest', 'pandas', 'requests'],
    'audio': ['soundfile', 'resampy'],
    'docs': ['sphinx', 'sphinx_rtd_theme']
  },
  zip_safe=False)
93 | 


--------------------------------------------------------------------------------
/examples/voxceleb/train_ivec.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | import os
 3 | os.environ['ODIN'] = 'float32,gpu'
 4 | 
 5 | import numpy as np
 6 | 
 7 | from odin import ml
 8 | from odin import fuel as F
 9 | from odin.utils import args_parse, ctext, stdio, Progbar
10 | 
11 | from utils import (get_model_path, prepare_ivec_data, csv2mat,
12 |                    TRAIN_DATA)
13 | # ===========================================================================
14 | # Configs
15 | # ===========================================================================
args = args_parse([
    ('recipe', 'the name of function defined in feature_recipes.py', None),
    ('-nmix', "Number of GMM mixture", None, 2048),
    ('-tdim', "Dimension of t-matrix", None, 600),
    ('-feat', "Acoustic feature", ('mspec', 'bnf'), 'bnf'),
    ('--gmm', "Force re-run training GMM", None, False),
    ('--stat', "Force re-extraction of centered statistics", None, False),
    ('--tmat', "Force re-run training Tmatrix", None, False),
    ('--ivec', "Force re-run extraction of i-vector", None, False),
    ('--all', "Run all the system again, just a shortcut", None, False),
])
# forcing one stage also forces every stage that comes after it
args.gmm |= args.all
args.stat |= args.all | args.gmm
args.tmat |= args.all | args.stat
args.ivec |= args.all | args.tmat
FEAT = args.feat
(EXP_DIR, MODEL_PATH, LOG_PATH,
 TRAIN_PATH, TEST_PATH) = get_model_path('ivec', args)
stdio(LOG_PATH)
# ===========================================================================
# Load dataset
# ===========================================================================
X, train, test = prepare_ivec_data(args.recipe, FEAT)
# ===========================================================================
# Training I-vector model
# ===========================================================================
ivec = ml.Ivector(path=MODEL_PATH,
                  nmix=args.nmix,
                  tv_dim=args.tdim,
                  niter_gmm=16,
                  niter_tmat=16,
                  downsample=2,
                  stochastic_downsample=True,
                  device='gpu',
                  name="VoxCelebIvec")
ivec.fit(X, indices=train, extract_ivecs=True, keep_stats=False)
# ====== read back the train i-vectors written by `fit` ====== #
I_train = F.MmapData(ivec.ivec_path, read_only=True)
name_train = np.genfromtxt(ivec.name_path, dtype=str)
print("Train i-vectors:", ctext(I_train, 'cyan'))
# one tab-separated row per utterance: speaker followed by the i-vector
prog = Progbar(target=len(name_train),
               print_report=True,
               print_summary=True,
               name="Saving train i-vectors")
with open(TRAIN_PATH, 'w') as f_train:
  for idx, utt_name in enumerate(name_train):
    row = [str(TRAIN_DATA[utt_name])]
    row.extend(str(v) for v in I_train[idx])
    f_train.write('\t'.join(row) + '\n')
    prog.add(1)
# ====== extract test i-vector ====== #
# sort by utterance name so rows of `I_test` can be zipped with `test`
test = sorted(test.items(), key=lambda item: item[0])
I_test = ivec.transform(X, indices=test, save_ivecs=False, keep_stats=False)
# one tab-separated row per utterance: name followed by the i-vector
with open(TEST_PATH, 'w') as f_test:
  for (utt_name, (_start, _end)), vec in zip(test, I_test):
    row = [utt_name]
    row.extend(str(v) for v in vec)
    f_test.write('\t'.join(row) + '\n')
# ====== convert the csv files and print the model ====== #
csv2mat(exp_dir=EXP_DIR)
print(ivec)
72 | 


--------------------------------------------------------------------------------
/odin/bay/layers/latents.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | import tensorflow as tf
 4 | from tensorflow_probability.python.distributions import (Independent,
 5 |                                                          MultivariateNormalDiag,
 6 |                                                          Normal, Distribution)
 7 | 
 8 | from odin.bay.layers.continuous import (MultivariateNormalLayer,
 9 |                                         NormalLayer)
10 | from odin.bay.layers.dense_distribution import (DistributionDense,
11 |                                                 MixtureDensityNetwork)
12 | 
13 | __all__ = [
14 |   'MVNDiagLatents',
15 |   'NormalLatents',
16 |   'MixtureMVNDiagLatents',
17 |   'MixtureNormalLatents',
18 | ]
19 | 
20 | 
21 | # NOTE: DO NOT USE softplus1
22 | 
class MVNDiagLatents(DistributionDense):
  """Latent layer with a `MultivariateNormalLayer` posterior (diagonal
  covariance, softplus scale activation) and a factorized normal prior."""

  def __init__(self,
               units: int,
               prior_loc: float = 0.,
               prior_scale: float = 1.,
               projection: bool = True,
               name: str = "Latents",
               **kwargs):
    event_shape = (int(units),)
    posterior_kwargs = {
      'covariance': 'diag',
      'scale_activation': tf.nn.softplus,
    }
    # prior: `units` independent Normal(prior_loc, prior_scale) treated as
    # a single event
    normal = Normal(loc=tf.fill((units,), prior_loc),
                    scale=tf.fill((units,), prior_scale))
    prior = Independent(normal, reinterpreted_batch_ndims=1)
    super().__init__(event_shape=event_shape,
                     posterior=MultivariateNormalLayer,
                     posterior_kwargs=posterior_kwargs,
                     prior=prior,
                     projection=projection,
                     name=name,
                     **kwargs)
46 | 
47 | 
class NormalLatents(DistributionDense):
  """Latent layer with a `NormalLayer` posterior (softplus scale
  activation) and a factorized normal prior."""

  def __init__(self,
               units: int,
               prior_loc: float = 0.,
               prior_scale: float = 1.,
               projection: bool = True,
               name: str = "Latents",
               **kwargs):
    event_shape = (int(units),)
    posterior_kwargs = {'scale_activation': 'softplus'}
    # prior: `units` independent Normal(prior_loc, prior_scale) treated as
    # a single event
    normal = Normal(loc=tf.fill((units,), prior_loc),
                    scale=tf.fill((units,), prior_scale))
    prior = Independent(normal, reinterpreted_batch_ndims=1)
    super().__init__(event_shape=event_shape,
                     posterior=NormalLayer,
                     posterior_kwargs=posterior_kwargs,
                     prior=prior,
                     projection=projection,
                     name=name,
                     **kwargs)
69 | 
70 | 
class MixtureNormalLatents(MixtureDensityNetwork):
  """Mixture density latents created with `covariance='none'`.

  When no `prior` is given, `set_prior()` is called with its default
  arguments, otherwise the given distribution is assigned to `self.prior`.
  """

  def __init__(self,
               units,
               n_components=8,
               projection=True,
               prior: Optional[Distribution] = None,
               **kwargs):
    # these two options are always overridden by this class
    kwargs.update(covariance='none', n_components=int(n_components))
    super().__init__(units, projection=projection, **kwargs)
    if prior is not None:
      self.prior = prior
    else:
      self.set_prior()
86 | 
87 | 
class MixtureMVNDiagLatents(MixtureDensityNetwork):
  """Mixture density latents created with `covariance='diag'`.

  Parameters
  ----------
  units : int
    forwarded as the first argument of `MixtureDensityNetwork`
  n_components : int
    number of mixture components (cast to `int`)
  projection : bool
    forwarded to `MixtureDensityNetwork`
  prior : Distribution, optional
    when given, it is assigned to `self.prior` (same handling as
    `MixtureNormalLatents`); when `None` (default), `set_prior()` is
    called with its default arguments, which is the previous behavior.
  **kwargs
    forwarded to `MixtureDensityNetwork`; 'covariance' and 'n_components'
    are always overridden.
  """

  def __init__(self,
               units,
               n_components=8,
               projection=True,
               prior: Optional[Distribution] = None,
               **kwargs):
    kwargs['covariance'] = 'diag'
    kwargs['n_components'] = int(n_components)
    super().__init__(units, projection=projection, **kwargs)
    if prior is None:
      self.set_prior()
    else:
      self.prior = prior
95 | 


--------------------------------------------------------------------------------
/benchmarks/tf_factorvae_permute_dims.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from collections import defaultdict
  3 | from time import time
  4 | 
  5 | import tensorflow as tf
  6 | from tensorflow.python.autograph import to_code
  7 | 
  8 | from odin.utils import UnitTimer
  9 | 
 10 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 11 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
 12 | 
 13 | tf.random.set_seed(8)
 14 | 
 15 | 
 16 | def permute_dims1(z):
 17 |   z_perm = []
 18 |   for i in range(z.shape[1]):
 19 |     z_perm.append(tf.random.shuffle(z[:, i]))
 20 |   return tf.transpose(tf.stack(z_perm))
 21 | 
 22 | 
 23 | @tf.function
 24 | def permute_dims2(z):
 25 |   perm = tf.TensorArray(dtype=z.dtype,
 26 |                         size=z.shape[1],
 27 |                         dynamic_size=False,
 28 |                         clear_after_read=False,
 29 |                         element_shape=(z.shape[0],))
 30 |   for i in tf.range(z.shape[1]):
 31 |     z_i = tf.random.shuffle(z[:, i])
 32 |     perm = perm.write(i, z_i)
 33 |   return tf.transpose(perm.stack())
 34 | 
 35 | 
 36 | @tf.function
 37 | def permute_dims3(z):
 38 |   perm = tf.TensorArray(dtype=z.dtype,
 39 |                         size=z.shape[1],
 40 |                         dynamic_size=False,
 41 |                         clear_after_read=False,
 42 |                         element_shape=(z.shape[0],))
 43 |   ids = tf.range(z.shape[0], dtype=tf.int32)
 44 |   for i in tf.range(z.shape[1]):
 45 |     z_i = tf.gather(z[:, i], tf.random.shuffle(ids))
 46 |     perm = perm.write(i, z_i)
 47 |   return tf.transpose(perm.stack())
 48 | 
 49 | 
 50 | @tf.function
 51 | def permute_dims4(z):
 52 |   perm = tf.transpose(z)
 53 |   for i in tf.range(z.shape[1]):
 54 |     z_i = tf.expand_dims(tf.random.shuffle(z[:, i]), axis=0)
 55 |     perm = tf.tensor_scatter_nd_update(perm, indices=[[i]], updates=z_i)
 56 |   return tf.transpose(perm)
 57 | 
 58 | 
 59 | @tf.function
 60 | def permute_dims5(z):
 61 |   perm = tf.transpose(z)
 62 |   ids = tf.range(z.shape[0], dtype=tf.int32)
 63 |   for i in tf.range(z.shape[1]):
 64 |     z_i = tf.gather(z[:, i], tf.random.shuffle(ids))
 65 |     z_i = tf.expand_dims(z_i, axis=0)
 66 |     perm = tf.tensor_scatter_nd_update(perm, indices=[[i]], updates=z_i)
 67 |   return tf.transpose(perm)
 68 | 
 69 | 
 70 | benchmark = {}
 71 | 
 72 | for batch_size in (10, 100, 1024, 20000):
 73 |   for dim in (16, 64, 128, 512, 1024):
 74 |     shape = (batch_size, dim)
 75 |     z = tf.reshape(tf.range(shape[0] * shape[1], dtype=tf.float64), shape)
 76 | 
 77 |     print("\n Shape:", shape)
 78 |     permute_dims2(z + 1)
 79 |     permute_dims3(z + 1)
 80 |     permute_dims4(z + 1)
 81 |     permute_dims5(z + 1)
 82 | 
 83 |     start = time()
 84 |     z1 = permute_dims1(z)
 85 |     t1 = time() - start
 86 | 
 87 |     start = time()
 88 |     z2 = permute_dims2(z)
 89 |     t2 = time() - start
 90 | 
 91 |     start = time()
 92 |     z3 = permute_dims3(z)
 93 |     t3 = time() - start
 94 | 
 95 |     start = time()
 96 |     z4 = permute_dims4(z)
 97 |     t4 = time() - start
 98 | 
 99 |     start = time()
100 |     z5 = permute_dims5(z)
101 |     t5 = time() - start
102 | 
103 |     benchmark[shape] = [t1, t2, t3, t4, t5]
104 |     tf.assert_equal(tf.reduce_mean(z1), tf.reduce_mean(z2), tf.reduce_mean(z3),
105 |                     tf.reduce_mean(z))
106 | 
107 | for k, v in benchmark.items():
108 |   print('(%s)' % ', '.join(['%5d' % i for i in k]),
109 |         ', '.join(['%.3f' % i for i in v]))
110 | 


--------------------------------------------------------------------------------
/odin/visual/base.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import os
  4 | import sys
  5 | from collections import defaultdict
  6 | from typing import Dict, Text, Optional
  7 | 
  8 | from matplotlib import pyplot as plt
  9 | 
# Registry of the figures stored by each Visualizer, indexed first by
# `id(visualizer)` and then by the figure name.
_FIGURE_LIST = defaultdict(dict)
# Number of times each figure name has been added, indexed the same way;
# used by `Visualizer.add_figure` to give repeated names a unique suffix.
_FIGURE_COUNT = defaultdict(lambda: defaultdict(int))
 12 | 
 13 | 
 14 | class Visualizer(object):
 15 |   r""" Visualizer """
 16 | 
 17 |   def assert_figure(self, fig):
 18 |     assert isinstance(fig, plt.Figure), \
 19 |       'fig must be instance of matplotlib.Figure, but given: %s' % str(
 20 |         type(fig))
 21 |     return fig
 22 | 
 23 |   def assert_axis(self, ax):
 24 |     from matplotlib import pyplot as plt
 25 |     from odin.visual.figures import to_axis
 26 |     ax = to_axis(ax)
 27 |     assert isinstance(ax, plt.Axes), \
 28 |       'ax must be instance of matplotlib.Axes, but given: %s' % str(type(ax))
 29 |     return ax
 30 | 
 31 |   @property
 32 |   def figures(self) -> Dict[Text, plt.Figure]:
 33 |     return _FIGURE_LIST[id(self)]
 34 | 
 35 |   def add_figure(self, name: str, fig: plt.Figure) -> 'Visualizer':
 36 |     self.assert_figure(fig)
 37 |     figures = _FIGURE_LIST[id(self)]
 38 |     count = _FIGURE_COUNT[id(self)]
 39 |     count[name] += 1
 40 |     if count[name] > 1:
 41 |       name = f"{name}_{count[name] - 1}"
 42 |     figures[name] = fig
 43 |     return self
 44 | 
 45 |   def save_figures(self,
 46 |                    path: str = '/tmp/tmp.pdf',
 47 |                    dpi: Optional[int] = None,
 48 |                    verbose: bool = False) -> 'Visualizer':
 49 |     """ Saving all stored figures to path
 50 | 
 51 |     Parameters
 52 |     ----------
 53 |     path : a String.
 54 |         path to a pdf or image file, or a directory in case saving the figures
 55 |         to separated image files.
 56 |     dpi : int, optional
 57 |         dot-per-inch
 58 |     verbose : bool
 59 |         print out the log
 60 |     """
 61 |     if dpi is None and hasattr(self, 'dpi'):
 62 |       dpi = self.dpi
 63 |     # checking arguments
 64 |     figures = _FIGURE_LIST[id(self)]
 65 |     if len(figures) == 0:
 66 |       return self
 67 |     # ====== saving PDF file ====== #
 68 |     if '.pdf' == path[-4:].lower():
 69 |       try:
 70 |         from matplotlib.backends.backend_pdf import PdfPages
 71 |         pp = PdfPages(path)
 72 |         for fig in figures.values():
 73 |           fig: plt.Figure
 74 |           fig.savefig(pp,
 75 |                       dpi=dpi,
 76 |                       transparent=False,
 77 |                       format='pdf',
 78 |                       bbox_inches="tight")
 79 |           plt.close(fig)
 80 |         pp.close()
 81 |         if verbose:
 82 |           sys.stdout.write(f"Saved figures to:{path}\n")
 83 |       except Exception as e:
 84 |         sys.stderr.write(f'Cannot save figures to pdf, error:{str(e)}\n')
 85 |     # ====== saving PNG file ====== #
 86 |     else:
 87 |       if not os.path.exists(path):
 88 |         os.makedirs(path)
 89 |       assert os.path.isdir(path), f'Invalid directory path: {path}'
 90 |       kwargs = dict(dpi=dpi, bbox_inches="tight")
 91 |       for name, fig in figures.items():
 92 |         fig: plt.Figure
 93 |         img_path = os.path.join(path, f'{name}.png')
 94 |         fig.savefig(img_path, transparent=False, **kwargs)
 95 |         plt.close(fig)
 96 |         if verbose:
 97 |           sys.stdout.write(f"Saved figures to:{img_path}\n")
 98 |     # clean
 99 |     figures.clear()
100 |     return self
101 | 


--------------------------------------------------------------------------------
/examples/mnist.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import os
 4 | import time
 5 | 
 6 | import numpy as np
 7 | import tensorflow as tf
 8 | import tensorflow_datasets as tfds
 9 | from tensorflow import keras
10 | 
11 | from odin.training import Trainer
12 | 
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '3',
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
})

# fixed seeds for both tensorflow and numpy
tf.random.set_seed(8)
np.random.seed(8)

# ===========================================================================
# Load data
# ===========================================================================
# the last 20% of the original train split is used for validation
_splits = ['train[:80%]', 'train[80%:]', 'test']
_read_config = tfds.ReadConfig(shuffle_seed=1,
                               shuffle_reshuffle_each_iteration=True)
train, valid, test = tfds.load('fashion_mnist:3.0.0',
                               split=_splits,
                               read_config=_read_config)

# static shape of a single (unbatched) image
_image_spec = tf.data.experimental.get_structure(train)['image']
input_shape = _image_spec.shape
29 | 
30 | 
def process(data):
  """Turn an example dict into an `(image, label)` pair of float32 tensors.

  The image is rescaled with `(image / 255 - 0.5) * 2`, i.e. to [-1, 1]
  when the raw pixel values are in [0, 255].
  """
  image = tf.cast(data['image'], tf.float32)
  label = tf.cast(data['label'], tf.float32)
  centered = image / 255. - 0.5
  return centered * 2., label
36 | 
37 | 
38 | # ===========================================================================
39 | # Test
40 | # ===========================================================================
# fully-connected classifier with 10 softmax outputs
_layers = [
    keras.layers.Flatten(input_shape=input_shape),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
]
network = keras.Sequential(_layers)

_adam_config = dict(learning_rate=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-07,
                    amsgrad=False)
opt = tf.optimizers.Adam(**_adam_config)
54 | 
55 | 
def optimize(inputs, training):
  """Run one step on a batch `(X, y_true)` and return `(loss, acc)`.

  `loss` is the mean sparse categorical cross-entropy and `acc` the
  fraction of samples whose arg-max prediction equals the label. When
  `training` is true, the gradients are applied to `network` with `opt`
  through `Trainer.apply_gradients`.
  """
  X, y_true = inputs
  with tf.GradientTape(watch_accessed_variables=bool(training)) as tape:
    y_pred = network(X, training=training)
    xent = tf.losses.sparse_categorical_crossentropy(y_true, y_pred)
    loss = tf.reduce_mean(xent)
    # labels are float32 (see `process`), so cast the predictions as well
    y_hat = tf.cast(tf.argmax(y_pred, axis=-1), tf.float32)
    n_correct = tf.reduce_sum(tf.cast(y_true == y_hat, tf.float32))
    acc = n_correct / tf.cast(tf.shape(y_true)[0], tf.float32)
    if training:
      Trainer.apply_gradients(tape, opt, loss, network)
  return loss, acc
68 | 
69 | 
def callback():
  """Validation callback: ask `Trainer.early_stop` for a signal based on
  the validation losses, save the weights on the "best" signal, restore
  them on the "terminate" signal, and return the signal."""
  signal = Trainer.early_stop(trainer.valid_loss, threshold=0.25, verbose=True)
  is_best = signal == Trainer.SIGNAL_BEST
  if is_best:
    print(" - Save the best weights!")
    Trainer.save_weights(network)
  elif signal == Trainer.SIGNAL_TERMINATE:
    print(" - Restore the best weights!")
    Trainer.restore_weights(network)
  return signal
79 | 
80 | 
trainer = Trainer()

# the measured time includes the preparation of both datasets
start_time = time.time()
_train_ds = Trainer.prepare(train,
                            postprocess=process,
                            parallel_postprocess=False,
                            shuffle=True,
                            epochs=32)
_valid_ds = Trainer.prepare(valid, postprocess=process)
trainer.fit(_train_ds,
            optimize,
            valid_ds=_valid_ds,
            valid_freq=2500,
            autograph=True,
            logging_interval=2,
            on_valid_end=callback)
print("Total:", time.time() - start_time)
96 | 


--------------------------------------------------------------------------------
/tests/test_datasets.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import seaborn as sns
 3 | from matplotlib import pyplot as plt
 4 | 
 5 | from odin import visual as vs
 6 | from odin.fuel import (CelebABig, CelebASmall, Shapes3DSmall, dSpritesSmall,
 7 |                        dSprites, ImageDataset, get_all_dataset, SVHN, CIFAR10)
 8 | from odin.utils import as_tuple
 9 | from tqdm import tqdm
10 | from collections import Counter
11 | 
sns.set()

# ====== statistics of the labeled/unlabeled mask on CIFAR10 ====== #
ds = CIFAR10()
it = ds.create_dataset('train',
                       label_percent=100,
                       oversample_ratio=0.1,
                       normalize='raster',
                       drop_remainder=False).take(1404)
p = []  # fraction of labeled examples in each batch
all_labels = []  # class indices of the labeled examples
all_unlabels = []  # class indices of the unlabeled examples
total = 0  # number of examples seen
for data in tqdm(it.as_numpy_iterator()):
  x, y = data['inputs']
  m = data['mask'].ravel()
  p.append(np.sum(m) / m.shape[0])
  labeled_ids = np.argmax(y[m], axis=-1)
  unlabeled_ids = np.argmax(y[np.logical_not(m)], axis=-1)
  all_labels.extend(labeled_ids.tolist())
  all_unlabels.extend(unlabeled_ids.tolist())
  total += x.shape[0]
print('Total:', total)
print('Labeled:', Counter(all_labels))
print('Unlabeled:', Counter(all_unlabels))
print('p=', np.mean(p))
# ====== plot (at most) 25 images of the last batch ====== #
plt.figure(figsize=(15, 15), dpi=150)
for idx, (img, lab, mask) in enumerate(zip(x[:25], y, m), start=1):
  plt.subplot(5, 5, idx)
  plt.imshow(img.astype(np.uint8))
  # class name for labeled examples, the raw label vector otherwise
  title = ds.labels[np.argmax(lab)] if mask else str(lab)
  plt.title(title, fontsize=6)
  plt.axis('off')
plt.tight_layout()
vs.plot_save()
46 | 
# Iterate every image dataset class, check the value range produced by each
# normalization mode on each partition, and save a 3x3 sample grid per
# (partition, normalize) pair to one pdf per dataset.
for ds in get_all_dataset('image'):
  print(ds)
  # these dataset classes are skipped by this test
  if ds in (Shapes3DSmall, dSpritesSmall, CelebABig, CelebASmall):
    continue
  ds = ds()
  ds: ImageDataset
  # check the value range of 10 shuffled batches
  for partition in ('train', 'valid', 'test'):
    print(' ', partition)
    for normalize in ('probs', 'tanh', 'raster'):
      print('  ', normalize)
      x = ds.create_dataset(partition,
                            label_percent=True,
                            normalize=normalize,
                            drop_remainder=True)
      for data in x.shuffle(1000).take(10):
        data = as_tuple(data)
        # a 2-tuple is (image, label), anything else is treated as
        # image-only with the image as first element
        if len(data) == 2:
          img, lab = data
        else:
          img = data[0]
          lab = None
        img = img.numpy()
        if normalize == 'probs':
          # values within [0, 1]
          assert np.all(img >= 0.0) and np.all(img <= 1.0)
        elif normalize == 'tanh':
          # values within [-1, 1] and at least one negative value
          assert np.all(img >= -1.0) and np.all(img <= 1.0) and np.any(
              img < 0.0)
        elif normalize == 'raster':
          # values within [0, 255] and at least one value above 1
          assert np.all(img >= 0.0) and np.all(img <= 255.0) and np.any(
              img > 1.0)
      # plot the first 9 images of the LAST batch of the loop above
      # (`img` and `lab` are intentionally reused after the loop)
      image = img[:9]
      labels = [None] * 9 if lab is None else lab[:9].numpy()
      # drop the channel axis of single-channel images
      if image.shape[-1] == 1:
        image = np.squeeze(image, -1)
      # convert to a range that `plt.imshow` can display
      if normalize == 'raster':
        image = image.astype(np.uint8)
      elif normalize == 'tanh':
        image = (image + 1.) / 2.
      plt.figure(figsize=(5, 5), dpi=150)
      for i in range(9):
        plt.subplot(3, 3, i + 1)
        plt.imshow(image[i], cmap='Greys' if image[i].ndim == 2 else None)
        plt.title(str(labels[i]), fontsize=6)
        plt.axis('off')
      plt.tight_layout()
      plt.suptitle(f'{partition}_{normalize}')
  # one pdf per dataset, written after all of its figures are created
  vs.plot_save(f'/tmp/data_{ds.name.lower()}.pdf', verbose=True)
96 | 


--------------------------------------------------------------------------------
/odin/bay/distributions/logarizmed.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | from tensorflow_probability.python.bijectors.exp import Exp
 6 | from tensorflow_probability.python.distributions import (
 7 |     LogNormal, TransformedDistribution, Uniform)
 8 | from tensorflow_probability.python.internal import dtype_util
 9 | 
10 | __all__ = [
11 |     "LogUniform",
12 | ]
13 | 
14 | 
class LogUniform(TransformedDistribution):
  """The log-uniform distribution (i.e. the logarithm of the
  samples from this distribution are Uniform).

  It is built as the `Exp` bijector applied to `Uniform(low, high)`, so
  `low` and `high` are the bounds of the LOGARITHM of the samples.
  """

  def __init__(self,
               low=0.,
               high=1.,
               validate_args=False,
               allow_nan_stats=True,
               name="LogUniform"):
    """Construct a log-uniform distribution.

    The LogUniform distribution models positive-valued random variables
    whose logarithm is uniformly distributed between `low` and `high`.
    It is constructed as the exponential transformation of a Uniform
    distribution, hence the samples are `exp(u)` with
    `u ~ Uniform(low, high)`.

    Args:
      low: Floating point tensor, lower boundary of the underlying Uniform
        distribution (i.e. of the logarithm of the samples). Must
        have `low < high`.
      high: Floating point tensor, upper boundary of the underlying Uniform
        distribution (i.e. of the logarithm of the samples). Must
        have `low < high`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
        NOTE(review): it is passed to `TransformedDistribution` only, the
        inner `Uniform` is created without it.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.
    """
    # must stay the first statement so that it captures only the arguments
    parameters = dict(locals())
    with tf.name_scope(name) as name:
      # common dtype of `low` and `high`, float32 when none can be inferred
      dtype = dtype_util.common_dtype([low, high], tf.float32)
      super(LogUniform, self).__init__(distribution=Uniform(
          low=tf.convert_to_tensor(value=low, name="low", dtype=dtype),
          high=tf.convert_to_tensor(value=high, name="high", dtype=dtype),
          allow_nan_stats=allow_nan_stats),
                                       bijector=Exp(),
                                       validate_args=validate_args,
                                       parameters=parameters,
                                       name=name)

  @staticmethod
  def _param_shapes(sample_shape):
    # both parameters take the shape of the requested sample
    return dict(
        zip(("low", "high"),
            ([tf.convert_to_tensor(value=sample_shape, dtype=tf.int32)] * 2)))

  @classmethod
  def _params_event_ndims(cls):
    # `low` and `high` are scalar parameters
    return dict(low=0, high=0)

  @property
  def low(self):
    """Lower boundary of the underlying Uniform (log-space lower bound)."""
    return self.distribution.low

  @property
  def high(self):
    """Upper boundary of the underlying Uniform (log-space upper bound)."""
    return self.distribution.high

  def range(self, name="range"):
    """`high - low`, i.e. the width of the interval in log-space."""
    with self._name_scope(name):
      return self.high - self.low

  # The statistics below are explicitly not implemented by this class.
  def _entropy(self):
    raise NotImplementedError

  def _mean(self):
    raise NotImplementedError

  def _variance(self):
    raise NotImplementedError

  def _stddev(self):
    raise NotImplementedError
94 | 


--------------------------------------------------------------------------------
/examples/features/speech_pipeline.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, division, absolute_import
 2 | 
 3 | import numpy as np
 4 | import scipy as sp
 5 | 
 6 | from odin.preprocessing import signal, speech, make_pipeline, base
 7 | 
 8 | from matplotlib import pyplot as plt
 9 | 
# Audio file given to `transform` of every pipeline in this example
AUDIO_PATH = '/tmp/test.wav'
11 | # ===========================================================================
12 | # Helper
13 | # ===========================================================================
def formatted_printer(feats):
  """Print the extracted features as an aligned three-column table.

  One line is printed per feature, sorted by feature name, with the
  columns: name, shape and dtype. For a value without `shape` the value
  itself is shown, and for a value without `dtype` its class name is shown.
  Columns are left-aligned and separated by two spaces.

  Parameters
  ----------
  feats : dict
      mapping from feature name to the feature value; nothing is printed
      for an empty mapping (it used to raise `IndexError`).
  """
  rows = []
  for name, val in sorted(feats.items(), key=lambda item: item[0]):
    shape = val.shape if hasattr(val, 'shape') else val
    dtype = val.dtype if hasattr(val, 'dtype') else val.__class__.__name__
    rows.append((name, str(shape), str(dtype)))
  if not rows:
    return
  # width of each column is the length of its longest cell
  widths = [max(len(cell) for cell in column) for column in zip(*rows)]
  fmt = '  '.join('%%-%ds' % width for width in widths)
  for row in rows:
    print(fmt % row)
28 | # ===========================================================================
29 | # More detail pipeline
30 | # ===========================================================================
# every extractor is listed explicitly, in the order it is applied
_detailed_steps = [
    speech.AudioReader(),
    speech.STFTExtractor(frame_length=0.025, padding=False),
    # spectra analysis
    speech.PowerSpecExtractor(output_name='spec', power=1.0),
    speech.PowerSpecExtractor(output_name='pspec', power=2.0),
    speech.Power2Db(input_name='pspec', output_name='db'),
    # Cepstra analysis
    speech.MelsSpecExtractor(n_mels=40, input_name=('pspec', 'sr')),
    speech.MFCCsExtractor(n_ceps=13, input_name='mspec'),
    # others
    speech.PitchExtractor(frame_length=0.025, f0=True),
    speech.SADgmm(input_name='stft_energy'),
    speech.RASTAfilter(input_name='mfcc', output_name='rasta'),
    base.EqualizeShape0(input_name=None),
    speech.AcousticNorm(input_name=('mfcc', 'mspec', 'spec'),
                        output_name=('mfcc_norm', 'mspec_norm', 'spec_norm')),
    speech.ApplyingSAD(input_name='mfcc', output_name='mfcc_sad'),
    base.StackFeatures(n_context=4, input_name='mfcc'),
]
pp1 = make_pipeline(steps=_detailed_steps)
_feats1 = pp1.transform(AUDIO_PATH)
formatted_printer(feats=_feats1)
print("///////////////////////////")
53 | # ===========================================================================
54 | # Fast pipeline
55 | # ===========================================================================
# combined extractors followed by generic post-processing steps
_fast_steps = [
    speech.AudioReader(),
    speech.SpectraExtractor(frame_length=0.025, n_mels=40, n_ceps=13),
    speech.CQTExtractor(frame_length=0.025, n_mels=40, n_ceps=13),
    base.DeltaExtractor(input_name=('mspec', 'mfcc'),
                        output_name=('mspec_d', 'mfcc_d')),
    base.RunningStatistics(),
    base.AsType(dtype='float16'),
    base.DuplicateFeatures('spec', 'mag'),
    base.DeleteFeatures('spec'),
]
pp2 = make_pipeline(steps=_fast_steps)
_feats2 = pp2.transform(AUDIO_PATH)
formatted_printer(feats=_feats2)
print("///////////////////////////")
69 | # ===========================================================================
70 | # OpenSMILE
71 | # ===========================================================================
# Third example pipeline: dithering and pre-emphasis followed by the four
# openSMILE-based extractors, all configured with the same frame length
# (0.025, presumably seconds -- TODO confirm the unit).
pp3 = make_pipeline(steps=[
    speech.AudioReader(),
    speech.Dithering(output_name='dither'),
    speech.PreEmphasis(coeff=0.97, output_name='preemphasis'),
    speech.openSMILEpitch(frame_length=0.025),
    speech.openSMILEf0(frame_length=0.025),
    speech.openSMILEloudness(frame_length=0.025),
    speech.openSMILEsad(frame_length=0.025),
])
# run the pipeline on the example audio file and print what it produced
formatted_printer(feats=pp3.transform(AUDIO_PATH))
82 | 


--------------------------------------------------------------------------------
/docs/principle.rst:
--------------------------------------------------------------------------------
 1 | Principles
 2 | ==========
 3 | 
The O.D.I.N project was started by Trung Ngo Trong in June 2016. The author was inspired by the work of the three most renowned deep learning frameworks at the time: Keras_, O.D.I.N_, and Blocks_.
 5 | 
Since each of the three frameworks has its own merits and drawbacks, our goal is to leverage our experience with them to create a more advanced API.
 7 | 
In short, O.D.I.N combines the simplicity, restraint and pragmatism of O.D.I.N_, the transparency and rich feature set of Keras_, and the modularity and powerful graph manipulation of Blocks_.
 9 | 
It is important to emphasize the contributions from: `Keras contributors <https://github.com/fchollet/keras/blob/master/README.md>`_, `O.D.I.N contributors <https://github.com/O.D.I.N/O.D.I.N/blob/master/README.rst>`_ and `Blocks contributors <https://github.com/mila-udem/blocks/blob/master/README.rst>`_. Without their frameworks, it would have taken us much longer to reach this point.
11 | 
12 | As an open-source project by researchers for researchers, we highly welcome
13 | contributions! Every bit helps and will be credited.
14 | 
15 | .. _Keras: https://github.com/fchollet/keras
16 | .. _O.D.I.N: https://github.com/O.D.I.N/O.D.I.N
17 | .. _Blocks: https://github.com/mila-udem/blocks
18 | 
19 | .. ======================== Tutorial ========================
20 | .. _odin-philosopy:
21 | 
22 | Philosophy
23 | ----------
24 | 
25 | O.D.I.N grew out of a need to combine the flexibility of Theano with the availability of the right building blocks for training neural networks. Its development is guided by a number of design goals:
26 | 
27 | * **Simplicity**: Be easy to use, easy to understand and easy to extend, to
28 |   facilitate use in research. Interfaces should be kept small, with as few
29 |   classes and methods as possible. Every added abstraction and feature should
30 |   be carefully scrutinized, to determine whether the added complexity is
31 |   justified.
32 | 
33 | * **Transparency**: Do not hide Theano behind abstractions, directly process
34 |   and return Theano expressions or Python / numpy data types. Try to rely on
35 |   Theano's functionality where possible, and follow Theano's conventions.
36 | 
37 | * **Modularity**: Allow all parts (layers, regularizers, optimizers, ...) to be
38 |   used independently of O.D.I.N. Make it easy to use components in isolation or
39 |   in conjunction with other frameworks.
40 | 
41 | * **Pragmatism**: Make common use cases easy, do not overrate uncommon cases.
42 |   Ideally, everything should be possible, but common use cases shouldn't be
43 |   made more difficult just to cater for exotic ones.
44 | 
45 | * **Restraint**: Do not obstruct users with features they decide not to use.
46 |   Both in using and in extending components, it should be possible for users to
47 |   be fully oblivious to features they do not need.
48 | 
49 | * **Focus**: "Do one thing and do it well". Do not try to provide a library for
50 |   everything to do with deep learning.
51 | 
52 | .. ======================== Tutorial ========================
53 | .. _odin-pipeline:
54 | 
55 | Machine Learning pipeline
56 | -------------------------
57 | 
We enhance the modularity of the traditional machine learning pipeline in order to parallelize and speed up the process as much as possible. The following figure illustrates the overall O.D.I.N design for machine learning problems.
59 | 
60 | .. image:: ./_imgs/odin_scheme.jpg
61 |     :align: center
62 |     :alt: O.D.I.N scheme for machine learning
63 | 
The main difference is that we divide data preprocessing and feature extraction into many steps and leverage Python's ``multiprocessing`` to significantly speed up the process.
65 | 
This scheme is also more storage efficient: because the data is cached after each step, a step can reuse the preprocessed data without re-processing it.
67 | 


--------------------------------------------------------------------------------
/odin/preprocessing/confs/openSMILEloudness.cfg:
--------------------------------------------------------------------------------
  1 | ///////////////////////////////////////////////////////////////////////////////////////
  2 | ///////// > openSMILE configuration file for speech prosody features //////////////////
  3 | /////////   pitch and loudness                                       //////////////////
  4 | /////////                                                            //////////////////
  5 | ///////// (c) 2014-2016 audEERING.                                   //////////////////
  6 | /////////     All rights reserverd. See file COPYING for details.    //////////////////
  7 | ///////////////////////////////////////////////////////////////////////////////////////
  8 | 
  9 | // === Newest version of prosody features: ===
 10 | //
 11 | // Includes viterbi-smoothed SHS pitch
 12 | // Loudness via simple auditory band model
 13 | 
 14 | // Supports both summarised features (over full input) with -O option (ARFF format)
 15 | // and LLDs with -lld option  (disabled by default if option is not given)
 16 | 
 17 | [componentInstances:cComponentManager]
 18 | instance[dataMemory].type=cDataMemory
 19 | instance[waveIn].type=cWaveSource
 20 | ;printLevelStats=0
 21 | 
 22 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;; Wave input ;;;;;;;;;;;;;;;;;;;;;;;;;;;
 23 | [waveIn:cWaveSource]
 24 | writer.dmLevel=wave
 25 | buffersize_sec = 10.0
 26 | filename=\cm[inputfile(I):name of input file]
 27 | start=0
 28 | end=-1
 29 | monoMixdown=1
 30 | outFieldName = pcm
 31 | 
 32 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; LOUDNESS ;;;;;;;;;;;;;;;;;;;;;;;;;
 33 | [componentInstances:cComponentManager]
 34 | instance[frame25].type=cFramer
 35 | instance[win25].type=cWindower
 36 | instance[fft25].type=cTransformFFT
 37 | instance[fftmp25].type=cFFTmagphase
 38 | instance[lldcsvsink].type=cCsvSink
 39 | 
 40 | [frame25:cFramer]
 41 | reader.dmLevel=wave
 42 | writer.dmLevel=frame25
 43 | frameSize = {framesize}
 44 | frameStep = {framestep}
 45 | frameCenterSpecial = left
 46 | 
 47 | [win25:cWindower]
 48 | reader.dmLevel=frame25
 49 | writer.dmLevel=winH25
 50 | winFunc=hamming
 51 | 
 52 | [fft25:cTransformFFT]
 53 | reader.dmLevel=winH25
 54 | writer.dmLevel=fftcH25
 55 |  ; for compatibility with 2.2.0 and older versions
 56 | zeroPadSymmetric = 0
 57 | 
 58 | [fftmp25:cFFTmagphase]
 59 | reader.dmLevel=fftcH25
 60 | writer.dmLevel=fftmagH25
 61 | 
 62 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; LOUDNESS ;;;;;;;;;;;;;;;;;;;;;;;;;
 63 | [componentInstances:cComponentManager]
 64 | instance[melspec1].type=cMelspec
 65 | instance[audspec].type=cPlp
 66 | instance[audspecSum].type=cVectorOperation
 67 | 
 68 | [melspec1:cMelspec]
 69 | reader.dmLevel=fftmagH25
 70 | writer.dmLevel=melspec1
 71 | ; htk compatible sample value scaling
 72 | htkcompatible = 0
 73 | nBands = {nmel}
 74 | ; use power spectrum instead of magnitude spectrum
 75 | usePower = 1
 76 | lofreq = {fmin}
 77 | hifreq = {fmax}
 78 | specScale = mel
 79 | showFbank = 0
 80 | 
 81 | ; perform auditory weighting of spectrum
 82 | [audspec:cPlp]
 83 | reader.dmLevel=melspec1
 84 | writer.dmLevel=audspec
 85 | firstCC = 0
 86 | lpOrder = 5
 87 | cepLifter = 22
 88 | compression = 0.33
 89 | htkcompatible = 0
 90 | doIDFT = 0
 91 | doLpToCeps = 0
 92 | doLP = 0
 93 | doInvLog = 0
 94 | doAud = 1
 95 | doLog = 0
 96 | newRASTA=0
 97 | RASTA=0
 98 | 
 99 | [audspecSum:cVectorOperation]
100 | reader.dmLevel = audspec
101 | writer.dmLevel = loudness
102 | writer.levelconf.growDyn = 0
103 | writer.levelconf.isRb = 1
; This must be greater than the buffer size of the viterbi smoother
105 | writer.levelconf.nT = 200
106 | nameAppend = loudness
107 | copyInputName = 0
108 | processArrayFields = 0
109 | operation = ll1
110 | nameBase = loudness
111 | 
112 | [lldcsvsink:cCsvSink]
113 | reader.dmLevel = loudness
114 | filename= \cm[csvoutput(O):name of output file]
115 | append = 0
116 | timestamp = 0
117 | number = 0
118 | printHeader = 0
119 | errorOnNoOutput = 1
120 | delimChar = ,
121 | 


--------------------------------------------------------------------------------
/examples/vae/two_stage_vae_test.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | 
  5 | from odin.bay import TwoStageVAE, plot_latent_stats
  6 | from odin.fuel import MNIST
  7 | from odin.networks import get_networks
  8 | from odin import visual as vs
  9 | from tqdm import tqdm
 10 | from odin.ml import fast_tsne
 11 | 
# MNIST data: mini-batches of 32 for training and 36 for validation.
# 36 matches the 6x6 image grids drawn at the end of this script;
# `drop_remainder=True` presumably guarantees every validation batch is full.
ds = MNIST()
train = ds.create_dataset('train', batch_size=32)
valid = ds.create_dataset('valid', batch_size=36, label_percent=1.0,
                          drop_remainder=True)

vae = TwoStageVAE(**get_networks(ds.name))
vae.build(ds.full_shape)
# Hard-coded switch: as written the script loads previously saved weights from
# /tmp/twostagevae; change the condition to False to train, save and exit.
if True:
  vae.load_weights('/tmp/twostagevae', verbose=True, raise_notfound=True)
else:
  vae.fit(train, learning_rate=1e-3, max_iter=300000)
  vae.save_weights('/tmp/twostagevae')
  exit()

# Collect the posterior means of the three distributions returned by
# `encode_two_stages` (named q(z|x), q(u|z) and q(z|u) in the plots below),
# together with the integer class labels, over the whole validation set.
Z = []
U = []
Z_hat = []
Y = []
for x, y in tqdm(valid):
  qz_x, qu_z, qz_u = vae.encode_two_stages(x)
  Z.append(qz_x.mean())
  U.append(qu_z.mean())
  Z_hat.append(qz_u.mean())
  # `y` is reduced with argmax over the last axis (presumably one-hot labels)
  Y.append(np.argmax(y, axis=-1))
# keep at most the first 5000 points for the t-SNE plots
Z = np.concatenate(Z, 0)[:5000]
U = np.concatenate(U, 0)[:5000]
Z_hat = np.concatenate(Z_hat, 0)[:5000]
Y = np.concatenate(Y, 0)[:5000]

# side-by-side t-SNE scatter plots of the three latent representations,
# coloured by class label
plt.figure(figsize=(15, 5), dpi=150)
vs.plot_scatter(fast_tsne(Z), color=Y, grid=False, ax=(1, 3, 1))
vs.plot_scatter(fast_tsne(U), color=Y, grid=False, ax=(1, 3, 2))
vs.plot_scatter(fast_tsne(Z_hat), color=Y, grid=False, ax=(1, 3, 3))
plt.tight_layout()

# NOTE: from here on `qz_x`, `qu_z` and `qz_u` are the loop variables left over
# from the LAST validation batch, so the statistics below describe that single
# batch only. Latent dimensions are ordered by their average stddev.
ids = np.argsort(np.mean(qz_x.stddev(), 0))
ids_u = np.argsort(np.mean(qu_z.stddev(), 0))

plt.figure(figsize=(10, 10), dpi=200)
plot_latent_stats(mean=np.mean(qz_x.mean(), 0)[ids],
                  stddev=np.mean(qz_x.stddev(), 0)[ids],
                  ax=(3, 1, 1), name='q(z|x)')
plot_latent_stats(mean=np.mean(qu_z.mean(), 0)[ids_u],
                  stddev=np.mean(qu_z.stddev(), 0)[ids_u],
                  ax=(3, 1, 2), name='q(u|z)')
# q(z|u) reuses the ordering computed from q(z|x) so both panels line up
plot_latent_stats(mean=np.mean(qz_u.mean(), 0)[ids],
                  stddev=np.mean(qz_u.stddev(), 0)[ids],
                  ax=(3, 1, 3), name='q(z|u)')
plt.tight_layout()

# Stage 1: `px1` is the output distribution for the last validation batch
# (`x` still holds it; the comprehension below has its own `x`), and `llk1`
# is the mean log-probability over the whole validation set.
vae.set_eval_stage(1)
px1, _ = vae(x)
llk1 = np.mean(tf.concat([vae(x)[0].log_prob(x) for x, _ in tqdm(valid)], 0))
print('Stage1:', llk1)

# Stage 2: same evaluation after switching the model to its second stage.
vae.set_eval_stage(2)
px2, _ = vae(x)
llk2 = np.mean(tf.concat([vae(x)[0].log_prob(x) for x, _ in tqdm(valid)], 0))
print('Stage2:', llk2)
 71 | 
 72 | images = np.squeeze(px1.mean().numpy(), -1)
 73 | plt.figure(figsize=(10, 10), dpi=150)
 74 | for i in range(36):
 75 |   img = images[i]
 76 |   plt.subplot(6, 6, i + 1)
 77 |   plt.imshow(img, cmap='Greys_r')
 78 |   plt.axis('off')
 79 |   plt.margins(0)
 80 | plt.tight_layout()
 81 | 
 82 | images = np.squeeze(px2.mean().numpy(), -1)
 83 | plt.figure(figsize=(10, 10), dpi=150)
 84 | for i in range(36):
 85 |   img = images[i]
 86 |   plt.subplot(6, 6, i + 1)
 87 |   plt.imshow(img, cmap='Greys_r')
 88 |   plt.axis('off')
 89 |   plt.margins(0)
 90 | plt.tight_layout()
 91 | 
 92 | images = np.squeeze(vae.sample_observation(36, two_stage=False).mean().numpy(),
 93 |                     -1)
 94 | plt.figure(figsize=(10, 10), dpi=150)
 95 | for i in range(36):
 96 |   img = images[i]
 97 |   plt.subplot(6, 6, i + 1)
 98 |   plt.imshow(img, cmap='Greys_r')
 99 |   plt.axis('off')
100 |   plt.margins(0)
101 | plt.tight_layout()
102 | 
103 | images = np.squeeze(vae.sample_observation(36, two_stage=True).mean().numpy(),
104 |                     -1)
105 | plt.figure(figsize=(10, 10), dpi=150)
106 | for i in range(36):
107 |   img = images[i]
108 |   plt.subplot(6, 6, i + 1)
109 |   plt.imshow(img, cmap='Greys_r')
110 |   plt.axis('off')
111 |   plt.margins(0)
112 | plt.tight_layout()
113 | 
114 | vs.plot_save()
115 | 


--------------------------------------------------------------------------------
/examples/vae/rate_distortion_onehot.py:
--------------------------------------------------------------------------------
  1 | import itertools
  2 | import os
  3 | import pickle
  4 | 
  5 | import matplotlib.pyplot as plt
  6 | import numpy as np
  7 | import pandas as pd
  8 | from sklearn.metrics import accuracy_score
  9 | from tensorflow.python.keras import Sequential
 10 | from tensorflow.python.keras.layers import Dense
 11 | 
 12 | from odin.bay import DistributionDense, MVNDiagLatents, BetaGammaVAE, \
 13 |   DisentanglementGym
 14 | from odin.fuel import MNIST
 15 | from odin.utils import MPI
 16 | from multiprocessing import cpu_count
 17 | import seaborn as sns
 18 | from odin import  visual as vs
 19 | 
 20 | sns.set()
 21 | 
 22 | # ===========================================================================
 23 | # Const and helper
 24 | # ===========================================================================
 25 | root_path = '/home/trung/exp/rate_distortion_onehot'
 26 | if not os.path.exists(root_path):
 27 |   os.makedirs(root_path)
 28 | 
 29 | 
 30 | # ===========================================================================
 31 | # Create dataset and some test
 32 | # ===========================================================================
 33 | def test_vae_y(args):
 34 |   ds = MNIST()
 35 |   train_y = ds.create_dataset('train', label_percent=1.0).map(lambda x, y: y)
 36 |   valid_y = ds.create_dataset('valid', label_percent=1.0).map(
 37 |     lambda x, y: (y, y))
 38 | 
 39 |   gamma, beta = args
 40 |   basedir = os.path.join(root_path, 'vaey')
 41 |   save_path = os.path.join(basedir, f'{gamma}_{beta}')
 42 |   logdir = os.path.join(basedir, f'{gamma}_{beta}_log')
 43 |   vae_y = BetaGammaVAE(
 44 |     encoder=Sequential([Dense(256, 'relu'),
 45 |                         Dense(256, 'relu')],
 46 |                        name='Encoder'),
 47 |     decoder=Sequential([Dense(256, 'relu'),
 48 |                         Dense(256, 'relu')],
 49 |                        name='Decoder'),
 50 |     latents=MVNDiagLatents(10),
 51 |     observation=DistributionDense([10], posterior='onehot', projection=True,
 52 |                                   name='Digits'),
 53 |     gamma=gamma,
 54 |     beta=beta
 55 |   )
 56 |   vae_y.build((None, 10))
 57 |   vae_y.load_weights(save_path)
 58 |   vae_y.fit(train_y, max_iter=20000, logdir=logdir, skip_fitted=True)
 59 |   vae_y.save_weights(save_path)
 60 | 
 61 |   gym = DisentanglementGym(model=vae_y, valid=valid_y)
 62 |   with gym.run_model(partition='valid'):
 63 |     y_true = np.argmax(gym.y_true, -1)
 64 |     y_pred = np.argmax(gym.px_z[0].mode(), -1)
 65 |     acc = accuracy_score(y_true, y_pred)
 66 |     results = dict(acc=acc,
 67 |                    llk=gym.log_likelihood()[0],
 68 |                    kl=gym.kl_divergence()[0],
 69 |                    au=gym.active_units()[0],
 70 |                    gamma=gamma,
 71 |                    beta=beta)
 72 |     gym.plot_correlation()
 73 |     gym.plot_latents_stats()
 74 |     gym.plot_latents_tsne()
 75 |   gym.save_figures(save_path + '.pdf', verbose=True)
 76 |   return results
 77 | 
 78 | 
 79 | results_path = os.path.join(root_path, 'results')
 80 | jobs = list(itertools.product(np.linspace(0.1, 100, num=30),
 81 |                               np.linspace(0.1, 100, num=30)))
 82 | if not os.path.exists(results_path):
 83 |   data = []
 84 |   for results in MPI(jobs, func=test_vae_y, ncpu=cpu_count() - 1):
 85 |     data.append(results)
 86 |   df = pd.DataFrame(data)
 87 |   with open(results_path, 'wb') as f:
 88 |     pickle.dump(df, f)
 89 | else:
 90 |   with open(results_path, 'rb') as f:
 91 |     df = pickle.load(f)
 92 | 
 93 | df: pd.DataFrame
 94 | print(df)
 95 | 
 96 | for name in ['acc', 'llk', 'kl', 'au']:
 97 |   plt.figure(figsize=(9, 8), dpi=150)
 98 |   splot = sns.scatterplot(x='beta', y='gamma', hue=name, size=name,
 99 |                           data=df, sizes=(20, 200), alpha=0.95,
100 |                           linewidth=0, palette='coolwarm')
101 |   plt.title(name)
102 |   plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., fontsize=12)
103 | 
104 | vs.plot_save(verbose=True)


--------------------------------------------------------------------------------
/odin/visual/animation.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import os
  4 | 
  5 | import numpy as np
  6 | 
  7 | 
  8 | class Animation(object):
  9 |   r""" This class tracking the changes in image using Gif animation
 10 | 
 11 |   Arguments:
 12 |     figsize : tuple of Integer. Given the width and height of the figure.
 13 |   """
 14 | 
 15 |   def __init__(self, figsize=None):
 16 |     super().__init__()
 17 |     from matplotlib import pyplot as plt
 18 |     if figsize is not None:
 19 |       self.fig = plt.figure(figsize=figsize)
 20 |     else:
 21 |       self.fig = plt.figure()
 22 |     self.artists = []
 23 |     self.axes = []
 24 | 
 25 |   def __len__(self):
 26 |     return len(self.artists)
 27 | 
 28 |   def plot_spectrogram(self, spec, cmap='magma'):
 29 |     r"""
 30 |     Arguments:
 31 |       spec: 3D Tensor, a minibatch of spectrogram in (T, D) format
 32 |     """
 33 |     assert len(spec.shape) == 3, "spec must be 3-D tensor."
 34 |     n = int(np.ceil(np.sqrt(spec.shape[0])))
 35 |     if len(self.axes) == 0:
 36 |       self.axes = [
 37 |           self.fig.add_subplot(n, n, i + 1) for i in range(spec.shape[0])
 38 |       ]
 39 |     imgs = []
 40 |     for i, ax in enumerate(self.axes):
 41 |       x = spec[i, :, :]
 42 |       if hasattr(x, 'numpy'):
 43 |         x = x.numpy()
 44 |       # transpose to time(x)-frequency(y)
 45 |       im = ax.pcolorfast(x.T, cmap=cmap)
 46 |       ax.axis('off')
 47 |       imgs.append(im)
 48 |     self.artists.append(imgs)
 49 |     return self
 50 | 
 51 |   def plot_images(self, images, grayscale=False):
 52 |     r"""
 53 |     Arguments:
 54 |       images: 3D or 4D Tensor
 55 |       grayscale: A Boolean. The images are grayscale images, if 3D tensor is
 56 |         provided, grayscale automatically switch to True
 57 |     """
 58 |     assert len(images.shape) == 4 or len(images.shape) == 3, \
 59 |       "Only support 3D or 4D batched-images."
 60 |     if len(images.shape) == 3:
 61 |       grayscale = True
 62 |     elif len(images.shape) == 4 and images.shape[-1] == 1:
 63 |       grayscale = True
 64 |       images = images[:, :, :, 0]
 65 |     n = int(np.ceil(np.sqrt(images.shape[0])))
 66 |     if len(self.axes) == 0:
 67 |       self.axes = [
 68 |           self.fig.add_subplot(n, n, i + 1) for i in range(images.shape[0])
 69 |       ]
 70 |     imgs = []
 71 |     for i, ax in enumerate(self.axes):
 72 |       im = ax.imshow(images[i, :, :], cmap='gray') if grayscale else \
 73 |         ax.imshow(images[i, :, :])  # channel last
 74 |       ax.axis('off')
 75 |       imgs.append(im)
 76 |     self.artists.append(imgs)
 77 |     return self
 78 | 
 79 |   def save(self,
 80 |            path='/tmp/tmp.gif',
 81 |            save_freq=None,
 82 |            writer='imagemagick',
 83 |            clear_folder=False,
 84 |            dpi=None,
 85 |            interval=200,
 86 |            repeat_delay=1200,
 87 |            repeat=False):
 88 |     r"""
 89 |     path : path to 'gif' or 'png' file, if a folder is given, write the
 90 |       animation to multiple 'png' files.
 91 |     save_freq : None or Integer. If given, only save the animation at given
 92 |       frequency, determined by number of artists stored.
 93 |     writer: 'ffmpeg', 'pillow', 'imagemagick', None
 94 |     """
 95 |     if len(self.artists) <= 1:
 96 |       return self
 97 |     if save_freq is not None:
 98 |       if len(self.artists) % int(save_freq) != 0:
 99 |         return self
100 |     # ====== save to Animation ====== #
101 |     import matplotlib.animation as animation
102 |     if os.path.isdir(path):
103 |       if clear_folder:
104 |         for f in os.listdir(path):
105 |           f = os.path.join(path, f)
106 |           if os.path.isfile(f):
107 |             os.remove(f)
108 |       path = os.path.join(path, 'image.png')
109 |     ani = animation.ArtistAnimation(self.fig,
110 |                                     self.artists,
111 |                                     interval=interval,
112 |                                     repeat_delay=repeat_delay,
113 |                                     repeat=repeat,
114 |                                     blit=True)
115 |     ani.save(path, writer=writer, dpi=dpi)
116 |     return self
117 | 


--------------------------------------------------------------------------------
/odin/bay/vi/autoencoder/hyperbolic_vae.py:
--------------------------------------------------------------------------------
  1 | from typing import Union
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | from tensorflow.python.keras.layers import Layer
  6 | from tensorflow_probability.python.distributions import (PowerSpherical,
  7 |                                                          SphericalUniform,
  8 |                                                          VonMisesFisher)
  9 | from tensorflow_probability.python.layers import DistributionLambda
 10 | from typing_extensions import Literal
 11 | 
 12 | from odin.backend.interpolation import Interpolation, linear
 13 | from odin.bay.layers.dense_distribution import DistributionDense
 14 | from odin.bay.random_variable import RVconf
 15 | from odin.bay.vi.autoencoder.beta_vae import BetaVAE
 16 | 
 17 | __all__ = ['HypersphericalVAE', 'PowersphericalVAE']
 18 | 
 19 | 
 20 | class _von_mises_fisher:
 21 | 
 22 |   def __init__(self, event_size):
 23 |     self.event_size = int(event_size)
 24 | 
 25 |   def __call__(self, x):
 26 |     # use softplus1 for concentration to prevent collapse and instability with
 27 |     # small concentration
 28 |     # note in the paper:
 29 |     # z_var = tf.layers.dense(h1, units=1, activation=tf.nn.softplus) + 1
 30 |     return VonMisesFisher(
 31 |       mean_direction=tf.math.l2_normalize(x[..., :self.event_size], axis=-1),
 32 |       concentration=tf.nn.softplus(x[..., -1]),
 33 |     )
 34 | 
 35 | 
 36 | class _power_spherical:
 37 | 
 38 |   def __init__(self, event_size):
 39 |     self.event_size = int(event_size)
 40 | 
 41 |   def __call__(self, x):
 42 |     return PowerSpherical(
 43 |       mean_direction=tf.math.l2_normalize(x[..., :self.event_size], axis=-1),
 44 |       concentration=tf.nn.softplus(x[..., -1]),
 45 |     )
 46 | 
 47 | 
 48 | class HypersphericalVAE(BetaVAE):
 49 |   """Hyper-spherical VAE
 50 | 
 51 |   References
 52 |   -----------
 53 |   Davidson, T. R., Falorsi, L., De Cao, N., Kipf, T. & Tomczak, J. M.
 54 |       Hyperspherical Variational Auto-Encoders. arXiv:1804.00891 [cs, stat] (2018).
 55 |   Davidson, T. R., Tomczak, J. M. & Gavves, E. Increasing Expressivity
 56 |       of a Hyperspherical VAE.
 57 |   Xu, J. & Durrett, G. Spherical Latent Spaces for Stable Variational
 58 |       Autoencoders. arXiv:1808.10805 [cs] (2018).
 59 |   De Cao, N. & Aziz, W. The Power Spherical distribution.
 60 |       arXiv:2006.04437 [cs, stat] (2020).
 61 |   """
 62 | 
 63 |   def __init__(
 64 |       self,
 65 |       latents: Union[RVconf, Layer] = RVconf(64, name="latents"),
 66 |       distribution: Literal[
 67 |         'powerspherical', 'vonmisesfisher'] = 'vonmisesfisher',
 68 |       prior: Union[
 69 |         None, SphericalUniform, VonMisesFisher, PowerSpherical] = None,
 70 |       beta: Union[float, Interpolation] = linear(vmin=1e-6,
 71 |                                                  vmax=1.,
 72 |                                                  steps=2000,
 73 |                                                  delay_in=0),
 74 |       **kwargs):
 75 |     event_shape = latents.event_shape
 76 |     event_size = int(np.prod(event_shape))
 77 |     distribution = str(distribution).lower()
 78 |     assert distribution in ('powerspherical', 'vonmisesfisher'), \
 79 |       ('Support PowerSpherical or VonMisesFisher distribution, '
 80 |        f'but given: {distribution}')
 81 |     if distribution == 'powerspherical':
 82 |       fn_distribution = _power_spherical(event_size)
 83 |       default_prior = SphericalUniform(dimension=event_size)
 84 |     else:
 85 |       fn_distribution = _von_mises_fisher(event_size)
 86 |       default_prior = VonMisesFisher(0, 10)
 87 |     if prior is None:
 88 |       prior = default_prior
 89 |     latents = DistributionDense(
 90 |       event_shape,
 91 |       posterior=DistributionLambda(make_distribution_fn=fn_distribution),
 92 |       prior=prior,
 93 |       units=event_size + 1,
 94 |       name=latents.name)
 95 |     super().__init__(latents=latents,
 96 |                      analytic=True,
 97 |                      beta=beta,
 98 |                      **kwargs)
 99 | 
100 | 
class PowersphericalVAE(HypersphericalVAE):
  """`HypersphericalVAE` with the posterior fixed to the power spherical
  distribution; every other keyword argument is forwarded unchanged."""

  def __init__(self, **kwargs):
    # `distribution` is fixed by this class: a caller-supplied value is
    # discarded, and omitting it (the normal case) must not raise KeyError
    kwargs.pop('distribution', None)
    super().__init__(distribution='powerspherical', **kwargs)
106 | 
107 | 
# Placeholder for a Poincare VAE: the body is empty, so the class currently
# adds nothing on top of `BetaVAE`. It is not listed in `__all__`.
class poincareVAE(BetaVAE):
  ...
110 | 


--------------------------------------------------------------------------------
/tests/bayesian/test_mixture_distributions.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import os
  4 | import unittest
  5 | from itertools import product
  6 | from tempfile import mkstemp
  7 | 
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | 
 11 | from odin.bay import distributions as obd
 12 | from odin.bay.layers import MixtureDensityNetwork, MixtureMassNetwork
 13 | 
# NOTE(review): both variables are set after `import tensorflow` above;
# TensorFlow may already have read them at import time -- confirm that they
# still take effect when set here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

# fixed seeds for the numpy and tensorflow random generators used below
np.random.seed(8)
tf.random.set_seed(1)
 19 | 
 20 | 
 21 | class MixtureTest(unittest.TestCase):
 22 | 
 23 |   def test_gmm(self):
 24 |     nsamples = 3
 25 |     ndims = 5
 26 |     x = np.random.rand(nsamples, ndims).astype('float32')
 27 |     #
 28 |     for cov in ('tril', 'none', 'diag'):
 29 |       gmm = obd.GaussianMixture(
 30 |           loc=np.random.rand(nsamples, 2, ndims).astype('float32'),
 31 |           scale=np.random.rand(nsamples, 2,
 32 |                                obd.GaussianMixture.scale_size(
 33 |                                    ndims, cov)).astype('float32'),
 34 |           logits=np.random.rand(nsamples, 2).astype('float32'),
 35 |           covariance_type=cov)
 36 |       print(gmm, gmm.sample().shape)
 37 |       gmm.log_prob(x)
 38 |     #
 39 |     for cov in ('tied', 'full', 'diag', 'spherical'):
 40 |       tfp_gmm, sk_gmm = obd.GaussianMixture.init(x,
 41 |                                                  n_components=2,
 42 |                                                  covariance_type=cov,
 43 |                                                  return_sklearn=True)
 44 |       print(cov, tfp_gmm, tfp_gmm.sample().shape)
 45 |       llk1 = tfp_gmm.log_prob(x).numpy()
 46 |       llk2 = sk_gmm.score_samples(x)
 47 |       assert np.all(
 48 |           np.isclose(llk1, llk2, rtol=1.e-3, atol=1.e-3, equal_nan=True))
 49 | 
 50 |   def test_trainable_gmm(self):
 51 |     X = 2 + 3 * np.random.randn(100000, 5)
 52 |     gmm = obd.GaussianMixture.init(X,
 53 |                                    n_components=3,
 54 |                                    covariance_type='tril',
 55 |                                    max_samples=64,
 56 |                                    trainable=True)
 57 |     gmm.fit(X, verbose=True, max_iter=1000)
 58 | 
 59 |   def test_mixture_density_network(self):
 60 |     x = tf.random.uniform((8, 4), dtype='float32')
 61 |     it = [True, False]
 62 |     for i, (covariance, tie_mixtures, tie_loc, tie_scale) in enumerate(
 63 |         product(['none', 'diag', 'tril'], it, it, it)):
 64 |       print(f"#{i} MixtureDensityNetwork tie_mixtures:{tie_mixtures} "
 65 |             f"tie_loc:{tie_loc} tie_scale:{tie_scale} covariance:{covariance}")
 66 |       kw = dict(covariance=covariance,
 67 |                 tie_mixtures=tie_mixtures,
 68 |                 tie_loc=tie_loc,
 69 |                 tie_scale=tie_scale)
 70 |       try:
 71 |         net = MixtureDensityNetwork(units=5, **kw)
 72 |         y = net(x)
 73 |       except ValueError as e:
 74 |         pass
 75 |       except Exception as e:
 76 |         import traceback
 77 |         traceback.print_exc()
 78 |         raise e
 79 | 
 80 |   def test_mixture_mass_network(self):
 81 |     x = tf.random.uniform((8, 4), dtype='float32')
 82 |     it = [True, False]
 83 | 
 84 |     for i, (alternative, tie_mixtures, tie_mean, dispersion,
 85 |             inflation) in enumerate(
 86 |                 product(it, it, it, ['full', 'share', 'single'],
 87 |                         ['full', 'share', 'single', None])):
 88 |       print(f"#{i} MixtureMassNetwork tie_mixtures:{tie_mixtures} "
 89 |             f"tie_mean:{tie_mean} disp:{dispersion} inflated:{inflation}")
 90 |       kw = dict(tie_mixtures=tie_mixtures,
 91 |                 tie_mean=tie_mean,
 92 |                 zero_inflated=inflation is not None,
 93 |                 dispersion=dispersion,
 94 |                 inflation='full' if inflation is None else inflation,
 95 |                 alternative=alternative)
 96 |       try:
 97 |         net = MixtureMassNetwork(event_shape=(5,), **kw)
 98 |         y = net(x)
 99 |       except ValueError as e:
100 |         pass
101 |       except Exception as e:
102 |         import traceback
103 |         traceback.print_exc()
104 |         raise e
105 | 
106 | 
107 | if __name__ == '__main__':
108 |   unittest.main()
109 | 


--------------------------------------------------------------------------------
/odin/fuel/nlp_data/newsgroup.py:
--------------------------------------------------------------------------------
  1 | from typing import Iterable, Union
  2 | 
  3 | import numpy as np
  4 | from numpy import ndarray
  5 | from odin.fuel.nlp_data._base import NLPDataset
  6 | from odin.utils import one_hot
  7 | from scipy.sparse import spmatrix
  8 | from sklearn.datasets import fetch_20newsgroups
  9 | from sklearn.model_selection import train_test_split
 10 | 
 11 | 
 12 | class Newsgroup20(NLPDataset):
 13 |   r""" Categories:
 14 |     - alt.atheism
 15 |     - misc.forsale
 16 |     - soc.religion.christian
 17 |     - comp.graphics, comp.os.ms-windows.misc, comp.sys.ibm.pc.hardware,
 18 |         comp.sys.mac.hardware, comp.windows.x
 19 |     - rec.autos, rec.motorcycles, rec.sport.baseball, rec.sport.hockey
 20 |     - sci.crypt, sci.electronics, sci.med, sci.space
 21 |     - talk.politics.guns, talk.politics.mideast, talk.politics.misc,
 22 |         talk.religion.misc
 23 |   """
 24 | 
 25 |   def __init__(self,
 26 |                algorithm='count',
 27 |                vocab_size: int = 2000,
 28 |                min_frequency: int = 2,
 29 |                max_frequency: float = 0.95,
 30 |                max_length: int = 500,
 31 |                cache_path: str = "~/nlp_data/newsgroup20",
 32 |                **kwargs):
 33 |     categorices = kwargs.pop('categorices', None)
 34 |     super().__init__(algorithm=algorithm,
 35 |                      vocab_size=vocab_size,
 36 |                      min_frequency=min_frequency,
 37 |                      max_frequency=max_frequency,
 38 |                      max_length=max_length,
 39 |                      cache_path=cache_path,
 40 |                      **kwargs)
 41 |     kw = dict(shuffle=True,
 42 |               random_state=1,
 43 |               categories=categorices,
 44 |               remove=('headers', 'footers', 'quotes'))
 45 |     data = fetch_20newsgroups(subset='train', return_X_y=False, **kw)
 46 |     X_train, y_train = data.data, data.target
 47 |     labels_name = data.target_names
 48 |     self.X_test, y_test = fetch_20newsgroups(subset='test',
 49 |                                              return_X_y=True,
 50 |                                              **kw)
 51 |     self.X_train, self.X_valid, y_train, y_valid = train_test_split(
 52 |         X_train, y_train, test_size=0.2, shuffle=True, random_state=0)
 53 |     self._labels = np.array(labels_name)
 54 |     self.y_train = one_hot(y_train, len(self._labels))
 55 |     self.y_valid = one_hot(y_valid, len(self._labels))
 56 |     self.y_test = one_hot(y_test, len(self._labels))
 57 | 
 58 |   @property
 59 |   def train_text(self) -> Iterable[str]:
 60 |     for doc in self.X_train:
 61 |       yield doc
 62 | 
 63 |   @property
 64 |   def valid_text(self) -> Iterable[str]:
 65 |     for doc in self.X_valid:
 66 |       yield doc
 67 | 
 68 |   @property
 69 |   def test_text(self) -> Iterable[str]:
 70 |     for doc in self.X_test:
 71 |       yield doc
 72 | 
 73 |   @property
 74 |   def train_labels(self) -> Union[ndarray, spmatrix]:
 75 |     return self.y_train
 76 | 
 77 |   @property
 78 |   def valid_labels(self) -> Union[ndarray, spmatrix]:
 79 |     return self.y_valid
 80 | 
 81 |   @property
 82 |   def test_labels(self) -> Union[ndarray, spmatrix]:
 83 |     return self.y_test
 84 | 
 85 | 
 86 | class Newsgroup5(Newsgroup20):
 87 |   r""" Subset of 5 categories:
 88 |     - 'soc.religion.christian'
 89 |     - 'comp.graphics'
 90 |     - 'rec.sport.hockey'
 91 |     - 'sci.space'
 92 |     - 'talk.politics.guns'
 93 |   """
 94 | 
 95 |   def __init__(self,
 96 |                algorithm='count',
 97 |                vocab_size: int = 2000,
 98 |                min_frequency: int = 2,
 99 |                max_frequency: float = 0.95,
100 |                max_length: int = 500,
101 |                cache_path: str = "~/nlp_data/newsgroup5",
102 |                **kwargs):
103 |     super().__init__(algorithm=algorithm,
104 |                      vocab_size=vocab_size,
105 |                      min_frequency=min_frequency,
106 |                      max_frequency=max_frequency,
107 |                      max_length=max_length,
108 |                      cache_path=cache_path,
109 |                      categorices=[
110 |                          'soc.religion.christian', 'comp.graphics',
111 |                          'rec.sport.hockey', 'sci.space', 'talk.politics.guns'
112 |                      ],
113 |                      **kwargs)
114 | 


--------------------------------------------------------------------------------
/odin/ml/tree.py:
--------------------------------------------------------------------------------
  1 | import inspect
  2 | from typing import Optional, Union, Any
  3 | from warnings import warn
  4 | 
  5 | import numpy as np
  6 | from typing_extensions import Literal
  7 | from sklearn.ensemble import GradientBoostingClassifier
  8 | from sklearn.ensemble import RandomForestClassifier
  9 | 
 10 | __all__ = [
 11 |     'fast_gbtree_classifier',
 12 |     'fast_rf_classifier',
 13 | ]
 14 | 
 15 | Objectives = Literal['reg:squarederror', 'reg:squaredlogerror', 'reg:logistic',
 16 |                      'reg:pseudohubererror', 'binary:logistic',
 17 |                      'binary:logitraw', 'binary:hinge', 'count:poisson',
 18 |                      'survival:cox', 'survival:aft', 'aft_loss_distribution',
 19 |                      'multi:softmax', 'multi:softprob', 'rank:pairwise',
 20 |                      'rank:ndcg', 'rank:map', 'reg:gamma', 'reg:tweedie']
 21 | 
 22 | 
 23 | def fast_gbtree_classifier(
 24 |     X,
 25 |     y,
 26 |     *,
 27 |     learning_rate: float = 1.0,
 28 |     n_estimators: int = 100,
 29 |     subsample: float = 0.8,
 30 |     max_depth: Optional[int] = None,
 31 |     reg_alpha: Optional[float] = None,  # L1
 32 |     reg_lambda: Optional[float] = 1e-05,  # L2
 33 |     gamma: Optional[float] = None,
 34 |     missing: Optional[Any] = np.nan,
 35 |     objective: Objectives = 'binary:logistic',
 36 |     grow_policy: Literal['depthwise', 'lossguide'] = 'depthwise',
 37 |     tree_method: Literal['auto', 'exact', 'approx', 'hist',
 38 |                          'gpu_hist'] = 'auto',
 39 |     importance_type: Literal['gain', 'weight', 'cover', 'total_gain',
 40 |                              'total_cover'] = 'gain',
 41 |     random_state: int = 1,
 42 |     n_jobs: Optional[int] = None,
 43 |     framework: Literal['auto', 'xgboost', 'sklearn'] = 'auto',
 44 |     **kwargs,
 45 | ) -> GradientBoostingClassifier:
 46 |   """Shared interface for XGBoost and sklearn Gradient Boosting Tree Classifier"""
 47 |   kw = dict(locals())
 48 |   kwargs = kw.pop('kwargs')
 49 |   X = kw.pop('X')
 50 |   y = kw.pop('y')
 51 |   kw.update(kwargs)
 52 |   framework = kw.pop('framework')
 53 |   ### XGBOOST
 54 |   is_xgboost = False
 55 |   if framework == 'sklearn':
 56 |     XGB = GradientBoostingClassifier
 57 |   else:
 58 |     try:
 59 |       from xgboost import XGBRFClassifier as XGB
 60 |       is_xgboost = True
 61 |     except ImportError as e:
 62 |       warn('Run `pip install xgboost` to get significant '
 63 |            'faster GradientBoostingTree')
 64 |       XGB = GradientBoostingClassifier
 65 |   ### fine-tune the keywords for sklearn
 66 |   if not is_xgboost:
 67 |     org = dict(kw)
 68 |     spec = inspect.getfullargspec(XGB.__init__)
 69 |     kw = dict()
 70 |     for k in spec.args + spec.kwonlyargs:
 71 |       if k in org:
 72 |         kw[k] = org[k]
 73 |   ### training
 74 |   tree = XGB(**kw)
 75 |   tree.fit(X, y)
 76 |   return tree
 77 | 
 78 | 
 79 | def fast_rf_classifier(
 80 |     X,
 81 |     y,
 82 |     *,
 83 |     num_classes=2,
 84 |     split_algo=1,
 85 |     split_criterion=0,
 86 |     min_rows_per_node=2,
 87 |     min_impurity_decrease=0.0,
 88 |     bootstrap_features=False,
 89 |     rows_sample=1.0,
 90 |     max_leaves=-1,
 91 |     n_estimators=100,
 92 |     max_depth=16,
 93 |     max_features='auto',
 94 |     bootstrap=True,
 95 |     n_bins=8,
 96 |     n_cols=None,
 97 |     dtype=None,
 98 |     accuracy_metric=None,
 99 |     quantile_per_tree=False,
100 |     n_streams=8,
101 |     random_state: int = 1,
102 |     n_jobs: Optional[int] = None,
103 |     framework: Literal['auto', 'cuml', 'sklearn'] = 'auto',
104 |     **kwargs,
105 | ):
106 |   kw = dict(locals())
107 |   kwargs = kw.pop('kwargs')
108 |   X = kw.pop('X')
109 |   y = kw.pop('y')
110 |   kw.update(kwargs)
111 |   framework = kw.pop('framework')
112 |   ### import
113 |   is_cuml = False
114 |   if framework == 'sklearn':
115 |     RFC = RandomForestClassifier
116 |   else:
117 |     try:
118 |       from cuml.ensemble import RandomForestClassifier as RFC
119 |       is_cuml = True
120 |     except ImportError as e:
121 |       RFC = RandomForestClassifier
122 |   ### fine-tune keywords
123 |   if is_cuml:
124 |     kw['output_type'] = 'numpy'
125 |     kw['seed'] = kw.pop('random_state')
126 |   else:
127 |     kw = dict()
128 |   ### training
129 |   tree = RFC()
130 |   for k, v in tree.__dict__.items():
131 |     print(k, v)
132 |   exit()
133 |   tree.fit(X, y)
134 |   return tree
135 | 


--------------------------------------------------------------------------------
/odin/fuel/image_data/cifar.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from typing_extensions import Literal
  3 | 
  4 | import tensorflow_datasets as tfds
  5 | from odin.fuel.image_data._base import ImageDataset
  6 | 
  7 | 
  8 | def _quantisize(images, levels=256):
  9 |   """" Quantization code from
 10 |   `https://github.com/larsmaaloee/BIVA/blob/master/data/cifar10.py` """
 11 |   images = images / 255.
 12 |   return (np.digitize(images, np.arange(levels) / levels) - 1).astype('i')
 13 | 
 14 | 
 15 | class CIFAR(ImageDataset):
 16 |   """ CIFAR10 """
 17 | 
 18 |   def __init__(self,
 19 |                version: Literal[10, 20, 100],
 20 |                quantize_bits: int = 8,
 21 |                seed: int = 1):
 22 |     assert version in (10, 20, 100), \
 23 |       "Only support CIFAR-10, CIFAR-20 and CIFAR-100"
 24 |     self.version = version
 25 |     if version == 10:
 26 |       dsname = 'cifar10'
 27 |     else:
 28 |       dsname = 'cifar100'
 29 |     self.train, self.valid, self.test = tfds.load(
 30 |         name=dsname,
 31 |         split=['train[:48000]', 'train[48000:]', 'test'],
 32 |         # as_supervised=True,
 33 |         read_config=tfds.ReadConfig(shuffle_seed=seed,
 34 |                                     shuffle_reshuffle_each_iteration=True),
 35 |         shuffle_files=True,
 36 |         with_info=False,
 37 |     )
 38 |     if version in (10, 100):
 39 |       process = lambda dat: (dat['image'], dat['label'])
 40 |     elif version == 20:
 41 |       process = lambda dat: (dat['image'], dat['coarse_label'])
 42 |     self.train = self.train.map(process)
 43 |     self.valid = self.valid.map(process)
 44 |     self.test = self.test.map(process)
 45 | 
 46 |   @property
 47 |   def binarized(self):
 48 |     return False
 49 | 
 50 |   @property
 51 |   def shape(self):
 52 |     return (32, 32, 3)
 53 | 
 54 |   @property
 55 |   def labels(self):
 56 |     if self.version == 10:
 57 |       y = [
 58 |           'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
 59 |           'horse', 'ship', 'truck'
 60 |       ]
 61 |     elif self.version == 20:
 62 |       y = [
 63 |           'aquatic_mammals', 'fish', 'flowers', 'food_containers',
 64 |           'fruit_and_vegetables', 'household_electrical_devices',
 65 |           'household_furniture', 'insects', 'large_carnivores',
 66 |           'large_man-made_outdoor_things', 'large_natural_outdoor_scenes',
 67 |           'large_omnivores_and_herbivores', 'medium_mammals',
 68 |           'non-insect_invertebrates', 'people', 'reptiles', 'small_mammals',
 69 |           'trees', 'vehicles_1', 'vehicles_2'
 70 |       ]
 71 |     else:
 72 |       y = [
 73 |           'apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee',
 74 |           'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus',
 75 |           'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle',
 76 |           'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'cra',
 77 |           'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant', 'flatfish',
 78 |           'forest', 'fox', 'girl', 'hamster', 'house', 'kangaroo', 'keyboard',
 79 |           'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard', 'lobster', 'man',
 80 |           'maple_tree', 'motorcycle', 'mountain', 'mouse', 'mushroom',
 81 |           'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear',
 82 |           'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy', 'porcupine',
 83 |           'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose', 'sea',
 84 |           'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake',
 85 |           'spider', 'squirrel', 'streetcar', 'sunflower', 'sweet_pepper',
 86 |           'table', 'tank', 'telephone', 'television', 'tiger', 'tractor',
 87 |           'train', 'trout', 'tulip', 'turtle', 'wardrobe', 'whale',
 88 |           'willow_tree', 'wolf', 'woman', 'worm'
 89 |       ]
 90 |     return np.array(y)
 91 | 
 92 | 
 93 | # ===========================================================================
 94 | # Shortcuts
 95 | # ===========================================================================
 96 | class CIFAR10(CIFAR):
 97 |   """Labels: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
 98 |               'horse', 'ship', 'truck']
 99 |   """
100 | 
101 |   def __init__(self):
102 |     super().__init__(10)
103 | 
104 | 
class CIFAR100(CIFAR):
  """Shortcut for `CIFAR(version=100)`."""

  def __init__(self):
    super().__init__(version=100)
109 | 
class CIFAR20(CIFAR):
  """Shortcut for `CIFAR(version=20)`."""

  def __init__(self):
    super().__init__(version=20)
114 | 


--------------------------------------------------------------------------------
/odin/fuel/image_data/synthesize.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import numpy as np
  4 | 
  5 | from odin.fuel.image_data._base import ImageDataset
  6 | from odin.utils.crypto import md5_checksum
  7 | 
  8 | 
  9 | class YDisentanglement(ImageDataset):
 10 |   """
 11 |   Attributes :
 12 |     number of letter "Y" : an Integer
 13 |     xoffset : a Float
 14 |     yoffset : a Float
 15 |     rotation : a Float (from 0 - 180)
 16 |   """
 17 |   MD5 = r"19db3f0cc5829a1308a8023930dd61e6"
 18 | 
 19 |   def __init__(self, path="/tmp/ydisentanglement.npz"):
 20 |     path = os.path.abspath(os.path.expanduser(path))
 21 |     if os.path.exists(path):
 22 |       if not os.path.isfile(path):
 23 |         raise ValueError("path to '%s' is a folder, require path to a file" %
 24 |                          path)
 25 |       if md5_checksum(path) != YDisentanglement.MD5:
 26 |         os.remove(path)
 27 |     # create new dataset if not exist
 28 |     if not os.path.exists(path):
 29 |       images_train, attributes_train = YDisentanglement.generate_data(
 30 |         training=True)
 31 |       images_test, attributes_test = YDisentanglement.generate_data(
 32 |         training=False)
 33 |       with open(path, 'wb') as f:
 34 |         np.savez(f,
 35 |                  images_train=images_train,
 36 |                  attributes_train=attributes_train,
 37 |                  images_test=images_test,
 38 |                  attributes_test=attributes_test)
 39 |       print(md5_checksum(path))
 40 | 
 41 |     with open(path, 'rb') as f:
 42 |       data = np.load(f)
 43 |       self.images_train = data['images_train']
 44 |       self.attributes_train = data['attributes_train']
 45 |       self.images_test = data['images_test']
 46 |       self.attributes_test = data['attributes_test']
 47 | 
 48 |   @property
 49 |   def labels(self):
 50 |     return np.array(["num", "xoffset", "yoffset", "rotation"])
 51 | 
 52 |   @property
 53 |   def binarized(self):
 54 |     return True
 55 | 
 56 |   @property
 57 |   def shape(self):
 58 |     return 48, 48, 1
 59 | 
 60 |   @staticmethod
 61 |   def generate_data(num=16, image_path=None, training=True, seed=1):
 62 |     from PIL import Image, ImageChops, ImageDraw
 63 |     size = 48
 64 |     resample = Image.BICUBIC
 65 |     org = Image.new("1", (size, size))
 66 |     images = []
 67 |     attributes = []
 68 |     rand = np.random.RandomState(seed)
 69 |     ## different configuraiton for training and testing
 70 |     if training:
 71 |       all_text = ("Y", "YY", "YYYY")
 72 |       rotation_range = (0, 180)
 73 |     else:
 74 |       rotation_range = (180, 360)
 75 |       all_text = ("YYY",)
 76 | 
 77 |     def _to_offset(o, mode):
 78 |       # only moving horizontal and diagonal
 79 |       # (let see if the model could extrapolate to vertial movement)
 80 |       if training:
 81 |         x, y = (o, 0) if mode == 0 else (o, o)
 82 |       else:
 83 |         x, y = 0, o
 84 |       return x, y
 85 | 
 86 |     ## test
 87 |     for text in all_text:
 88 |       img = org.copy()
 89 |       draw = ImageDraw.Draw(img)
 90 |       w, h = draw.textsize(text)
 91 |       draw.text([(size - w) / 2, (size - h) / 2], text, fill=1)
 92 |       del draw
 93 |       for offset in np.linspace(-(num - 1) // 2, (num - 1) // 2,
 94 |                                 num=num,
 95 |                                 endpoint=True):
 96 |         offset = int(offset)
 97 |         if offset == 0:
 98 |           i1 = img
 99 |         else:
100 |           mode = rand.randint(0, 2)
101 |           xoffset, yoffset = _to_offset(offset, mode)
102 |           i1 = ImageChops.offset(img, xoffset=xoffset, yoffset=yoffset)
103 |         # rotation
104 |         for rotation in np.linspace(*rotation_range, num=num, endpoint=False):
105 |           if rotation > 0:
106 |             i2 = i1.rotate(rotation, resample=resample)
107 |           else:
108 |             i2 = i1
109 |           images.append(np.array(i2).astype(np.uint8))
110 |           attributes.append((len(text), xoffset, yoffset, rotation))
111 |     # final data
112 |     images = np.stack(images)
113 |     attributes = np.array(attributes)
114 |     ## save image
115 |     if image_path is not None:
116 |       from tqdm import tqdm
117 |       n = int(np.ceil(np.sqrt(images.shape[0])))
118 |       fig = plt.figure(figsize=(18, 18), dpi=80)
119 |       for i, img in tqdm(list(enumerate(images))):
120 |         ax = plt.subplot(n, n, i + 1)
121 |         ax.imshow(img, cmap='gray')
122 |         ax.axis('off')
123 |       fig.tight_layout()
124 |       fig.savefig(image_path, dpi=80)
125 |     return images, attributes
126 | 


--------------------------------------------------------------------------------
/benchmarks/single_vs_multi_feeders.py:
--------------------------------------------------------------------------------
  1 | # ===========================================================================
  2 | # Benchmark:
  3 | #        | Time |
  4 | # -------|------|
  5 | # single |  44  |
  6 | # ncpu=1 |  43  |
  7 | # ncpu=2 |  24  |
  8 | # ncpu=3 |  20  |
  9 | # ncpu=4 |  18  |
 10 | # ncpu=6 |  16  |
 11 | # ===========================================================================
 12 | from __future__ import print_function, division, absolute_import
 13 | 
 14 | import numpy as np
 15 | 
 16 | import os
 17 | os.environ['ODIN'] = 'float32,cpu,theano,seed=12'
 18 | 
 19 | from odin import backend as K
 20 | from odin import nnet as N
 21 | from odin import fuel
 22 | from odin.utils import one_hot, UnitTimer
 23 | 
 24 | ds = fuel.load_mspec_test()
 25 | transcription_path = os.path.join(ds.path, 'alignment.dict')
 26 | indices_path = os.path.join(ds.path, 'indices.csv')
 27 | 
 28 | indices = np.genfromtxt(indices_path, dtype=str, delimiter=' ')
 29 | transcription = fuel.MmapDict(transcription_path)
 30 | mean = ds['mspec_mean'][:]
 31 | std = ds['mspec_mean'][:]
 32 | cache = 5
 33 | 
 34 | 
 35 | # ===========================================================================
 36 | # Single process
 37 | # ===========================================================================
 38 | def get_data():
 39 |     """ batch_size = 128 """
 40 |     batch = []
 41 |     batch_trans = []
 42 |     for name, start, end in indices:
 43 |         start = int(start)
 44 |         end = int(end)
 45 |         data = ds['mspec'][start:end]
 46 |         data = (data - data.mean(0)) / data.std(0)
 47 |         data = (data - mean) / std
 48 |         data = np.vstack([data[i:i + 21].reshape(1, -1)
 49 |                           for i in range(0, data.shape[0], 21)
 50 |                           if i + 21 < data.shape[0]])
 51 |         trans = transcription[name]
 52 |         trans = np.array([int(i) for i in trans.split(' ') if len(i) > 0])
 53 |         trans = np.vstack([trans[i + 11].reshape(1, -1)
 54 |                           for i in range(0, trans.shape[0], 21)
 55 |                           if i + 21 < trans.shape[0]])
 56 |         batch.append(data)
 57 |         batch_trans.append(trans)
 58 |         if len(batch) == cache:
 59 |             batch = np.vstack(batch)
 60 |             trans = one_hot(np.vstack(batch_trans).ravel(), 10)
 61 | 
 62 |             idx = np.random.permutation(batch.shape[0])
 63 |             batch = batch[idx]
 64 |             trans = trans[idx]
 65 | 
 66 |             i = 0
 67 |             while i < batch.shape[0]:
 68 |                 start = i
 69 |                 end = i + 128
 70 |                 yield batch[start:end], trans[start:end]
 71 |                 i = end
 72 | 
 73 |             batch = []
 74 |             batch_trans = []
 75 | 
 76 | 
 77 | # ===========================================================================
 78 | # Feeder
 79 | # ===========================================================================
# Multi-process counterpart of `get_data`, built from the same features.
# NOTE(review): the indices file is a hard-coded absolute path rather than
# `indices_path` used by the single-process reader — confirm both point to
# the same file before comparing timings.
data = fuel.Feeder(ds['mspec'], '/Users/trungnt13/tmp/fbank/indices.csv',
                   transcription=fuel.MmapDict(transcription_path),
                   ncpu=1, cache=5)# change ncpu here
data.set_batch(batch_size=128, seed=12)
# Recipes are listed in this order: normalization, context stacking
# (10 frames on each side), one-hot labels with 10 classes, batch creation.
data.set_recipes(fuel.Normalization(local_normalize=True,
                                mean=ds['mspec_mean'],
                                std=ds['mspec_std']),
                fuel.Stacking(left_context=10,
                              right_context=10,
                              shift=None),
                fuel.OneHotTrans(n_classes=10),
                fuel.CreateBatch()
)
print('Number of CPU for feeders:', data.ncpu)
 94 | 
 95 | 
 96 | # ===========================================================================
 97 | # Training
 98 | # ===========================================================================
# Symbolic inputs for the features and the one-hot targets.
# NOTE(review): 2583 presumably equals 21 stacked frames x 123 features per
# frame — confirm against the dataset.
X = K.placeholder(shape=(None, 2583), name='X')
y = K.placeholder(shape=(None, 10), name='y')

# A 128-unit linear layer followed by a 10-way softmax layer.
f = N.Sequence([
    N.Dense(128, activation=K.linear),
    N.Dense(10, activation=K.softmax)
])
y_ = f(X)
# Mean categorical cross-entropy between predictions and targets.
cost_train = K.mean(K.categorical_crossentropy(y_, y))
# NOTE(review): no parameter updates are passed to K.function here, so this
# presumably only evaluates the cost — confirm.
f_train = K.function([X, y], cost_train)
109 | 
110 | 
111 | # ====== single process ====== #
# Time one full pass over the single-process generator.
with UnitTimer():
    for _, (i, j) in enumerate(get_data()):
        f_train(i, j)
# index of the last batch, i.e. number of batches minus one
print(_)

# ====== multi-processes ====== #
# Time one full pass over the Feeder for comparison.
with UnitTimer():
    for _, (i, j) in enumerate(data):
        f_train(i, j)
# index of the last batch, i.e. number of batches minus one
print(_)
122 | 


--------------------------------------------------------------------------------
/odin/bay/stochastic_initializers.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | from functools import partial
  4 | 
  5 | import tensorflow as tf
  6 | import tensorflow_probability as tfp
  7 | from tensorflow.python import keras
  8 | from tensorflow.python.ops import init_ops_v2
  9 | 
 10 | from odin.backend.alias import (parse_activation, parse_constraint,
 11 |                                 parse_initializer, parse_regularizer)
 12 | from odin.bay.helpers import coercible_tensor
 13 | 
 14 | 
 15 | class StochasticVariable(keras.layers.Layer, tf.initializers.Initializer):
 16 | 
 17 |   def __init__(self, sample_shape=(), seed=None):
 18 |     super().__init__()
 19 |     self._sample_shape = sample_shape
 20 |     self._seed = seed
 21 | 
 22 |   @property
 23 |   def sample_shape(self):
 24 |     return self._sample_shape
 25 | 
 26 |   @sample_shape.setter
 27 |   def sample_shape(self, shape):
 28 |     self._sample_shape = shape
 29 | 
 30 |   def __call__(self, shape, dtype=None):
 31 |     if not self.built:
 32 |       self.build(shape, dtype)
 33 |     distribution = self.call()
 34 |     assert isinstance(distribution, tfp.distributions.Distribution), \
 35 |       'StochasticVariable.call must return Distribution'
 36 |     distribution = coercible_tensor(distribution,
 37 |                                     convert_to_tensor_fn=partial(
 38 |                                         tfp.distributions.Distribution.sample,
 39 |                                         sample_shape=self.sample_shape))
 40 |     return distribution
 41 | 
 42 | 
 43 | class TrainableNormal(StochasticVariable):
 44 | 
 45 |   def __init__(self,
 46 |                loc_initializer='truncated_normal',
 47 |                scale_initializer='truncated_normal',
 48 |                loc_regularizer=None,
 49 |                scale_regularizer=None,
 50 |                loc_activation=None,
 51 |                scale_activation='softplus',
 52 |                shared_scale=False,
 53 |                **kwargs):
 54 |     super().__init__(**kwargs)
 55 |     self.loc_initializer = parse_initializer(loc_initializer, 'tf')
 56 |     self.scale_initializer = parse_initializer(scale_initializer, 'tf')
 57 |     self.loc_regularizer = parse_regularizer(loc_regularizer, 'tf')
 58 |     self.scale_regularizer = parse_regularizer(scale_regularizer, 'tf')
 59 |     self.loc_activation = parse_activation(loc_activation, 'tf')
 60 |     self.scale_activation = parse_activation(scale_activation, 'tf')
 61 |     self.shared_scale = bool(shared_scale)
 62 | 
 63 |   def build(self, shape, dtype=None):
 64 |     super().build(shape)
 65 |     self.loc = self.add_weight(
 66 |         name='loc',
 67 |         shape=shape,
 68 |         dtype=dtype,
 69 |         initializer=self.loc_initializer,
 70 |         regularizer=self.loc_regularizer,
 71 |         constraint=None,
 72 |         trainable=True,
 73 |     )
 74 |     self.scale = self.add_weight(
 75 |         name='scale',
 76 |         shape=() if self.shared_scale else shape,
 77 |         dtype=dtype,
 78 |         initializer=self.scale_initializer,
 79 |         regularizer=self.scale_regularizer,
 80 |         constraint=None,
 81 |         trainable=True,
 82 |     )
 83 | 
 84 |   def call(self):
 85 |     dist = tfp.distributions.Independent(
 86 |         tfp.distributions.Normal(loc=self.loc_activation(self.loc),
 87 |                                  scale=self.scale_activation(self.scale)), 1)
 88 |     return dist
 89 | 
 90 | 
 91 | class TrainableNormalSharedScale(TrainableNormal):
 92 | 
 93 |   def __init__(self,
 94 |                loc_initializer='glorot_normal',
 95 |                scale_initializer='truncated_normal',
 96 |                loc_regularizer=None,
 97 |                scale_regularizer=None,
 98 |                loc_activation=None,
 99 |                scale_activation='softplus',
100 |                **kwargs):
101 |     super().__init__(loc_initializer,
102 |                      scale_initializer,
103 |                      loc_regularizer,
104 |                      scale_regularizer,
105 |                      loc_activation,
106 |                      scale_activation,
107 |                      shared_scale=True,
108 |                      **kwargs)
109 | 
110 | 
111 | trainable_normal = TrainableNormal
112 | trainable_normal_shared_scale = TrainableNormalSharedScale
113 | 
# NOTE: this only hijacks keras.initializers if you import odin.bay
115 | init_ops_v2.trainable_normal = TrainableNormal
116 | init_ops_v2.trainable_normal_shared_scale = TrainableNormalSharedScale
117 | 
118 | get = keras.initializers.get
119 | 


--------------------------------------------------------------------------------