├── pydpm ├── model │ ├── hybrid_pm │ │ └── __init__.py │ ├── bayesian_pm │ │ └── __init__.py │ ├── deep_learning_pm │ │ ├── __init__.py │ │ ├── rbm.py │ │ └── dcgan.py │ ├── __init__.py │ └── basic_model.py ├── example │ ├── Bayesian_PM │ │ ├── __init__.py │ │ ├── Factor_Analysis │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── FA_demo.py │ │ ├── Dirchilet_Belief_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── DirBN_Demo.py │ │ ├── Gaussian_Mixture_Model │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── GMM_Demo.py │ │ ├── Poisson_Factor_Analysis │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── PFA_Demo.py │ │ ├── Deep_Poisson_Factor_Analysis │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── DPFA_Demo.py │ │ ├── Latent_Dirchilet_Allocation │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── LDA_Demo.py │ │ ├── Poisson_Gamma_Belief_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── PGBN_Demo.py │ │ ├── Poisson_Gamma_Dynamic_System │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── PGDS_Demo.py │ │ ├── Deep_Poisson_Gamma_Dynamic_System │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── DPGDS_Demo.py │ │ ├── Graph_Poisson_Gamma_Belief_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── GPGBN_Demo.py │ │ ├── Word_Embeddings_Deep_Topic_Model │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── WEDTM_Demo.py │ │ ├── Multimodal_Poisson_Gamma_Belief_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── MPGBN_Demo.py │ │ └── Convolutional_Poisson_Gamma_Belief_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ ├── CPFA_Demo.py │ │ │ └── CPGBN_Demo.py │ ├── Hybrid_PM │ │ ├── __init__.py │ │ ├── HyperMiner │ │ │ ├── __init__.py │ │ │ ├── HyperMiner.py │ │ │ └── readme.md │ │ ├── TopicNet │ │ │ ├── __init__.py │ │ │ ├── TopicNet.py │ │ │ └── readme.md │ │ ├── Sawtooth_Embedding_Topic_Model │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── SawETM_Demo.py │ │ ├── Deep_Coupling_Embedding_Topic_Model │ │ │ ├── __init__.py │ │ │ ├── dcETM.py │ │ │ └── readme.md │ │ ├── Weibull_Graph_Attention_Autoencoder │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── WGAAE_Demo.py │ │ ├── Weibull_Hybrid_Autoencoding_Inference │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── WHAI_Demo.py │ │ ├── Knowledge_Aware_Bayesian_Deep_Topic_Model │ │ │ ├── __init__.py │ │ │ ├── KG_ETM.py │ │ │ └── readme.md │ │ ├── Multimodal_Weibull_Variational_Autoencoder │ │ │ ├── __init__.py │ │ │ ├── MWVAE.py │ │ │ └── readme.md │ │ ├── Variational_Temporal_Deep_Generative_Model │ │ │ ├── __init__.py │ │ │ ├── RGBN.py │ │ │ └── readme.md │ │ ├── Generative_Text_Convolutional_Neural_Network │ │ │ ├── GTCNN.py │ │ │ ├── __init__.py │ │ │ └── readme.md │ │ └── Variational_Edge_Parition_Graph_Neural_Network │ │ │ ├── VEPM.py │ │ │ ├── __init__.py │ │ │ └── readme.md │ ├── Deep_Learning_PM │ │ ├── __init__.py │ │ ├── Real_NVP │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── Real_NVP_Demo.py │ │ ├── Normlizing_Flow │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── NFlow_Demo.py │ │ ├── Variational_Autoencoder │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── VAE_Demo.py │ │ ├── Generative_Adversarial_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── GAN_Demo.py │ │ ├── Restricted_Boltzmann_Machine │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── RBM_Demo.py │ │ ├── VQ_Variational_Autoencoder │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── VQ_VAE_Demo.py │ │ ├── Conditional_Variational_Auto-encoder │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── CVAE_Demo.py │ │ 
├── Denoising_Diffusion_Probabilistic_Model │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── DDPM_Demo.py │ │ ├── Wasserstein_Generative_Adversarial_Networks │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── WGAN_Demo.py │ │ ├── Deep_Convolution_Generative_Adversarial_Networks │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── DCGAN_Demo.py │ │ └── Information_Maximizing_Generative_Adversarial_Nets │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── InfoGAN_Demo.py │ └── __init__.py ├── utils │ ├── __init__.py │ └── utils.py ├── __init__.py ├── sampler │ ├── __init__.py │ ├── _compact │ │ ├── model_sampler_linux.h │ │ ├── distribution_sampler_linux.h │ │ ├── model_sampler_win.h │ │ ├── distribution_sampler_win.h │ │ ├── crt_cpu.c │ │ ├── crt_multi_aug_cpu.c │ │ └── multi_aug_cpu.c │ ├── distribution_sampler_cpu.py │ ├── basic_sampler.py │ ├── pre_process.py │ └── model_sampler_cpu.py ├── dataloader │ ├── __init__.py │ └── image_data.py └── metric │ ├── __init__.py │ ├── reconstruction.py │ ├── purity.py │ ├── perplexity.py │ ├── accuracy.py │ ├── roc_score.py │ ├── cluster_acc.py │ ├── normalized_mutual_information.py │ └── topic_coherence.py ├── .gitignore ├── docs └── imgs │ ├── intro.png │ ├── compare_numpy.png │ ├── dpm_tutorial.png │ ├── pydpm_logo_1.png │ ├── pydpm_logo_2.png │ ├── pydpm_framework.png │ ├── pydpm_workflow.png │ ├── pydpm_workflow1.png │ ├── pydpm_workflow2.png │ ├── compare_tf2_torch.png │ ├── pydpm_framework_3.png │ ├── pydpm_framework_4.png │ ├── compare_cupy_pycuda.png │ ├── pydpm_framework_new.png │ └── pydpm_framework_old.png ├── CONTRIBUTING.md ├── MANIFEST.in ├── setup.py └── enviroment.yaml /pydpm/model/hybrid_pm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | */.DS_Store -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/model/bayesian_pm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/model/deep_learning_pm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/HyperMiner/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
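Orientation note: the tree above maps directly onto the package's public import surface — models are re-exported from pydpm.model, metrics from pydpm.metric, the CPU/GPU samplers from pydpm.sampler, and helpers from pydpm.utils (see the __init__.py files reproduced later in this dump). A minimal import sketch under that layout follows; only the names below are confirmed by those __init__ files, everything else is illustrative.

```python
# Minimal import sketch based on the __init__.py files shown later in this dump.
# The class/function names are confirmed there; no constructor arguments are assumed here.
from pydpm.model import PGBN, VAE, WHAI        # bayesian_pm / deep_learning_pm / hybrid_pm models
from pydpm.metric import ACC, Perplexity, NMI  # evaluation metrics
from pydpm.sampler import Basic_Sampler        # CPU/GPU distribution sampler
from pydpm.utils import seed_everything        # reproducibility helper from utils.py

seed_everything(2023)                          # seeds random, numpy and torch in one call
```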
/pydpm/example/Hybrid_PM/TopicNet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Factor_Analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Real_NVP/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/HyperMiner/HyperMiner.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/TopicNet/TopicNet.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Normlizing_Flow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/__init__.py: -------------------------------------------------------------------------------- 1 | '''need '__init__.py' to package pydpm''' 2 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Dirchilet_Belief_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Gaussian_Mixture_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Factor_Analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Factor_Analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Latent_Dirchilet_Allocation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Belief_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Dynamic_System/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Variational_Autoencoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Sawtooth_Embedding_Topic_Model/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from .basic_sampler import Basic_Sampler -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Gamma_Dynamic_System/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Graph_Poisson_Gamma_Belief_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Word_Embeddings_Deep_Topic_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Generative_Adversarial_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Restricted_Boltzmann_Machine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/VQ_Variational_Autoencoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Deep_Coupling_Embedding_Topic_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Deep_Coupling_Embedding_Topic_Model/dcETM.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Graph_Attention_Autoencoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Hybrid_Autoencoding_Inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Multimodal_Poisson_Gamma_Belief_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Knowledge_Aware_Bayesian_Deep_Topic_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Multimodal_Weibull_Variational_Autoencoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
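pydpm/sampler/__init__.py above exposes Basic_Sampler, which sits on top of the CPU C kernels and CUDA headers listed under sampler/_compact. A hedged sketch of how it might be instantiated follows; only the Basic_Sampler name is confirmed by this dump, while the 'gpu' device string and the gamma(...) call are assumptions to verify against basic_sampler.py and the distribution demos.

```python
# Hedged sketch of the sampler exported by pydpm/sampler/__init__.py.
# Basic_Sampler is the only name confirmed by this dump; the device string and the
# gamma(shape, scale) method signature are assumptions -- check basic_sampler.py.
import numpy as np
from pydpm.sampler import Basic_Sampler

sampler = Basic_Sampler('gpu')                 # assumed: 'cpu' would fall back to the C kernels
shape = np.ones(10000, dtype=np.float32) * 2.0
samples = sampler.gamma(shape, 1.0)            # assumed: element-wise Gamma(2, 1) draws
print(samples.mean())                          # should be close to the Gamma mean of 2.0
```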
/pydpm/example/Hybrid_PM/Variational_Temporal_Deep_Generative_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Convolutional_Poisson_Gamma_Belief_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Conditional_Variational_Auto-encoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Denoising_Diffusion_Probabilistic_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Generative_Text_Convolutional_Neural_Network/GTCNN.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Generative_Text_Convolutional_Neural_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Knowledge_Aware_Bayesian_Deep_Topic_Model/KG_ETM.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Multimodal_Weibull_Variational_Autoencoder/MWVAE.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Variational_Edge_Parition_Graph_Neural_Network/VEPM.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Variational_Edge_Parition_Graph_Neural_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Variational_Temporal_Deep_Generative_Model/RGBN.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Wasserstein_Generative_Adversarial_Networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/imgs/intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/intro.png -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Deep_Convolution_Generative_Adversarial_Networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/pydpm/example/Deep_Learning_PM/Information_Maximizing_Generative_Adversarial_Nets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/imgs/compare_numpy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/compare_numpy.png -------------------------------------------------------------------------------- /docs/imgs/dpm_tutorial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/dpm_tutorial.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_logo_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_logo_1.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_logo_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_logo_2.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_framework.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_workflow.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_workflow1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_workflow1.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_workflow2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_workflow2.png -------------------------------------------------------------------------------- /pydpm/dataloader/__init__.py: -------------------------------------------------------------------------------- 1 | from .graph_data import * 2 | from .image_data import * 3 | from .text_data import * -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Factor_Analysis/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Factor Analysis (FA). 
2 | 3 | -------------------------------------------------------------------------------- /docs/imgs/compare_tf2_torch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/compare_tf2_torch.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_framework_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_framework_3.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_framework_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_framework_4.png -------------------------------------------------------------------------------- /docs/imgs/compare_cupy_pycuda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/compare_cupy_pycuda.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_framework_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_framework_new.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_framework_old.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_framework_old.png -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Gaussian_Mixture_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Gaussian Mixture Model (GMM). 2 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Belief_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Poisson Gamma Belief Network (PGBN), from the 2015 NeurIPS paper with the title "The Poisson Gamma Belief Network". 2 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Normlizing_Flow/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the RealNVP_2D, one of the traditional flow-based models from the 2017 paper with the title "Density Estimation Using Real NVP". 2 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/VQ_Variational_Autoencoder/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Neural Discrete Representation Learning (VQ-VAE), from the 2017 paper with the title "Neural Discrete Representation Learning". 2 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Factor_Analysis/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Poisson Factor Analysis (PFA), from the 2012 AISTATS paper with the title "Poisson Factor Analysis". 
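The Bayesian_PM demos indexed by these readme files all follow the construct → initial → train → evaluate pattern implied by Basic_Model (defined later in this dump). Below is a sketch of that workflow for PFA; the class name comes from pydpm/model/__init__.py, but the constructor arguments and method signatures are assumptions, so treat PFA_Demo.py as the authoritative reference.

```python
# Sketch of the common Bayesian_PM demo workflow, shown here for PFA.
# Only the PFA class name is confirmed by pydpm/model/__init__.py; K, device and the
# initial()/train() signatures are assumptions -- see PFA_Demo.py for the real ones.
import numpy as np
from pydpm.model import PFA

train_data = np.random.poisson(1.0, size=(500, 2000))  # toy V*N count matrix (shape convention assumed)

model = PFA(K=100, device='gpu')                 # 100 latent factors (argument names assumed)
model.initial(train_data)                        # allocates global_params/local_params (see Basic_Model)
local_params = model.train(100, train_data)      # run 100 Gibbs-sampling iterations (signature assumed)
model.save()                                     # assumed: persists the learned global parameters
```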
2 | 3 | The source project can be found in https://github.com/yxnchen/PFA 4 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Conditional_Variational_Auto-encoder/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Conditional Variational Autoencoder (CVAE), from the 2015 paper with the title "Learning Structured Output Representation using Deep Conditional Generative Models". 2 | 3 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Real_NVP/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the more powerful RealNVP, one of the traditional flow-based models from the 2017 paper with the title "Density Estimation Using Real NVP". 2 | Reference code can be found in https://github.com/fmu2/realNVP -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Variational_Autoencoder/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Variational Autoencoder (VAE), from the 2014 paper with the title "Auto-Encoding Variational Bayes". 2 | 3 | The source project can be found in https://github.com/AntixK/PyTorch-VAE 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Dynamic_System/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Poisson-Gamma Dynamical Systems (PGDS), from the 2016 NeurIPS paper with the title "Poisson-Gamma Dynamical Systems". 2 | 3 | The source project can be found in https://github.com/aschein/pgds 4 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Generative_Adversarial_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Generative Adversarial Networks (GAN), from the 2014 paper with the title "Generative Adversarial Networks". 2 | 3 | The source project can be found in https://github.com/yfeng95/GAN 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Latent_Dirchilet_Allocation/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Latent Dirichlet Allocation (LDA), from the 2003 JMLR paper with the title "Latent Dirichlet Allocation". 2 | 3 | The source project can be found in https://github.com/lda-project/lda 4 | -------------------------------------------------------------------------------- /pydpm/example/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Examples of models and the GPU distribution sampler.
3 | To run the model demos, you need to download the dataset files 'mnist_gray.mat' & 'TREC.pkl' and put them under pydpm.example.dataset 4 | dataset url: https://1drv.ms/u/s!AlkDawhaUUBWtHRWuNESEdOsDz7V?e=LQlGLW 5 | ''' 6 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Dirchilet_Belief_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Dirichlet Belief Networks (DirBN), from the 2018 NeurIPS paper with the title "Dirichlet belief networks for topic structure learning". 2 | 3 | The source project can be found in https://github.com/ethanhezhao/DirBN 4 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Deep_Convolution_Generative_Adversarial_Networks/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Deep Convolutional Generative Adversarial Networks (DCGAN), from the 2016 paper with the title "Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks". 2 | 3 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Wasserstein_Generative_Adversarial_Networks/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Wasserstein Generative Adversarial Networks (WGAN), from the 2017 paper with the title "Wasserstein GAN". 2 | 3 | The source project can be found in https://github.com/martinarjovsky/WassersteinGAN 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Factor_Analysis/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Deep Poisson Factor Analysis (DPFA), from the 2015 ICML paper with the title "Scalable Deep Poisson Factor Analysis for Topic Modeling". 2 | 3 | The source project can be found in https://github.com/zhegan27/dpfa_icml2015 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Gamma_Dynamic_System/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Deep Poisson Gamma Dynamical Systems (DPGDS), from the 2018 NeurIPS paper with the title "Deep Poisson Gamma Dynamical Systems". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/DPGDS 4 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/TopicNet/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the TopicNet, from the 2021 NeurIPS paper with the title "TopicNet: Semantic Graph-Guided Topic Discovery". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/TopicNet 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/HyperMiner/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the HyperMiner, from the paper with the title "HyperMiner: Topic Taxonomy Mining with Hyperbolic Embedding".
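As noted in pydpm/example/__init__.py above, the demos expect 'mnist_gray.mat' and 'TREC.pkl' to be downloaded into pydpm/example/dataset. A minimal loading sketch under that assumption follows; the variable names stored inside the .mat file and the structure of the pickle are not documented here, so inspect them before use.

```python
# Minimal sketch for loading the demo datasets mentioned in pydpm/example/__init__.py.
# The internal keys of mnist_gray.mat and the structure of TREC.pkl are not documented
# in this dump, so they are inspected rather than assumed.
import pickle
import scipy.io as sio

mat = sio.loadmat('pydpm/example/dataset/mnist_gray.mat')
print([k for k in mat.keys() if not k.startswith('__')])   # list the stored variable names

with open('pydpm/example/dataset/TREC.pkl', 'rb') as f:
    trec = pickle.load(f)
print(type(trec))                                           # check the container type before indexing
```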
2 | 3 | The source project can be found in https://github.com/BoChenGroup/HyperMiner 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Word_Embeddings_Deep_Topic_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Word Embeddings Deep Topic Model (WEDTM), from the 2018 ICML paper with the title "Inter and Intra Topic Structure Learning with Word Embeddings". 2 | 3 | The source project can be found in https://github.com/ethanhezhao/WEDTM 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Multimodal_Poisson_Gamma_Belief_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Mutlimodal Poisson Gamma Belief Network (MPGBN), from the 2022 NeurIPS paper with the title "Mutlimodal Poisson Gamma Belief Network". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/Multimodal_PGBN 4 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Restricted_Boltzmann_Machine/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Restricted Boltzmann Machines (RBM), from the 2010 paper with the title "A Practical guide to training restricted Boltzmann machines". 2 | 3 | The source project can be found in https://github.com/echen/restricted-boltzmann-machines 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Convolutional_Poisson_Gamma_Belief_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Convolutional Poisson Gamma Belief Network (CPGBN), from the 2019 ICML paper with the title "Convolutional Poisson Gamma Belief Network Demo". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/CPGBN 4 | 5 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Hybrid_Autoencoding_Inference/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Weibull Hybrid Autoencoding Inference (WHAI), from the 2018 ICLR paper with the title "WHAI: Weibull Hybrid Autoencoding Inference for Deep Topic Modeling". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/WHAI 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Graph_Poisson_Gamma_Belief_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Graph Poisson Gamma Belief Network (GPGBN), from the 2020 NeurIPS paper with the title "Deep Relational Topic Modeling via Graph Poisson Gamma Belief Network". 
2 | 3 | The source project can be found in https://github.com/BoChenGroup/GPGBN 4 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Graph_Attention_Autoencoder/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Weibull Graph Attention Autoencoder (WGAAE), from the 2020 NeurIPS paper with the title "Bayesian Attention Modules". 2 | 3 | The source project can be found in https://github.com/chaojiewang94/WGAAE 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Multimodal_Weibull_Variational_Autoencoder/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Multimodal Weibull Variational Autoencoder (MEVAE), from the 2022 IEEE Transactions on Cybernetics paper with the title "Multimodal Weibull Variational Autoencoder for Jointly Modeling Image-Text Data". 2 | 3 | The details of this model will be coming soon. 4 | -------------------------------------------------------------------------------- /pydpm/metric/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import ACC 2 | from .reconstruction import Poisson_Likelihood, Reconstruct_Error 3 | from .cluster_acc import Cluster_ACC 4 | from .topic_coherence import Topic_Coherence 5 | from .perplexity import Perplexity 6 | from .normalized_mutual_information import NMI 7 | from .roc_score import ROC_AP_SCORE 8 | from .purity import Purity 9 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Denoising_Diffusion_Probabilistic_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Denoising Diffusion Probabilistic Models (DDPM), from the 2020 NeurIPS paper with the title "Denoising Diffusion Probabilistic Models". 2 | 3 | The source project can be found in https://github.com/abarankab/DDPM 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Knowledge_Aware_Bayesian_Deep_Topic_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Knowledge Graph Embedding Topic Model (KG-ETM), from the 2022 NeurIPS paper with the title "Knowledge-Aware Bayesian Deep Topic Model". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/TopicKG 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Variational_Temporal_Deep_Generative_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Recurrent Gamma Belief Network (RGBN), from the 2020 ICLR paper with the title "Recurrent Hierarchical Topic-Guided Neural Language Models". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/rGBN_RNN 4 | 5 | The details of this model will be coming soon. 
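pydpm/metric/__init__.py above re-exports the evaluation classes used by the demos (ACC, Perplexity, NMI, Purity, and so on). As most of the class bodies later in this dump show, these metrics compute and print their score when constructed; a small toy-data usage sketch under that assumption:

```python
# Toy usage sketch for the metric classes exported by pydpm/metric/__init__.py.
# Shapes follow the docstrings shown in this dump: ACC expects K*N feature matrices,
# Perplexity expects N*V count/reconstruction matrices. The random data is illustrative only.
import numpy as np
from pydpm.metric import ACC, Purity, Perplexity

x_tr, x_te = np.random.rand(50, 200), np.random.rand(50, 80)   # K*N latent features
y_tr, y_te = np.random.randint(0, 4, 200), np.random.randint(0, 4, 80)

ACC(x_tr, x_te, y_tr, y_te, model='SVM')      # fits an SVM on the features and prints accuracy
Purity(y_te, np.random.randint(0, 4, 80))     # prints the cluster purity of a (random) assignment

x = np.random.poisson(1.0, size=(80, 500)).astype(float)       # N*V heldout counts
Perplexity(x, x + 1e-3)                       # prints the per-heldout-word perplexity
```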
6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Deep_Coupling_Embedding_Topic_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Deep Coupling Embedding Topic Model (dc-ETM), from the 2022 NeurIPS paper with the title "Alleviating ''Posterior Collapse'' in Deep Topic Models via Policy Gradient". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/dc-ETM 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Information_Maximizing_Generative_Adversarial_Nets/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Information Maximizing Generative Adversarial Nets (infoGAN), from the 2014 paper with the title "InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets". 2 | 3 | The source project can be found in https://github.com/Natsu6767/InfoGAN-PyTorch 4 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Variational_Edge_Parition_Graph_Neural_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Variational Edge Parition Graph Neural Network (VEPM), from the 2022 NeurIPS paper with the title "A Variational Edge Partition Model for Supervised Graph Representation Learning". 2 | 3 | The source project can be found in https://github.com/YH-UtMSB/VEPM 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Generative_Text_Convolutional_Neural_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Generative Text Convolutional Neural Network (GTCNN), from the paper with the title "Generative Text Convolutional Neural Network for Hierarchial Document Representation Learning". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/GTCNN 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Sawtooth_Embedding_Topic_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Sawtooth Embedding Topic Model (SawETM), from the 2021 paper with the title "Sawtooth Factorial Topic Embeddings Guided Gamma Belief Network". 
2 | 3 | The source project can be found in https://github.com/BoChenGroup/SawETM and the implementation in PyDPM references https://github.com/NoviceStone/HyperMiner/blob/main/models/sawetm.py 4 | -------------------------------------------------------------------------------- /pydpm/sampler/_compact/model_sampler_linux.h: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // status 9 | #define blockDimX 32 10 | #define blockDimY 4 // blockDimX * Y should be multiples of 32, and no more than 1024 11 | #define gridDimX 128 12 | #define nStatus (blockDimX * blockDimY * gridDimX) 13 | 14 | // const 15 | #define one_third 0.333333333333333 16 | #define Pi 3.141592654 17 | 18 | // gamma 19 | #define nThreads_x 32 20 | #define nThreads_y 4 21 | #define nThreads (nThreads_x * nThreads_y) 22 | 23 | -------------------------------------------------------------------------------- /pydpm/metric/reconstruction.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Metric 4 | =========================================== 5 | 6 | """ 7 | 8 | # Author: Chaojie Wang ; Jiawen Wu 9 | # License: BSD-3-Clause 10 | 11 | import numpy as np 12 | 13 | from scipy.special import gamma 14 | from ..utils import * 15 | 16 | def Poisson_Likelihood(X, X_re): 17 | 18 | # X[np.where(X>100)] = 100 19 | # X_re[np.where(X>100)] = 100 20 | 21 | Likelihood = np.sum(X*log_max(X_re) - X_re - log_max(gamma(X_re + 1))) 22 | return Likelihood 23 | 24 | def Reconstruct_Error(X, X_re): 25 | return np.power(X - X_re, 2).sum() -------------------------------------------------------------------------------- /pydpm/sampler/_compact/distribution_sampler_linux.h: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // status 9 | #define blockDimX 32 10 | #define blockDimY 4 // blockDimX * Y should be multiples of 32, and no more than 1024 11 | #define gridDimX 128 12 | #define nStatus (blockDimX * blockDimY * gridDimX) 13 | 14 | // const 15 | #define one_third 0.333333333333333 16 | #define Pi 3.141592654 17 | 18 | // gamma 19 | #define nThreads_gamma_x 32 20 | #define nThreads_gamma_y 4 21 | #define nThreads_gamma (nThreads_gamma_x * nThreads_gamma_y) 22 | 23 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to PyDPM 2 | 3 | If you are interested in contributing to PyDPM, your contributions will likely fall into one of the following two categories: 4 | 5 | 1. You want to implement a new feature: 6 | - In general, we accept any features as long as they fit the scope of this package. If you are unsure about this or need help on the design/implementation of your feature, post about it in an issue. 7 | 2. You want to fix a bug: 8 | - Feel free to send a Pull Request any time you encounter a bug. Please provide a clear and concise description of what the bug was. If you are unsure about if this is a bug at all or how to fix, post about it in an issue. 9 | 10 | Once you finish implementing a feature or bug-fix, please send a Pull Request to https://github.com/BoChenGroup/PyDPM. 
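Unlike the class-based metrics, pydpm/metric/reconstruction.py (reproduced above) exposes two plain functions, Poisson_Likelihood and Reconstruct_Error, both re-exported by pydpm.metric. A short toy usage sketch:

```python
# Toy usage sketch for the two functions defined in pydpm/metric/reconstruction.py.
# X is an observed count matrix and X_re its model reconstruction; the values are illustrative.
import numpy as np
from pydpm.metric import Poisson_Likelihood, Reconstruct_Error

X = np.random.poisson(3.0, size=(100, 50)).astype(float)
X_re = X + 0.1 * np.random.rand(100, 50)          # pretend reconstruction close to X

print(Poisson_Likelihood(X, X_re))                # summed element-wise Poisson log-likelihood terms
print(Reconstruct_Error(X, X_re))                 # squared error, sum((X - X_re)**2)
```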
11 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pydpm/_sampler/_compact/crt_cpu.c 2 | include pydpm/_sampler/_compact/crt_cpu.dll 3 | include pydpm/_sampler/_compact/crt_cpu.so 4 | include pydpm/_sampler/_compact/crt_multi_aug_cpu.c 5 | include pydpm/_sampler/_compact/crt_multi_aug_cpu.dll 6 | include pydpm/_sampler/_compact/crt_multi_aug_cpu.so 7 | include pydpm/_sampler/_compact/multi_aug_cpu.c 8 | include pydpm/_sampler/_compact/multi_aug_cpu.dll 9 | include pydpm/_sampler/_compact/multi_aug_cpu.so 10 | include pydpm/_sampler/_compact/sampler_kernel_win.cu 11 | include pydpm/_sampler/_compact/sampler_kernel_win.h 12 | include pydpm/_sampler/_compact/sampler_kernel_linux.cu 13 | include pydpm/_sampler/_compact/sampler_kernel_linux.h 14 | include compare_numpy.jpg 15 | include compare_tf2_torch.jpg 16 | include pydpm_framework.png 17 | -------------------------------------------------------------------------------- /pydpm/sampler/_compact/model_sampler_win.h: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // DLL export function 9 | // if not use DLLEXPORT, the function will be unable to be transferred on Windows 10 | #define DLLEXPORT extern "C" __declspec(dllexport) 11 | 12 | // status 13 | #define blockDimX 32 14 | #define blockDimY 4 // blockDimX * Y should be multiples of 32, and no more than 1024 15 | #define gridDimX 128 16 | #define nStatus (blockDimX * blockDimY * gridDimX) 17 | 18 | // const 19 | #define one_third 0.333333333333333 20 | #define Pi 3.141592654 21 | 22 | // gamma 23 | #define nThreads_x 32 24 | #define nThreads_y 4 25 | #define nThreads (nThreads_x * nThreads_y) 26 | 27 | -------------------------------------------------------------------------------- /pydpm/sampler/_compact/distribution_sampler_win.h: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // DLL export function 9 | // if not use DLLEXPORT, the function will be unable to be transferred on Windows 10 | #define DLLEXPORT extern "C" __declspec(dllexport) 11 | 12 | // status 13 | #define blockDimX 32 14 | #define blockDimY 4 // blockDimX * Y should be multiples of 32, and no more than 1024 15 | #define gridDimX 128 16 | #define nStatus (blockDimX * blockDimY * gridDimX) 17 | 18 | // const 19 | #define one_third 0.333333333333333 20 | #define Pi 3.141592654 21 | 22 | // gamma 23 | #define nThreads_gamma_x 32 24 | #define nThreads_gamma_y 4 25 | #define nThreads_gamma (nThreads_gamma_x * nThreads_gamma_y) 26 | 27 | -------------------------------------------------------------------------------- /pydpm/metric/purity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Author: Xinyang Liu 4 | # License: BSD-3-Clause 5 | 6 | import numpy as np 7 | 8 | class Purity(object): 9 | 10 | def __init__(self, y, ypred): 11 | """ 12 | Inputs: 13 | y: the ground_true, shape:(n_sample,) 14 | ypred: pred_label, shape:(n_sample,) 15 | Output: 16 | accuracy of cluster, in [0, 1] 17 | """ 18 | self.y = y 19 | self.ypred = ypred 20 | 21 | self._get() 22 | 23 | print(f'The 
cluster purity is: {self._purity:.4f}') 24 | 25 | def _get(self): 26 | 27 | clusters = np.unique(self.ypred) 28 | counts = [] 29 | for c in clusters: 30 | indices = np.where(self.ypred == c)[0] 31 | max_votes = np.bincount(self.y[indices]).max() 32 | counts.append(max_votes) 33 | self._purity = sum(counts) / self.y.shape[0] 34 | -------------------------------------------------------------------------------- /pydpm/metric/perplexity.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | per-held-word perplexity 4 | =========================================== 5 | 6 | """ 7 | #!/usr/bin/python3 8 | # -*- coding: utf-8 -*- 9 | # Author: Xinyang Liu 10 | # License: BSD-3-Clause 11 | 12 | import numpy as np 13 | from ..utils import * 14 | 15 | class Perplexity(object): 16 | def __init__(self, x, x_hat): 17 | ''' 18 | Inputs: 19 | x: [float] np.ndarray, N*V test matrix, the observations for test_data 20 | x_hat: [float] np.ndarray, N*V reconstructed matrix 21 | Outputs: 22 | PPL: [float], the perplexity score 23 | ''' 24 | 25 | self.x = x 26 | self.x_hat = x_hat 27 | 28 | self._get() 29 | 30 | print(f'The PPL is: {self._PPL:.4f}') 31 | 32 | def _get(self): 33 | 34 | self.x_hat = self.x_hat / (np.sum(self.x_hat, axis=1, keepdims=True) + realmin) 35 | ppl = -1.0 * self.x * np.log(self.x_hat + realmin) / np.sum(self.x) 36 | ppl = np.exp(ppl.sum()) 37 | 38 | self._PPL = ppl 39 | -------------------------------------------------------------------------------- /pydpm/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .basic_model import Params, Basic_Model 2 | from .bayesian_pm.lda import LDA 3 | from .bayesian_pm.pfa import PFA 4 | from .bayesian_pm.pgbn import PGBN 5 | from .bayesian_pm.cpfa import CPFA 6 | from .bayesian_pm.cpgbn import CPGBN 7 | from .bayesian_pm.pgds import PGDS 8 | from .bayesian_pm.dpgds import DPGDS 9 | from .bayesian_pm.wedtm import WEDTM 10 | from .bayesian_pm.dirbn import DirBN 11 | from .bayesian_pm.dpfa import DPFA 12 | from .bayesian_pm.gpgbn import GPGBN 13 | from .bayesian_pm.mpgbn import MPGBN 14 | from .bayesian_pm.fa import FA 15 | from .bayesian_pm.gmm import GMM 16 | from .deep_learning_pm.vae import VAE 17 | from .deep_learning_pm.cvae import CVAE 18 | from .deep_learning_pm.vqvae import VQVAE 19 | from .deep_learning_pm.gan import GAN 20 | from .deep_learning_pm.wgan import WGAN 21 | from .deep_learning_pm.dcgan import DCGAN 22 | from .deep_learning_pm.infogan import InfoGAN 23 | from .deep_learning_pm.rbm import RBM 24 | from .deep_learning_pm.ddpm import DDPM 25 | from .deep_learning_pm.nflow import NFlow 26 | from .deep_learning_pm.realnvp import RealNVP 27 | from .hybrid_pm.whai import WHAI 28 | from .hybrid_pm.wgaae import WGAAE 29 | from .hybrid_pm.sawetm import SawETM 30 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from setuptools import setup, find_packages 5 | 6 | 7 | setup( 8 | name='pydpm', 9 | version='5.0.0', 10 | description='A Python library that focuses on constructing deep probabilistic models on GPU.', 11 | py_modules=['pydpm'], 12 | long_description=open('README.md').read(), 13 | long_description_content_type='text/markdown', 14 | author='Chaojie Wang, Wei Zhao, Xinyang Liu, Jiawen Wu', 15 | author_email='xd_silly@163.com',
16 | maintainer='BoChenGroup', 17 | maintainer_email='13279389260@163.com', 18 | license='Apache License Version 2.0', 19 | packages=find_packages(), 20 | # package_data={'pydpm': c_package_data}, 21 | # data_files=c_package_data, 22 | include_package_data=True, # include all files 23 | platforms=["Windows", "Linux"], 24 | url='https://github.com/BoChenGroup/Pydpm', 25 | requires=['numpy', 'scipy', 'sklearn', 'PyTorch', 'ctypes', 'subprocess', ], 26 | classifiers=[ 27 | 'Environment :: GPU :: NVIDIA CUDA', 28 | 'Intended Audience :: Developers', 29 | 'Programming Language :: Python :: 3', 30 | 'Topic :: Software Development :: Libraries' 31 | ], 32 | ) 33 | -------------------------------------------------------------------------------- /pydpm/sampler/_compact/crt_cpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // Chaojie 2017_10_12 4 | // No Check 5 | 6 | int Binary_Search(double *probvec, double prob, int K) 7 | { 8 | int kstart, kend, kmid; 9 | // K : the length of probvec 10 | if (prob <= probvec[0]) 11 | return(0); 12 | else 13 | { 14 | for(kstart = 1, kend = K-1;;) 15 | { 16 | if (kstart >= kend) 17 | return(kend); 18 | else 19 | { 20 | kmid = (kstart + kend)/2; 21 | if (probvec[kmid-1]>=prob) 22 | kend = kmid - 1; 23 | else if (probvec[kmid]; Jiawen Wu ; Wei Zhao <13279389260@163.com> 3 | # License: BSD-3-Claus 4 | 5 | class Params(object): 6 | def __init__(self): 7 | """ 8 | The basic class for storing the parameters in the probabilistic model 9 | """ 10 | super(Params, self).__init__() 11 | 12 | 13 | class Basic_Model(object): 14 | def __init__(self, *args, **kwargs): 15 | """ 16 | The basic model for all probabilistic models in this package 17 | Attributes: 18 | @public: 19 | global_params : [Params] the global parameters of the probabilistic model 20 | local_params : [Params] the local parameters of the probabilistic model 21 | 22 | @private: 23 | _model_setting : [Params] the model settings of the probabilistic model 24 | _hyper_params : [Params] the hyper parameters of the probabilistic model 25 | 26 | """ 27 | super(Basic_Model, self).__init__() 28 | 29 | setattr(self, 'global_params', Params()) 30 | setattr(self, 'local_params', Params()) 31 | 32 | setattr(self, '_model_setting', Params()) 33 | setattr(self, '_hyper_params', Params()) 34 | 35 | -------------------------------------------------------------------------------- /pydpm/metric/accuracy.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Metric to evaluate the performance of the classification 4 | =========================================== 5 | 6 | """ 7 | 8 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 9 | # License: BSD-3-Clause 10 | 11 | import numpy as np 12 | from sklearn import svm 13 | 14 | 15 | class ACC(object): 16 | 17 | def __init__(self, x_tr: np.ndarray, x_te: np.ndarray, y_tr: np.ndarray, y_te: np.ndarray, model='SVM'): 18 | ''' 19 | Inputs: 20 | x_tr : [np.ndarray] K*N_train matrix, N_train latent features of length K 21 | x_te : [np.ndarray] K*N_test matrix, N_test latent features of length K 22 | y_tr : [np.ndarray] N_train vector, labels of N_train latent features 23 | y_te : [np.ndarray] N_test vector, labels of N_test latent features 24 | 25 | Outputs: 26 | accuracy: [float] scalar, the accuracy score 27 | 28 | ''' 29 | self.x_tr = x_tr 30 | self.x_te = x_te 31 | self.y_tr = y_tr 32 | self.y_te = y_te 33 | 34 | 
if model == 'SVM': 35 | self._svm() 36 | else: 37 | print("Please input metric model correctly. Options: 'SVM'") 38 | 39 | print(f'The classification accuracy with {model} is: {self._accuracy:.4f}') 40 | 41 | 42 | def _svm(self): 43 | 44 | self.model = svm.SVC() 45 | self.model.fit(self.x_tr.T, self.y_tr) 46 | print(f'Optimization Finished') 47 | self._accuracy = self.model.score(self.x_te.T, self.y_te) 48 | 49 | -------------------------------------------------------------------------------- /pydpm/utils/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Metric 4 | =========================================== 5 | 6 | """ 7 | 8 | # Author: Chaojie Wang ; Jiawen Wu; Xinyang Liu 9 | # License: BSD-3-Clause 10 | import os 11 | import random 12 | import numpy as np 13 | 14 | import torch 15 | 16 | realmin = 2.2e-10 17 | 18 | # randomness 19 | def seed_everything(seed_value): 20 | random.seed(seed_value) 21 | np.random.seed(seed_value) 22 | torch.manual_seed(seed_value) 23 | os.environ['PYTHONHASHSEED'] = str(seed_value) 24 | 25 | if torch.cuda.is_available(): 26 | torch.cuda.manual_seed(seed_value) 27 | torch.cuda.manual_seed_all(seed_value) 28 | torch.backends.cudnn.deterministic = True 29 | torch.backends.cudnn.benchmark = True 30 | 31 | # math 32 | def log_max(x): 33 | return np.log(np.maximum(x, realmin)) 34 | 35 | def cosine_simlarity(A, B): 36 | # A: N*D, B: N*D 37 | [N, D] = A.shape 38 | inter_product = np.matmul(A, np.transpose(B)) # N*N 39 | len_A = np.sqrt(np.sum(A * A, axis=1, keepdims=True)) 40 | len_B = np.sqrt(np.sum(B * B, axis=1, keepdims=True)) 41 | len_AB = np.matmul(len_A, np.transpose(len_B)) 42 | cos_AB = inter_product / (len_AB + realmin) 43 | cos_AB[(np.arange(N), np.arange(N))] = 1 44 | return cos_AB 45 | 46 | def standardization(data): 47 | mu = np.mean(data, axis=1, keepdims=True) 48 | sigma = np.std(data, axis=1, keepdims=True) 49 | return (data - mu) / (sigma + 2.2e-8) 50 | 51 | def normalize_to_neg_one_to_one(img): 52 | return img * 2 - 1 53 | 54 | def unnormalize_to_zero_to_one(t): 55 | return (t + 1) * 0.5 -------------------------------------------------------------------------------- /pydpm/sampler/_compact/crt_multi_aug_cpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // Chaojie 2017_10_12 4 | // No Check 5 | 6 | int Binary_Search(double *probvec, double prob, int K) 7 | { 8 | int kstart, kend, kmid; 9 | // K : the length of probvec 10 | if (prob <= probvec[0]) 11 | return(0); 12 | else 13 | { 14 | for(kstart = 1, kend = K-1;;) 15 | { 16 | if (kstart >= kend) 17 | return(kend); 18 | else 19 | { 20 | kmid = (kstart + kend)/2; 21 | if (probvec[kmid-1]>=prob) 22 | kend = kmid - 1; 23 | else if (probvec[kmid] 4 | # License: BSD-3-Clause 5 | 6 | import numpy as np 7 | from sklearn.metrics import roc_auc_score 8 | from sklearn.metrics import average_precision_score 9 | 10 | class ROC_AP_SCORE(object): 11 | 12 | def __init__(self, edges_pos, edges_neg, adj_orig, emb=None): 13 | 14 | self.edges_pos = edges_pos 15 | self.edges_neg = edges_neg 16 | self.adj_orig = adj_orig 17 | self.emb = emb 18 | 19 | self._get() 20 | 21 | # print(f'The AUC is: {self._AUC:.4f} and AP is: {self._AP:.4f}') 22 | 23 | def _get(self): 24 | # if emb is None: 25 | # feed_dict.update({placeholders['dropout']: 0}) 26 | # emb = sess.run(model.z_decoder_a, feed_dict=feed_dict) 27 | 28 | def sigmoid(x): 29 | return 1 / (1 + 
np.exp(-x)) 30 | 31 | def beta(x): 32 | return 1 - np.exp(-x) 33 | 34 | # Predict on test set of edges 35 | adj_rec = np.dot(self.emb, self.emb.T) 36 | preds = [] 37 | pos = [] 38 | # print(adj_rec,'**************') 39 | for e in self.edges_pos: 40 | # preds.append(sigmoid(adj_rec[e[0], e[1]])) 41 | # preds.append(adj_rec[e[0], e[1]]) 42 | preds.append(beta(adj_rec[e[0], e[1]])) 43 | pos.append(self.adj_orig[e[0], e[1]]) 44 | 45 | preds_neg = [] 46 | neg = [] 47 | for e in self.edges_neg: 48 | # preds_neg.append(sigmoid(adj_rec[e[0], e[1]])) 49 | # preds_neg.append(adj_rec[e[0], e[1]]) 50 | preds_neg.append(beta(adj_rec[e[0], e[1]])) 51 | neg.append(self.adj_orig[e[0], e[1]]) 52 | 53 | preds_all = np.hstack([preds, preds_neg]) 54 | labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))]) 55 | roc_score = roc_auc_score(labels_all, preds_all) 56 | ap_score = average_precision_score(labels_all, preds_all) 57 | 58 | self._AUC = roc_score 59 | self._AP = ap_score 60 | # return roc_score, ap_score 61 | -------------------------------------------------------------------------------- /pydpm/metric/cluster_acc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Author: Xinyang Liu 4 | # License: BSD-3-Clause 5 | 6 | import numpy as np 7 | from scipy.optimize import linear_sum_assignment 8 | 9 | # from sklearn.utils.linear_assignment_ import linear_assignment 10 | # from sklearn.metrics.cluster import normalized_mutual_info_score as NMI, \ 11 | # adjusted_mutual_info_score as AMI, adjusted_rand_score as AR, silhouette_score as SI, calinski_harabasz_score as CH 12 | 13 | class Cluster_ACC(object): 14 | 15 | def __init__(self, y, ypred): 16 | ''' 17 | Inputs: 18 | y: the ground_true, shape:(n_sample,) 19 | ypred: pred_label, shape:(n_sample,) 20 | 21 | Outputs: 22 | accuracy of cluster, in [0, 1] 23 | ''' 24 | self.y = y 25 | self.ypred = ypred 26 | 27 | self._get() 28 | 29 | print(f'The cluster accuracy is: {self._cluster_acc:.4f}') 30 | 31 | def _get(self): 32 | s = np.unique(self.ypred) 33 | t = np.unique(self.y) 34 | 35 | N = len(np.unique(self.ypred)) 36 | C = np.zeros((N, N), dtype=np.int32) 37 | for i in range(N): 38 | for j in range(N): 39 | idx = np.logical_and(self.ypred == s[i], self.y == t[j]) 40 | C[i][j] = np.count_nonzero(idx) 41 | 42 | # convert the C matrix to the 'true' cost 43 | Cmax = np.amax(C) 44 | C = Cmax - C 45 | indices = linear_sum_assignment(C) 46 | row = indices[:][:, 0] 47 | col = indices[:][:, 1] 48 | # calculating the accuracy according to the optimal assignment 49 | count = 0 50 | for i in range(N): 51 | idx = np.logical_and(self.ypred == s[row[i]], self.y == t[col[i]]) 52 | count += np.count_nonzero(idx) 53 | 54 | self._cluster_acc = 1.0 * count / len(self.y) 55 | 56 | # y_true = y_true.astype(np.int64) 57 | # assert y_pred.size == y_true.size 58 | # D = max(y_pred.max(), y_true.max()) + 1 59 | # w = np.zeros((D, D), dtype=np.int64) 60 | # for i in range(y_pred.size): 61 | # w[y_pred[i], y_true[i]] += 1 62 | # from sklearn.utils.linear_assignment_ import linear_assignment 63 | # ind = linear_assignment(w.max() - w) # Optimal label mapping based on the Hungarian algorithm 64 | # return sum([w[i, j] for i, j in ind]) * 1.0 / y_pred.size 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /pydpm/metric/normalized_mutual_information.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Author: Xinyang Liu 4 | # License: BSD-3-Clause 5 | 6 | import numpy as np 7 | import copy 8 | 9 | class NMI(object): 10 | def __init__(self, A, B): 11 | ''' 12 | Inputs: 13 | A: [int], ground_truth, shape:(n_sample,) 14 | B: [int], pred_label, shape:(n_sample,) 15 | 16 | Outputs: 17 | NMI: [float], Normalized Mutual information of A and B 18 | 19 | ''' 20 | self.A = copy.deepcopy(A) 21 | self.B = copy.deepcopy(B) 22 | 23 | self._get() 24 | 25 | print(f'The NMI is: {self._NMI:.4f}') 26 | 27 | def _get(self): 28 | n_gnd = self.A.shape[0] 29 | n_label = self.B.shape[0] 30 | # assert n_gnd == n_label 31 | 32 | LabelA = np.unique(self.A) 33 | nClassA = len(LabelA) 34 | LabelB = np.unique(self.B) 35 | nClassB = len(LabelB) 36 | 37 | if nClassB < nClassA: 38 | self.A = np.concatenate((self.A, LabelA)) 39 | self.B = np.concatenate((self.B, LabelA)) 40 | else: 41 | self.A = np.concatenate((self.A, LabelB)) 42 | self.B = np.concatenate((self.B, LabelB)) 43 | 44 | G = np.zeros([nClassA, nClassA]) 45 | for i in range(nClassA): 46 | for j in range(nClassA): 47 | G[i, j] = np.sum((self.A == LabelA[i]) * (self.B == LabelA[j])) 48 | 49 | sum_G = np.sum(G) 50 | PA = np.sum(G, axis=1) 51 | PA = PA/sum_G 52 | PB = np.sum(G, axis=0) 53 | PB = PB/sum_G 54 | PAB = G/sum_G 55 | 56 | if np.sum((PA == 0)) > 0 or np.sum((PB == 0)): 57 | print('error ! Smooth fail !') 58 | self._NMI = np.nan 59 | else: 60 | HA = np.sum(-PA * np.log2(PA)) 61 | HB = np.sum(-PB * np.log2(PB)) 62 | PPP = PAB / np.tile(PB, (nClassA, 1)) / np.tile(PA.reshape(-1, 1), (1, nClassA)) 63 | PPP[np.where(abs(PPP) < 1e-12)] = 1 # avoid 'log 0' 64 | MI = np.sum(PAB * np.log2(PPP)) 65 | NMI = MI / np.max((HA, HB)) 66 | # optional 67 | # NMI = 2.0 * MI / (HA + HB) 68 | # NMI = MI / np.sqrt(HA * HB) 69 | self._NMI = NMI 70 | 71 | -------------------------------------------------------------------------------- /pydpm/sampler/distribution_sampler_cpu.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import ctypes 4 | 5 | class distribution_sampler_cpu(object): 6 | 7 | def __init__(self): 8 | """ 9 | The basic class for sampling distribution on cpu 10 | """ 11 | super(distribution_sampler_cpu, self).__init__() 12 | 13 | # sampler for basic distributions 14 | setattr(self, 'standard_normal', np.random.standard_normal) 15 | setattr(self, 'normal', np.random.normal) 16 | setattr(self, 'standard_gamma', np.random.standard_gamma) 17 | setattr(self, 'gamma', np.random.gamma) 18 | setattr(self, 'standard_cauchy', np.random.standard_cauchy) 19 | # setattr(self, 'cauchy', np.random.cauchy) # numpy doesnot has this distribution 20 | setattr(self, 'chisquare', np.random.chisquare) 21 | setattr(self, 'beta', np.random.beta) 22 | # setattr(self, 'crt', np.random.crt) # numpy doesnot has this distribution 23 | setattr(self, 'dirichlet', np.random.dirichlet) 24 | setattr(self, 'poisson', np.random.poisson) 25 | setattr(self, 'weibull', np.random.weibull) 26 | setattr(self, 'negative_binomial', np.random.negative_binomial) 27 | setattr(self, 'lognormal', np.random.lognormal) 28 | setattr(self, 'binomial', np.random.binomial) 29 | setattr(self, 'multinomial', np.random.multinomial) 30 | setattr(self, 'laplace', np.random.laplace) 31 | setattr(self, 'logistic', np.random.logistic) 32 | setattr(self, 'exponential', np.random.exponential) 33 | setattr(self, 'standard_exponential', np.random.standard_exponential) 34 | setattr(self, 
'noncentral_chisquare', np.random.noncentral_chisquare) 35 | setattr(self, 'zipf', np.random.zipf) 36 | setattr(self, 'triangular', np.random.triangular) 37 | setattr(self, 'noncentral_f', np.random.noncentral_f) 38 | setattr(self, '_f', np.random.f) 39 | # setattr(self, 't', np.random.t) # numpy doesnot has this distribution 40 | setattr(self, 'geometric', np.random.geometric) 41 | setattr(self, 'hypergeometric', np.random.hypergeometric) 42 | setattr(self, 'gumbel', np.random.gumbel) 43 | setattr(self, 'pareto', np.random.pareto) 44 | setattr(self, 'power', np.random.power) 45 | setattr(self, 'rayleigh', np.random.rayleigh) 46 | -------------------------------------------------------------------------------- /pydpm/sampler/_compact/multi_aug_cpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // Chaojie 2017_10_12 4 | 5 | int Binary_Search(double *probvec, double prob, int K) 6 | { 7 | int kstart, kend, kmid; 8 | // K : the length of probvec 9 | if (prob <= probvec[0]) 10 | return(0); 11 | else 12 | { 13 | for(kstart = 1, kend = K-1;;) 14 | { 15 | if (kstart >= kend) 16 | return(kend); 17 | else 18 | { 19 | kmid = (kstart + kend)/2; 20 | if (probvec[kmid-1]>=prob) 21 | kend = kmid - 1; 22 | else if (probvec[kmid] 4 | # License: BSD-3-Clause 5 | 6 | import os 7 | import numpy as np 8 | from PIL import Image 9 | import matplotlib.pyplot as plt 10 | 11 | import torch 12 | from torch.utils.data import Dataset, DataLoader 13 | 14 | import torchvision 15 | import torchvision.datasets as datasets 16 | 17 | def tensor_transforms(data, transforms=lambda x:x): 18 | data = data.numpy() 19 | data = transforms(data) 20 | return data 21 | 22 | # ======================================== CustomDataset ======================================================== # 23 | 24 | class CustomDataset(Dataset): 25 | def __init__(self, file_path, mode='train', transform=None, target_transform=None): 26 | super(CustomDataset, self).__init__() 27 | self.file_path = os.path.join(file_path, mode) 28 | self.transform = transform 29 | self.target_transform = target_transform 30 | self.classes = [] 31 | self.classes_num = 0 32 | self.class_to_idx = {} 33 | self.image_names = [] 34 | self.image_classes = [] 35 | self.classes_file = os.path.join(file_path, 'label.txt') 36 | 37 | with open(self.classes_file, 'r') as classes_list: 38 | for line in classes_list: 39 | self.classes_num += 1 40 | self.classes.append(line) 41 | self.class_to_idx[line] = self.classes_num - 1 42 | 43 | with open(self.file_path, 'r') as image_class_file: 44 | for line in image_class_file: 45 | image_class_pair = line.split('\t') 46 | self.image_names.append(image_class_pair[0]) 47 | self.image_classes.append(image_class_pair[1]) 48 | 49 | def __getitem__(self, idx): 50 | image_path, target = self.image_names[idx], self.class_to_idx[self.image_classes[idx]] 51 | 52 | # Return a PIL Image 53 | image = Image.open(image_path) 54 | 55 | if self.transform: 56 | image = self.transform(image) 57 | if self.target_transform is not None: 58 | target = self.target_transform(target) 59 | 60 | return image, target 61 | 62 | def __len__(self): 63 | return len(self.image_names) 64 | 65 | 66 | def image_dataloader(root='../dataset/mnist', transform=None, target_transform=None, 67 | batch_size=500, shuffle=True, drop_last=True, num_workers=4): 68 | dataset = CustomDataset(root, transform=transform, target_transform=target_transform) 69 | 70 | return DataLoader(dataset, batch_size=batch_size, 
shuffle=shuffle, num_workers=num_workers, drop_last=drop_last), dataset.classes 71 | 72 | -------------------------------------------------------------------------------- /pydpm/sampler/basic_sampler.py: -------------------------------------------------------------------------------- 1 | 2 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 3 | # License: BSD-3-Claus 4 | 5 | import platform 6 | 7 | 8 | class Basic_Sampler(object): 9 | def __init__(self, device='cpu', seed=0, *args, **kwargs): 10 | """ 11 | The basic sampler model for training all probabilistic models in this package 12 | Attributes: 13 | @public: 14 | 15 | 16 | @private: 17 | _model_setting : [Params] the model settings of the probabilistic model 18 | _hyper_params : [Params] the hyper parameters of the probabilistic model 19 | 20 | """ 21 | super(Basic_Sampler, self).__init__() 22 | 23 | assert device in ['cpu', 'gpu'], 'Device Error, device should be "cpu" or "gpu" ' 24 | self.device = device 25 | self.seed = seed 26 | 27 | system_type = platform.system() 28 | assert system_type in ['Windows', 'Linux'], 'System Error, system should be "Windows" or "Linux" ' 29 | self.system_type = system_type 30 | 31 | if self.device == 'cpu': 32 | self._cpu_sampler_initial() 33 | 34 | elif self.device == 'gpu': 35 | self._gpu_sampler_initial() 36 | 37 | def _cpu_sampler_initial(self): 38 | 39 | from .distribution_sampler_cpu import distribution_sampler_cpu 40 | sampler = distribution_sampler_cpu() 41 | for distribution_name in dir(sampler): 42 | if distribution_name[0] != '_': 43 | setattr(self, distribution_name, getattr(sampler, distribution_name)) 44 | else: 45 | continue 46 | 47 | from .model_sampler_cpu import model_sampler_cpu 48 | sampler = model_sampler_cpu(self.system_type) 49 | for distribution_name in dir(sampler): 50 | if distribution_name[0] != '_': 51 | setattr(self, distribution_name, getattr(sampler, distribution_name)) 52 | else: 53 | continue 54 | 55 | 56 | 57 | def _gpu_sampler_initial(self): 58 | 59 | from .distribution_sampler_gpu import distribution_sampler_gpu 60 | sampler = distribution_sampler_gpu(self.system_type) 61 | for distribution_name in dir(sampler): 62 | if distribution_name[0] != '_': 63 | setattr(self, distribution_name, getattr(sampler, distribution_name)) 64 | else: 65 | continue 66 | 67 | from .model_sampler_gpu import model_sampler_gpu 68 | sampler = model_sampler_gpu(self.system_type) 69 | for distribution_name in dir(sampler): 70 | if distribution_name[0] != '_': 71 | setattr(self, distribution_name, getattr(sampler, distribution_name)) 72 | else: 73 | continue 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Gaussian_Mixture_Model/GMM_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Gaussian Mixture Model 4 | =========================================== 5 | 6 | """ 7 | 8 | # Author: Xinyang Liu ; 9 | # License: BSD-3-Claus 10 | 11 | import numpy as np 12 | import argparse 13 | from torchvision import datasets, transforms 14 | from pydpm.model import GMM 15 | from pydpm.metric import Cluster_ACC, NMI 16 | from pydpm.dataloader.image_data import tensor_transforms 17 | 18 | # =========================================== ArgumentParser ===================================================================== # 19 | parser = argparse.ArgumentParser() 20 | 21 | # device 22 | parser.add_argument("--device", 
type=str, default='gpu') 23 | 24 | # dataset 25 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 26 | 27 | # model 28 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 29 | parser.add_argument("--load_path", type=str, default='../../save_models/GMM.npy', help="the path of loading model") 30 | 31 | parser.add_argument("--n_components", type=int, default=10, help="number of components according dataset") 32 | 33 | # optim 34 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 35 | 36 | args = parser.parse_args() 37 | # =========================================== Dataset ===================================================================== # 38 | # define transform for dataset and load orginal dataset 39 | transform = transforms.Compose([transforms.ToTensor()]) 40 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 41 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 42 | 43 | # transform dataset and reshape the dataset into [batch_size, feature_num] 44 | train_data = tensor_transforms(train_dataset.data, transform) 45 | train_data = train_data.permute([1, 2, 0]).reshape([len(train_dataset), -1]) # len(train_dataset, 28*28) 46 | test_data = tensor_transforms(test_dataset.data, transform) 47 | test_data = test_data.permute([1, 2, 0]).reshape([len(test_dataset), -1]) 48 | train_label = train_dataset.train_labels 49 | test_label = test_dataset.test_labels 50 | 51 | # transpose the dataset to fit the model and convert a tensor to numpy array 52 | train_data = np.array(np.ceil(train_data[:999, :].numpy()), order='C') 53 | test_data = np.array(np.ceil(test_data[:999, :].numpy()), order='C') 54 | train_label = train_label.numpy()[:999] 55 | test_label = test_label.numpy()[:999] 56 | 57 | # =========================================== Model ===================================================================== # 58 | # create the model and deploy it on gpu or cpu 59 | model = GMM(K=args.n_components, device=args.device) 60 | model.initial(train_data) # use the shape of train_data to initialize the params of model 61 | 62 | # train and evaluation 63 | cluster, train_local_params = model.train(data=train_data, num_epochs=args.num_epochs) 64 | 65 | # Evaluation on dataset using accuracy of cluster and NMI 66 | res_acc = Cluster_ACC(train_label, cluster) 67 | res_nmi = NMI(train_label, cluster) 68 | 69 | model.save() 70 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Factor_Analysis/FA_demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Factor Analysis 4 | =========================================== 5 | 6 | """ 7 | 8 | # Author: Xinyang Liu ; 9 | # License: BSD-3-Claus 10 | 11 | import numpy as np 12 | import scipy.io as sio 13 | import argparse 14 | from torchvision import datasets, transforms 15 | import matplotlib.pyplot as plt 16 | from pydpm.model import FA 17 | from pydpm.utils.utils import * 18 | from pydpm.dataloader.image_data import tensor_transforms 19 | 20 | # # load data 21 | # data = sio.loadmat('../../../dataset/FA_data.mat') 22 | # train_data = np.array(data['x1']) 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 
25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--device", type=str, default='gpu') 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | 33 | # model 34 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 35 | parser.add_argument("--load_path", type=str, default='../../save_models/FA.npy', help="the path of loading model") 36 | 37 | parser.add_argument("--z_dim", type=int, default=128, help="number of components according dataset") 38 | 39 | # optim 40 | parser.add_argument("--num_epochs", type=int, default=1000, help="number of epochs of training") 41 | 42 | args = parser.parse_args() 43 | # =========================================== Dataset ===================================================================== # 44 | # define transform for dataset and load orginal dataset 45 | transform = transforms.Compose([transforms.ToTensor()]) 46 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 47 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 48 | 49 | # transform dataset and reshape the dataset into [batch_size, feature_num] 50 | train_data = tensor_transforms(train_dataset.data, transform) 51 | train_data = train_data.permute([1, 2, 0]).reshape([len(train_dataset), -1]) # len(train_dataset, 28*28) 52 | test_data = tensor_transforms(test_dataset.data, transform) 53 | test_data = test_data.permute([1, 2, 0]).reshape([len(test_dataset), -1]) 54 | train_label = train_dataset.train_labels 55 | test_label = test_dataset.test_labels 56 | 57 | # transpose the dataset to fit the model and convert a tensor to numpy array 58 | # !!! 
Transposition, data: [D, N] 59 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy()), order='C') 60 | # test_data = np.array(np.ceil(test_data[:999, :].T.numpy()), order='C') 61 | train_data = standardization(train_data) 62 | # test_data = standardization(test_data) 63 | train_label = train_label.numpy()[:999] 64 | # test_label = test_label.numpy()[:999] 65 | 66 | # create the model and deploy it on gpu or cpu 67 | model =FA(args.z_dim, 'gpu') 68 | model.initial(train_data) # use the shape of train_data to initialize the params of model 69 | 70 | # train and evaluation 71 | train_local_params = model.train(train_data, args.num_epochs) 72 | 73 | x_hat = np.matmul(train_local_params.w, train_local_params.z) 74 | 75 | # visualization for one sample 76 | plt.plot(train_data[:, 880], 'ro', marker='*', label="train data") 77 | plt.plot(x_hat[:, 880], 'bo', marker='v', label="reconstruction") 78 | plt.legend(loc="best") 79 | plt.show() 80 | 81 | # save the model after training 82 | model.save() 83 | -------------------------------------------------------------------------------- /pydpm/sampler/pre_process.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import ctypes 4 | 5 | 6 | def para_preprocess(times=1, in_type=np.float32, out_type=np.float32, *args): 7 | """ 8 | preprocess the input parameters in sampling the distribution with gpu 9 | Inputs: 10 | times : [int] repeat times 11 | in_type : [np.dtype] or list of np.dtype the dtype of the input parameters 12 | out_type: [np.dtype] the dtype of the output sampling results 13 | args[0] : [np.ndarray] the first variable in the target distribution 14 | args[1] : [np.ndarray] the second variable in the target distribution 15 | Outputs: 16 | para_a : [pycuda.gpuarray] the input matrix for the first parameter 17 | para_b : [pycuda.gpuarray] the input matrix for the second parameter 18 | output : [pycuda.gpuarray] the matrix on gpu to store the sampling result 19 | para_scale : [list] a list including the number of element and repeat times in the resulting matrix 20 | para_seed : [pycuda.gpuarray] seed matrix on gpu 21 | partition : [list] a list including 22 | scalar_flag : [bool] if the resulting matrix is a scalar 23 | """ 24 | assert len(args) <= 2, 'Value Error: the number of the input parameter in the sampling distribution should not be larger than 2' 25 | 26 | if len(args) == 1: 27 | para_a = np.array(args[0], dtype=in_type, order='C') 28 | # assert len(para_a.shape) <= 2, 'Shape Error: the dimension of the input parameter a in the sampling distribution shoud not be larger than 2' 29 | 30 | # obtain the output_scale and judge if the para_a is a scalar 31 | if times > 1: 32 | output_scale = para_a.shape + (times,) 33 | scalar_flag = False 34 | else: 35 | output_scale = (1,) if para_a.shape == () else para_a.shape 36 | scalar_flag = True if para_a.shape == () else False 37 | 38 | elif len(args) == 2: 39 | if type(in_type) == type: 40 | in_type = [in_type, in_type] 41 | assert (type(in_type) == list and len(in_type) == 2) 42 | 43 | para_a = np.array(args[0], dtype=in_type[0], order='C') 44 | para_b = np.array(args[1], dtype=in_type[1], order='C') 45 | # assert len(para_a.shape) <= 2, 'Shape Error: the dimension of the input parameter a in the sampling distirbution shoud not be larger than 2' 46 | # assert len(para_b.shape) <= 2, 'Shape Error: the dimension of the input parameter b in the sampling distirbution shoud not be larger than 2' 47 | 48 | # make sure the 
sizes of para_a and para_b are equal 49 | if para_a.size == 1 and para_b.size != 1: 50 | para_a = np.array(np.full(para_b.shape, para_a), dtype=in_type[0], order='C') 51 | if para_b.size == 1 and para_a.size != 1: 52 | para_b = np.array(np.full(para_a.shape, para_b), dtype=in_type[1], order='C') 53 | 54 | # obtain the output_scale and judge if the para_a is a scalar 55 | if times > 1: 56 | output_scale = para_a.shape + (times,) 57 | scalar_flag = False 58 | else: 59 | output_scale = (1,) if para_a.shape == () else para_a.shape 60 | scalar_flag = True if para_a.shape == () else False 61 | 62 | matrix_scale = para_a.size 63 | nElems = para_a.size * times # output_scale multi. 64 | 65 | # output 66 | output = np.empty(output_scale, dtype=out_type, order='C') 67 | 68 | if len(args) == 1: 69 | return matrix_scale, nElems, para_a, output, output_scale, scalar_flag 70 | elif len(args) == 2: 71 | return matrix_scale, nElems, para_a, para_b, output, output_scale, scalar_flag -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Dynamic_System/PGDS_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Poisson Gamma Dynamical Systems Demo 4 | Aaron Schein, Hanna Wallach and Mingyuan Zhou 5 | Published in Neural Information Processing Systems 2016 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import argparse 16 | import scipy.io as sio 17 | 18 | from torchvision import datasets, transforms 19 | 20 | from pydpm.model import PGDS 21 | from pydpm.metric import ACC 22 | from pydpm.dataloader.image_data import tensor_transforms 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--device", type=str, default='gpu') 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | 33 | # model 34 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 35 | parser.add_argument("--load_path", type=str, default='../../save_models/PGDS.npy', help="the path of loading model") 36 | 37 | parser.add_argument("--z_dim", type=int, default=100, help="dimensionality of the z latent space") 38 | 39 | # optim 40 | parser.add_argument("--num_epochs", type=int, default=200, help="number of epochs of training") 41 | 42 | args = parser.parse_args() 43 | 44 | # =========================================== Dataset ===================================================================== # 45 | # define transform for dataset and load orginal dataset 46 | transform = transforms.Compose([transforms.ToTensor()]) 47 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 48 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 49 | 50 | # transform dataset and reshape the dataset into [batch_size, feature_num] 51 | train_data = tensor_transforms(train_dataset.data, transform) 52 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 53 | test_data = tensor_transforms(test_dataset.data, transform) 54 | test_data = 
test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 55 | train_label = train_dataset.train_labels 56 | test_label = test_dataset.test_labels 57 | 58 | # transpose the dataset to fit the model and convert a tensor to numpy array 59 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 60 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 61 | train_label = train_label.numpy()[:999] 62 | test_label = test_label.numpy()[:999] 63 | 64 | # =========================================== Dataset ===================================================================== # 65 | # define transform for dataset and load orginal dataset 66 | model = PGDS(K=args.z_dim, device=args.device) 67 | model.initial(train_data) 68 | 69 | # train and evaluation 70 | train_local_params = model.train(data=train_data, num_epochs=args.num_epochs) 71 | train_local_params = model.test(data=train_data, num_epochs=args.num_epochs) 72 | test_local_params = model.test(data=test_data, num_epochs=args.num_epochs) 73 | 74 | # save the model after training 75 | model.save(args.save_path) 76 | # load the model 77 | model.load(args.load_path) 78 | 79 | # evaluate the model with classification accuracy 80 | # the demo accuracy can achieve 0.8739 81 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_label, test_label, 'SVM') 82 | 83 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Belief_Network/PGBN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Poisson Gamma Belief Network 4 | Mingyuan Zhou, Yulai Cong and Bo Chen 5 | Published in Advances in Neural Information Processing System 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import argparse 16 | import scipy.io as sio 17 | 18 | from torchvision import datasets, transforms 19 | 20 | from pydpm.model import PGBN 21 | from pydpm.metric import ACC 22 | from pydpm.dataloader.image_data import tensor_transforms 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--device", type=str, default='gpu') 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | 33 | # model 34 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 35 | parser.add_argument("--load_path", type=str, default='../../save_models/PGBN.npy', help="the path of loading model") 36 | 37 | parser.add_argument("--z_dims", type=list, default=[128, 64, 32], help="number of topics at diffrent layers in PGBN") 38 | 39 | # optim 40 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 41 | 42 | args = parser.parse_args() 43 | 44 | # =========================================== Dataset ===================================================================== # 45 | # define transform for dataset and load orginal dataset 46 | transform = transforms.Compose([transforms.ToTensor()]) 47 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 48 | test_dataset 
= datasets.MNIST(root=args.data_path, train=False, download=False) 49 | 50 | # transform dataset and reshape the dataset into [batch_size, feature_num] 51 | train_data = tensor_transforms(train_dataset.data, transform) 52 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 53 | test_data = tensor_transforms(test_dataset.data, transform) 54 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 55 | train_label = train_dataset.train_labels 56 | test_label = test_dataset.test_labels 57 | 58 | # transpose the dataset to fit the model and convert a tensor to numpy array 59 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 60 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 61 | train_label = train_label.numpy()[:999] 62 | test_label = test_label.numpy()[:999] 63 | 64 | # =========================================== Model ===================================================================== # 65 | # create the model and deploy it on gpu or cpu 66 | model = PGBN(K=args.z_dims, device=args.device) 67 | model.initial(train_data) 68 | 69 | # train and evaluation 70 | train_local_params = model.train(data=train_data, num_epochs=args.num_epochs) 71 | train_local_params = model.test(data=train_data, num_epochs=args.num_epochs) 72 | test_local_params = model.test(data=test_data, num_epochs=args.num_epochs) 73 | 74 | # save the model after training 75 | model.save(args.save_path) 76 | # load the model 77 | model.load(args.load_path) 78 | 79 | # evaluate the model with classification accuracy 80 | # the demo accuracy can achieve 0.8088 81 | results = ACC(train_local_params.Theta[0], test_local_params.Theta[0], train_label, test_label, 'SVM') 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Variational_Autoencoder/VAE_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | VAE 4 | Auto-Encoding Variational Bayes 5 | Diederik P. 
Kingma, Max Welling 6 | Publihsed in 2014 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Muyao Wang , Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | import sys 17 | import torch 18 | import torch.optim as optim 19 | 20 | from torchvision import datasets, transforms 21 | from torchvision.utils import save_image 22 | from pydpm.model import VAE 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--gpu_id", type=int, default=0) 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/VAE.pth', help="the path of loading model") 37 | 38 | parser.add_argument("--z_dim", type=int, default=2, help="dimensionality of the z latent space") 39 | parser.add_argument("--encoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 40 | parser.add_argument("--decoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 41 | 42 | # optim 43 | parser.add_argument("--num_epochs", type=int, default=2, help="number of epochs of training") 44 | parser.add_argument("--batch_size", type=int, default=256, help="size of the batches") 45 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 46 | 47 | args = parser.parse_args() 48 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 49 | 50 | # =========================================== Dataset ===================================================================== # 51 | # mnist 52 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transforms.ToTensor(), download=True) 53 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transforms.ToTensor(), download=False) 54 | 55 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 56 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 57 | 58 | # =========================================== Model ===================================================================== # 59 | # model 60 | model = VAE(in_dim=args.img_size**2, z_dim=args.z_dim, encoder_hid_dims=args.encoder_hid_dims, decoder_hid_dims=args.decoder_hid_dims, device=args.device) 61 | model_opt = optim.Adam(model.parameters(), lr=args.lr) 62 | 63 | # train 64 | for epoch_idx in range(args.num_epochs): 65 | local_mu, local_log_var = model.train_one_epoch(dataloader=train_loader, model_opt=model_opt, epoch_idx=epoch_idx, args=args) 66 | if epoch_idx % 25 == 0: 67 | test_mu, test_log_var = model.test_one_epoch(dataloader=test_loader) 68 | 69 | # save 70 | model.save(args.save_path) 71 | # load 72 | model.load(args.load_path) 73 | 74 | # =================== Visualization ====================== # 75 | os.makedirs("../../output/images", exist_ok=True) 76 | print('sample image,please wait!') 77 | with torch.no_grad(): 78 | sample = model.sample(64) 79 
| save_image(sample.view(64, 1, 28, 28), '../../output/images/VAE_sample.png') 80 | print('complete!!!') 81 | 82 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Restricted_Boltzmann_Machine/RBM_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | RBM 4 | A Practical Guide to Training 5 | Restricted Boltzmann Machines 6 | Geoffrey Hinton 7 | Publihsed in 2010 8 | =========================================== 9 | """ 10 | # Author: Muyao Wang , Xinyang Liu 11 | # License: BSD-3-Clause 12 | 13 | import os 14 | import argparse 15 | import numpy as np 16 | 17 | import torch.utils.data 18 | import torch.optim as optim 19 | from torch.autograd import Variable 20 | 21 | from torchvision import datasets, transforms 22 | from torchvision.utils import save_image 23 | 24 | from pydpm.model import RBM 25 | 26 | # =========================================== ArgumentParser ===================================================================== # 27 | parser = argparse.ArgumentParser() 28 | 29 | # device 30 | parser.add_argument("--gpu_id", type=int, default=0) 31 | 32 | # dataset 33 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 34 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/RBM.pth', help="the path of loading model") 39 | 40 | parser.add_argument("--n_vis", type=int, default=784, help="dimensionality of visible units") 41 | parser.add_argument("--n_hin", type=int, default=500, help="dimensionality of latent units") 42 | parser.add_argument("--k", type=int, default=1, help="layers of RBM") 43 | 44 | # optim 45 | parser.add_argument("--num_epochs", type=int, default=10, help="number of epochs of training") 46 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 47 | parser.add_argument("--lr", type=float, default=0.1, help="adam: learning rate") 48 | 49 | args = parser.parse_args() 50 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 51 | 52 | # =========================================== Dataset ===================================================================== # 53 | # mnist 54 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transforms.Compose([transforms.ToTensor()]), download=True) 55 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transforms.Compose([transforms.ToTensor()]), download=False) 56 | 57 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 58 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 59 | 60 | # =========================================== Model ===================================================================== # 61 | # model 62 | model = RBM(n_vis=args.n_vis, n_hin=args.n_hin, k=args.k) 63 | model_opt = optim.SGD(model.parameters(), lr=args.lr) 64 | 65 | # train 66 | for epoch_idx in range(args.num_epochs): 67 | v, v1 = model.train_one_epoch(dataloader=train_loader, model_opt=model_opt, epoch_idx=epoch_idx, args=args) 68 | 69 | # save 70 | 
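# (model.save stores the trained RBM parameters under save_path; model.load below restores them from load_path)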
model.save(args.save_path) 71 | # load 72 | model.load(args.load_path) 73 | 74 | # =========================================== Visualization ===================================================================== # 75 | # visualize 76 | os.makedirs("../../output/images", exist_ok=True) 77 | print('sample image,please wait!') 78 | with torch.no_grad(): 79 | save_image(v.view(-1, 1, 28, 28), '../../output/images/RBM_l_real_' + '.png') 80 | save_image(v1.view(-1, 1, 28, 28), '../../output/images/RBM_l_generate_' + '.png') 81 | print('complete!!!') 82 | 83 | 84 | # device .test_one_epoch 85 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Latent_Dirchilet_Allocation/LDA_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Latent Dirichlet Allocation 4 | David M.Blei Andrew Y.Ng and Michael I.Jordan 5 | Published in Journal of Machine Learning 2003 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | from tqdm import tqdm 19 | 20 | from torchvision import datasets, transforms 21 | 22 | from pydpm.model import LDA 23 | from pydpm.metric import ACC 24 | from pydpm.dataloader.image_data import tensor_transforms 25 | 26 | # =========================================== ArgumentParser ===================================================================== # 27 | parser = argparse.ArgumentParser() 28 | 29 | # device 30 | parser.add_argument("--device", type=str, default='gpu') 31 | 32 | # dataset 33 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 34 | 35 | # model 36 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 37 | parser.add_argument("--load_path", type=str, default='../../save_models/LDA.npy', help="the path of loading model") 38 | 39 | parser.add_argument("--z_dim", type=int, default=128, help="dimensionality of the z latent space") 40 | 41 | # optim 42 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 43 | 44 | args = parser.parse_args() 45 | 46 | # =========================================== Dataset ===================================================================== # 47 | # define transform for dataset and load orginal dataset 48 | transform = transforms.Compose([transforms.ToTensor()]) 49 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 50 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 51 | 52 | # transform dataset and reshape the dataset into [batch_size, feature_num] 53 | train_data = tensor_transforms(train_dataset.data, transform) 54 | train_data = train_data.permute([1, 2, 0]).reshape([len(train_dataset), -1]) # len(train_dataset, 28*28) 55 | test_data = tensor_transforms(test_dataset.data, transform) 56 | test_data = test_data.permute([1, 2, 0]).reshape([len(test_dataset), -1]) 57 | train_label = train_dataset.train_labels 58 | test_label = test_dataset.test_labels 59 | 60 | # transpose the dataset to fit the model and convert a tensor to numpy array 61 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 62 | test_data = np.array(np.ceil(test_data[:999, 
:].T.numpy() * 5), order='C') 63 | train_label = train_label.numpy()[:999] 64 | test_label = test_label.numpy()[:999] 65 | 66 | # =========================================== Model ===================================================================== # 67 | # create the model and deploy it on gpu or cpu 68 | model = LDA(K=args.z_dim, device=args.device) 69 | model.initial(train_data) # use the shape of train_data to initialize the params of model 70 | 71 | # train and evaluation 72 | train_local_params = model.train(data=train_data, num_epochs=args.num_epochs) 73 | train_local_params = model.test(data=train_data, num_epochs=args.num_epochs) 74 | test_local_params = model.test(data=test_data, num_epochs=args.num_epochs) 75 | 76 | # save the model after training 77 | model.save(args.save_path) 78 | # load the model 79 | model.load(args.load_path) 80 | 81 | # evaluate the model with classification accuracy 82 | # the demo accuracy can achieve 0.850 83 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_label, test_label, 'SVM') 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Factor_Analysis/PFA_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Poisson Factor Analysis 4 | Beta-Negative Binomial Process and Poisson Factor Analysis 5 | Mingyuan Zhou, Lauren Hannah, David Dunson, Lawrence Carin 6 | Publihsed in International Conference on Artificial Intelligence and Statistic 2012 7 | 8 | =========================================== 9 | 10 | """ 11 | 12 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 13 | # License: BSD-3-Clause 14 | 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | 19 | from torchvision import datasets, transforms 20 | 21 | from pydpm.model import PFA 22 | from pydpm.metric import ACC 23 | from pydpm.dataloader.image_data import tensor_transforms 24 | 25 | # =========================================== ArgumentParser ===================================================================== # 26 | parser = argparse.ArgumentParser() 27 | 28 | # device 29 | parser.add_argument("--device", type=str, default='gpu') 30 | 31 | # dataset 32 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/PFA.npy', help="the path of loading model") 37 | 38 | parser.add_argument("--z_dim", type=int, default=128, help="dimensionality of the z latent space") 39 | 40 | # optim 41 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 42 | 43 | args = parser.parse_args() 44 | 45 | # =========================================== Dataset ===================================================================== # 46 | # define transform for dataset and load orginal dataset 47 | transform = transforms.Compose([transforms.ToTensor()]) 48 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 49 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 50 | 51 | # transform dataset and reshape the dataset into [batch_size, feature_num] 52 | train_data = tensor_transforms(train_dataset.data, transform) 53 
| train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 54 | test_data = tensor_transforms(test_dataset.data, transform) 55 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 56 | train_label = train_dataset.train_labels 57 | test_label = test_dataset.test_labels 58 | 59 | # transpose the dataset to fit the model and convert a tensor to numpy array 60 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 61 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 62 | train_label = train_label.numpy()[:999] 63 | test_label = test_label.numpy()[:999] 64 | 65 | # =========================================== Model ===================================================================== # 66 | # create the model and deploy it on gpu or cpu 67 | model = PFA(K=args.z_dim, device=args.device) 68 | model.initial(train_data) # use the shape of train_data to initialize the params of model 69 | 70 | # train and evaluation 71 | train_local_params = model.train(train_data, num_epochs=args.num_epochs) 72 | train_local_params = model.test(train_data, num_epochs=args.num_epochs) 73 | test_local_params = model.test(test_data, num_epochs=args.num_epochs) 74 | 75 | # save the model after training 76 | model.save(args.save_path) 77 | # load the model 78 | model.load(args.load_path) 79 | 80 | # evaluate the model with classification accuracy 81 | # the demo accuracy can achieve 0.8008 82 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_label, test_label, 'SVM') 83 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Gamma_Dynamic_System/DPGDS_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Deep Poisson Gamma Dynamical Systems Demo 4 | Dandan Guo, Bo Chen and Hao Zhang 5 | Published in Neural Information Processing Systems 2018 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import argparse 16 | import scipy.io as sio 17 | 18 | from torchvision import datasets, transforms 19 | 20 | from pydpm.model import DPGDS 21 | from pydpm.metric import ACC 22 | from pydpm.dataloader.image_data import tensor_transforms 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--device", type=str, default='gpu') 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | 33 | # model 34 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 35 | parser.add_argument("--load_path", type=str, default='../../save_models/DPGDS.npy', help="the path of loading model") 36 | 37 | parser.add_argument("--z_dims", type=list, default=[200, 100, 50], help="number of topics in DPGDS") 38 | 39 | # optim 40 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 41 | 42 | args = parser.parse_args() 43 | 44 | # =========================================== Dataset ===================================================================== # 45 | # 
define transform for dataset and load original dataset 46 | transform = transforms.Compose([transforms.ToTensor()]) 47 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 48 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 49 | 50 | # transform dataset and reshape the dataset into [batch_size, feature_num] 51 | train_data = tensor_transforms(train_dataset.data, transform) 52 | train_data = train_data.permute(1, 2, 0).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 53 | test_data = tensor_transforms(test_dataset.data, transform) 54 | test_data = test_data.permute(1, 2, 0).reshape(len(test_dataset), -1) 55 | train_label = train_dataset.train_labels 56 | test_label = test_dataset.test_labels 57 | 58 | # transpose the dataset to fit the model and convert a tensor to numpy array 59 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 60 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 61 | train_label = train_label.numpy()[:999] 62 | test_label = test_label.numpy()[:999] 63 | 64 | # =========================================== Model ===================================================================== # 65 | # create the model and deploy it on gpu or cpu 66 | # model = DPGDS([200, 100, 50], 'gpu') # topics of each layers 67 | model = DPGDS(K=args.z_dims, device=args.device) 68 | model.initial(train_data) # use the shape of train_data to initialize the params of model 69 | 70 | # train and evaluation 71 | train_local_params = model.train(data=train_data, num_epochs=args.num_epochs) 72 | train_local_params = model.test(data=train_data, num_epochs=args.num_epochs) 73 | test_local_params = model.test(data=test_data, num_epochs=args.num_epochs) 74 | 75 | # save the model after training 76 | model.save(args.save_path) 77 | # load the model 78 | model.load(args.load_path) 79 | 80 | # evaluate the model with classification accuracy 81 | # the demo accuracy can achieve 0.8519 82 | results = ACC(train_local_params.Theta[0], test_local_params.Theta[0], train_label, test_label, 'SVM') 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Multimodal_Poisson_Gamma_Belief_Network/MPGBN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Multimodal Poisson Gamma Belief Network 4 | Chaojie Wang, Bo Chen and Mingyuan Zhou 5 | Published in AAAI Conference on Artificial Intelligence 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import argparse 16 | import scipy.io as sio 17 | 18 | from torchvision import datasets, transforms 19 | 20 | from pydpm.model import MPGBN 21 | from pydpm.metric import ACC 22 | from pydpm.dataloader.image_data import tensor_transforms 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--device", type=str, default='gpu') 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | 33 | # model 34 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of
saving model") 35 | parser.add_argument("--load_path", type=str, default='../../save_models/MPGBN.npy', help="the path of loading model") 36 | 37 | parser.add_argument("--z_dims", type=list, default=[128, 64, 32], help="number of topics at diffrent layers in MPGBN") 38 | 39 | args = parser.parse_args() 40 | 41 | # =========================================== Dataset ===================================================================== # 42 | # define transform for dataset and load orginal dataset 43 | transform = transforms.Compose([transforms.ToTensor()]) 44 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 45 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 46 | 47 | # transform dataset and reshape the dataset into [batch_size, feature_num] 48 | train_data = tensor_transforms(train_dataset.data, transform) 49 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 50 | test_data = tensor_transforms(test_dataset.data, transform) 51 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 52 | train_label = train_dataset.train_labels 53 | test_label = test_dataset.test_labels 54 | 55 | # transpose the dataset to fit the model and convert a tensor to numpy array 56 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 57 | train_data_1 = train_data[:360, :] 58 | train_data_2 = train_data[360:, :] 59 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 60 | test_data_1 = test_data[:360, :] 61 | test_data_2 = test_data[360:, :] 62 | train_label = train_label.numpy()[:999] 63 | test_label = test_label.numpy()[:999] 64 | 65 | # =========================================== Model ===================================================================== # 66 | # create the model and deploy it on gpu or cpu 67 | model = MPGBN(K=args.z_dims, device=args.device) 68 | model.initial(train_data_1, train_data_2) # use the shape of train_data_1 and train_data_2 to initialize the params of model 69 | 70 | # train and evaluation 71 | train_local_params = model.train(train_data_1, train_data_2, num_epochs=args.num_epochs) 72 | train_local_params = model.test(train_data_1, train_data_2, num_epochs=args.num_epochs) 73 | test_local_params = model.test(test_data_1, test_data_2, num_epochs=args.num_epochs) 74 | 75 | # save the model after training 76 | model.save(args.save_path) 77 | # load the model 78 | model.load(args.load_path) 79 | 80 | # evaluate the model with classification accuracy 81 | # the demo accuracy can achieve 0.8549 82 | results = ACC(train_local_params.Theta[0], test_local_params.Theta[0], train_label, test_label, 'SVM') 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Dirchilet_Belief_Network/DirBN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Poisson Factor Analysis DirBN(Dirichlet belief networks) Demo 4 | Dirichlet belief networks for topic structure learning 5 | He Zhao, Lan Du, Wray Buntine, Mingyuan Zhou 6 | Publihsed in Conference and Workshop on Neural Information Processing Systems 2018 7 | 8 | =========================================== 9 | 10 | """ 11 | 12 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 13 | # License: BSD-3-Clause 14 | 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 
| 19 | from torchvision import datasets, transforms 20 | 21 | from pydpm.model import DirBN 22 | from pydpm.metric import ACC 23 | from pydpm.dataloader.image_data import tensor_transforms 24 | 25 | # =========================================== ArgumentParser ===================================================================== # 26 | parser = argparse.ArgumentParser() 27 | 28 | # device 29 | parser.add_argument("--device", type=str, default='gpu') 30 | 31 | # dataset 32 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/DirBN.npy', help="the path of loading model") 37 | 38 | parser.add_argument("--z_dims", type=list, default=[100, 100], help="number of topics of 2 layers in DirBN") 39 | 40 | # optim 41 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 42 | 43 | args = parser.parse_args() 44 | 45 | # =========================================== Dataset ===================================================================== # 46 | # define transform for dataset and load orginal dataset 47 | transform = transforms.Compose([transforms.ToTensor()]) 48 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 49 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 50 | 51 | # transform dataset and reshape the dataset into [batch_size, feature_num] 52 | train_data = tensor_transforms(train_dataset.data, transform) 53 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 54 | test_data = tensor_transforms(test_dataset.data, transform) 55 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 56 | train_label = train_dataset.train_labels 57 | test_label = test_dataset.test_labels 58 | 59 | # transpose the dataset to fit the model and convert a tensor to numpy array 60 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 61 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 62 | train_label = train_label.numpy()[:999] 63 | test_label = test_label.numpy()[:999] 64 | 65 | # =========================================== Model ===================================================================== # 66 | # create the model and deploy it on gpu or cpu 67 | model = DirBN(K=args.z_dims, device=args.device) 68 | model.initial(train_data) # use the shape of train_data to initialize the params of model 69 | 70 | # train and evaluation 71 | for i in range(100): 72 | train_local_params = model.train(data=train_data, num_epochs=args.num_epochs, is_initial_local=False) 73 | train_local_params = model.test(data=train_data, num_epochs=args.num_epochs) 74 | test_local_params = model.test(data=test_data, num_epochs=args.num_epochs) 75 | 76 | # save the model after training 77 | model.save(args.save_path) 78 | # load the model 79 | model.load(args.load_path) 80 | 81 | # evaluate the model with classification accuracy 82 | # the demo accuracy can achieve 0.78 83 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_label, test_label, 'SVM') 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Conditional_Variational_Auto-encoder/CVAE_Demo.py: 
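The Bayesian demos above (MPGBN, DirBN) all report classification accuracy through pydpm.metric.ACC on the inferred Theta features. For readers who want to cross-check that number outside pydpm, the following is a minimal stand-alone sketch using scikit-learn; it is not the implementation of pydpm.metric.ACC, the helper name svm_accuracy is ours, and it assumes Theta is stored as [n_topics, n_samples] as in the demos above.

import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

def svm_accuracy(train_theta, test_theta, train_label, test_label):
    # Theta comes out of these demos as [n_topics, n_samples]; transpose to [n_samples, n_topics]
    clf = SVC(kernel='linear')
    clf.fit(np.asarray(train_theta).T, train_label)
    return accuracy_score(test_label, clf.predict(np.asarray(test_theta).T))

# e.g. svm_accuracy(train_local_params.Theta, test_local_params.Theta, train_label, test_label)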
-------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | CVAE 4 | Learning Structured Output Representation using Deep Conditional Generative Models 5 | Kihyuk Sohn, Xinchen Yan and Honglak Lee 6 | Published in 2015 7 | 8 | =========================================== 9 | """ 10 | # Author: Bufeng Ge <20009100138@stu.xidian.edu.cn>, Xinyang Liu 11 | # License: BSD-3-Clause 12 | 13 | import os 14 | import argparse 15 | import sys 16 | import torch 17 | import numpy as np 18 | import torch.optim as optim 19 | 20 | from torchvision import datasets, transforms 21 | from torchvision.utils import save_image 22 | from pydpm.model import CVAE 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--gpu_id", type=int, default=0) 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/CVAE.pth', help="the path of loading model") 37 | 38 | parser.add_argument("--z_dim", type=int, default=64, help="dimensionality of the z latent space") 39 | parser.add_argument("--encoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 40 | parser.add_argument("--decoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 41 | 42 | # optim 43 | parser.add_argument("--num_epochs", type=int, default=1, help="number of epochs of training") 44 | parser.add_argument("--batch_size", type=int, default=256, help="size of the batches") 45 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 46 | 47 | args = parser.parse_args() 48 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 49 | 50 | # =========================================== Dataset ===================================================================== # 51 | # mnist 52 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transforms.ToTensor(), download=True) 53 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transforms.ToTensor(), download=False) 54 | args.cond_dim = len(train_dataset.classes) 55 | 56 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 57 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 58 | 59 | model = CVAE(cond_dim=args.cond_dim, in_dim=args.img_size**2, z_dim=args.z_dim, encoder_hid_dims=args.encoder_hid_dims, decoder_hid_dims=args.decoder_hid_dims, device=args.device) 60 | model_opt = optim.Adam(model.parameters(), lr=args.lr) 61 | 62 | for epoch in range(args.num_epochs): 63 | local_mu, local_log_var = model.train_one_epoch(model_opt=model_opt, dataloader=train_loader, epoch=epoch, n_epochs=args.num_epochs) 64 | if epoch % 25 == 0: 65 | test_mu, test_log_var = model.test_one_epoch(dataloader=test_loader) 66 | 67 | # Save model 68 | model.save(args.save_path) 69 | # Load model 70 |
model.load(args.load_path) 71 | 72 | # =================== Visualization ====================== # 73 | os.makedirs("../../output/images", exist_ok=True) 74 | # sample image 75 | print('sample image, please wait!') 76 | with torch.no_grad(): 77 | sample = model.sample(64, torch.tensor([7]*64))#[random.randint(0,9) for i in range(64)]) 78 | save_image(sample.view(64,1,28, 28), '../../output/images/VAE_sample_7.png') 79 | print('complete!!!') 80 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/VQ_Variational_Autoencoder/VQ_VAE_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | VQ-VAE 4 | Neural Discrete Representation Learning 5 | Aaron van den Oord, Oriol Vinyals, Koray Kavukcuoglu 6 | Publihsed in 2017 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Muyao Wang , Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | import sys 17 | import torch 18 | import torch.optim as optim 19 | 20 | from torchvision import datasets, transforms 21 | from torchvision.utils import save_image 22 | from pydpm.model import VQVAE 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--gpu_id", type=int, default=0) 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/VQVAE.pth', help="the path of loading model") 37 | parser.add_argument("--embed_dim", type=int, default=2, help="dimensionality of the codebook the same as z_dim") 38 | parser.add_argument("--num_embed", type=int, default=8000, help="number of codebook") 39 | parser.add_argument("--z_dim", type=int, default=2, help="dimensionality of the z latent space") 40 | parser.add_argument("--encoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 41 | parser.add_argument("--decoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 42 | 43 | # optim 44 | parser.add_argument("--num_epochs", type=int, default=20, help="number of epochs of training") 45 | parser.add_argument("--batch_size", type=int, default=256, help="size of the batches") 46 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 47 | 48 | args = parser.parse_args() 49 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 50 | 51 | # =========================================== Dataset ===================================================================== # 52 | # mnist 53 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transforms.ToTensor(), download=True) 54 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transforms.ToTensor(), download=False) 55 | 56 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 57 | test_loader = 
torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 58 | 59 | # =========================================== Model ===================================================================== # 60 | # model 61 | model = VQVAE(embed_dim=args.embed_dim, num_embed=args.num_embed, in_dim=args.img_size**2, z_dim=args.z_dim, encoder_hid_dims=args.encoder_hid_dims, decoder_hid_dims=args.decoder_hid_dims, device=args.device) 62 | model_opt = optim.Adam(model.parameters(), lr=args.lr) 63 | 64 | # train 65 | for epoch_idx in range(args.num_epochs): 66 | model.train_one_epoch(dataloader=train_loader, model_opt=model_opt, epoch_idx=epoch_idx, args=args) 67 | if epoch_idx % 25 == 0: 68 | model.test_one_epoch(dataloader=test_loader) 69 | 70 | # save 71 | model.save(args.save_path) 72 | # load 73 | model.load(args.load_path) 74 | 75 | # =================== Visualization ====================== # 76 | os.makedirs("../../output/images", exist_ok=True) 77 | print('sample image, please wait!') 78 | with torch.no_grad(): 79 | sample = model.sample(64) 80 | save_image(sample.view(64, 1, 28, 28), '../../output/images/VQVAE_sample.png') 81 | print('complete!!!') 82 | 83 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Normlizing_Flow/NFlow_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | RealNVP 4 | DENSITY ESTIMATION USING REAL NVP 5 | Laurent Dinh, Jascha Sohl-Dickstein, Samy Bengio 6 | Published in 2017 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | 17 | import torch 18 | import torch.optim as optim 19 | from torch.utils.data import DataLoader 20 | 21 | import matplotlib.pyplot as plt 22 | from sklearn.datasets import make_moons 23 | 24 | from pydpm.model import NFlow 25 | from pydpm.model.deep_learning_pm.nflow import RealNVP_2D 26 | 27 | # =========================================== ArgumentParser ===================================================================== # 28 | parser = argparse.ArgumentParser() 29 | 30 | # device 31 | parser.add_argument("--gpu_id", type=int, default=0) 32 | 33 | # dataset 34 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/NFlow.pth', help="the path of loading model") 39 | 40 | parser.add_argument("--sample_num", type=int, default=512) 41 | parser.add_argument("--flows_num", type=int, default=2) 42 | parser.add_argument("--flow_name", type=str, default="RealNVP_2D") 43 | parser.add_argument("--hid_dim", type=int, default=128) 44 | 45 | # optim 46 | parser.add_argument("--num_epochs", type=int, default=1000) 47 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 48 | parser.add_argument("--lr", type=float, default=0.001, help="adam: learning rate") 49 | 50 | args = parser.parse_args() 51 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 52 | 53 | # =========================================== Dataset ===================================================================== # 54 | # two moons (sklearn make_moons) 55 | data, label =
make_moons(n_samples=args.sample_num, noise=0.05) 56 | data = torch.tensor(data, dtype=torch.float32) 57 | # Normalization 58 | for i in range(data.shape[1]): 59 | data[:, i] = (data[:, i] - torch.mean(data[:, i])) / torch.std(data[:, i]) 60 | dataloader = torch.utils.data.DataLoader(dataset=data, batch_size=args.batch_size, shuffle=True) 61 | 62 | # =========================================== Model ===================================================================== # 63 | # model 64 | flows = [RealNVP_2D(dim=2, hidden_dim=args.hid_dim, device=args.device) for _ in range(args.flows_num)] 65 | 66 | model = NFlow(in_dim=2, flows=flows, device=args.device) 67 | model_opt = optim.Adam(model.parameters(), lr=args.lr) 68 | 69 | # train 70 | for epoch_idx in range(args.num_epochs): 71 | local_z = model.train_one_epoch(model_opt=model_opt, dataloader=dataloader, epoch=epoch_idx, num_epochs=args.num_epochs) 72 | if epoch_idx == args.num_epochs - 1: 73 | test_local_z = model.test_one_epoch(dataloader=dataloader) 74 | 75 | # save 76 | model.save(args.save_path) 77 | # load 78 | model.load(args.load_path) 79 | 80 | # =========================================== Visualization ===================================================================== # 81 | # visualize 82 | os.makedirs("../output/images", exist_ok=True) 83 | print('sample image,please wait!') 84 | 85 | plt.figure(figsize=(8, 3)) 86 | plt.subplot(1, 3, 1) 87 | plt.scatter(data[:, 0], data[:, 1], marker=".", color="b", s=10) 88 | plt.title("Training data") 89 | plt.subplot(1, 3, 2) 90 | plt.scatter(test_local_z[:, 0], test_local_z[:, 1], marker=".", color="r", s=10) 91 | plt.title("Latent space") 92 | plt.subplot(1, 3, 3) 93 | samples = model.sample(args.sample_num).cpu().detach().numpy() 94 | plt.scatter(samples[:, 0], samples[:, 1], marker=".", color="b", s=10) 95 | plt.title("Generated samples") 96 | plt.savefig("../output/images/nflow.png") 97 | plt.show() 98 | 99 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Factor_Analysis/DPFA_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Deep Poisson Factor Analysis Demo 4 | Scalable Deep Poisson Factor Analysis for Topic Modeling 5 | Zhe Gan, Changyou Chen, Ricardo Henao, David Carlson, Lawrence Carin 6 | Publised in International Conference on Machine Learning 2015 7 | 8 | =========================================== 9 | 10 | """ 11 | 12 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 13 | # License: BSD-3-Clause 14 | 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | 19 | from torchvision import datasets, transforms 20 | 21 | from pydpm.model import DPFA 22 | from pydpm.metric import ACC 23 | from pydpm.dataloader.image_data import tensor_transforms 24 | 25 | # =========================================== ArgumentParser ===================================================================== # 26 | parser = argparse.ArgumentParser() 27 | 28 | # device 29 | parser.add_argument("--device", type=str, default='gpu') 30 | 31 | # dataset 32 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/DPFA.npy', help="the path 
of loading model") 37 | 38 | parser.add_argument("--z_dims", type=list, default=[128, 64, 32], help="numbers of topics of 3 layers in DPFA(PFA+DSBN+Gibbs)") 39 | 40 | # optim 41 | parser.add_argument("--burnin", type=int, default=100, help="the iterations of burnin stage") 42 | parser.add_argument("--collection", type=int, default=80, help="the iterations of collection stage") 43 | 44 | args = parser.parse_args() 45 | 46 | # =========================================== Dataset ===================================================================== # 47 | # define transform for dataset and load orginal dataset 48 | # load dataset 49 | transform = transforms.Compose([transforms.ToTensor()]) 50 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 51 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 52 | 53 | # transform dataset and reshape the dataset into [batch_size, feature_num] 54 | train_data = tensor_transforms(train_dataset.data, transform) 55 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 56 | test_data = tensor_transforms(test_dataset.data, transform) 57 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 58 | train_label = train_dataset.train_labels 59 | test_label = test_dataset.test_labels 60 | 61 | # transpose the dataset to fit the model and convert a tensor to numpy array 62 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 63 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 64 | train_label = train_label.numpy()[:999] 65 | test_label = test_label.numpy()[:999] 66 | 67 | # =========================================== Model ===================================================================== # 68 | # create the model and deploy it on gpu or cpu 69 | model = DPFA(K=args.z_dims, device=args.device) 70 | model.initial(train_data) # use the shape of train_data to initialize the params of model 71 | 72 | # train and evaluation 73 | train_local_params = model.train(train_data, burnin=args.burnin, collection=args.collection) 74 | train_local_params = model.test(train_data, burnin=args.burnin, collection=args.collection) 75 | test_local_params = model.test(test_data, burnin=args.burnin, collection=args.collection) 76 | 77 | # save the model after training 78 | model.save(args.save_path) 79 | # load the model 80 | model.load(args.load_path) 81 | 82 | # evaluate the model with classification accuracy 83 | # the demo accuracy can achieve 0.9099 84 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_label, test_label, 'SVM') 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /enviroment.yaml: -------------------------------------------------------------------------------- 1 | name: pyDPM 2 | channels: 3 | - pyg 4 | - pytorch 5 | - nvidia 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=main 9 | - _openmp_mutex=5.1=1_gnu 10 | - appdirs=1.4.4=pyhd3eb1b0_0 11 | - blas=1.0=mkl 12 | - brotlipy=0.7.0=py39h27cfd23_1003 13 | - bzip2=1.0.8=h7b6447c_0 14 | - ca-certificates=2023.01.10=h06a4308_0 15 | - certifi=2023.5.7=py39h06a4308_0 16 | - cffi=1.15.1=py39h74dc2b5_0 17 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 18 | - cryptography=39.0.1=py39h9ce1e76_0 19 | - cuda-cudart=11.7.99=0 20 | - cuda-cupti=11.7.101=0 21 | - cuda-libraries=11.7.1=0 22 | - cuda-nvrtc=11.7.99=0 23 | - cuda-nvtx=11.7.91=0 24 | - cuda-runtime=11.7.1=0 25 | 
- ffmpeg=4.3=hf484d3e_0 26 | - freetype=2.12.1=h4a9f257_0 27 | - giflib=5.2.1=h5eee18b_3 28 | - gmp=6.2.1=h295c915_3 29 | - gnutls=3.6.15=he1e5248_0 30 | - idna=3.4=py39h06a4308_0 31 | - intel-openmp=2021.4.0=h06a4308_3561 32 | - jpeg=9e=h5eee18b_1 33 | - lame=3.100=h7b6447c_0 34 | - lcms2=2.12=h3be6417_0 35 | - ld_impl_linux-64=2.38=h1181459_1 36 | - lerc=3.0=h295c915_0 37 | - libcublas=11.10.3.66=0 38 | - libcufft=10.7.2.124=h4fbf590_0 39 | - libcufile=1.6.1.9=0 40 | - libcurand=10.3.2.106=0 41 | - libcusolver=11.4.0.1=0 42 | - libcusparse=11.7.4.91=0 43 | - libdeflate=1.17=h5eee18b_0 44 | - libffi=3.3=he6710b0_2 45 | - libgcc-ng=11.2.0=h1234567_1 46 | - libgfortran-ng=11.2.0=h00389a5_1 47 | - libgfortran5=11.2.0=h1234567_1 48 | - libgomp=11.2.0=h1234567_1 49 | - libiconv=1.16=h7f8727e_2 50 | - libidn2=2.3.4=h5eee18b_0 51 | - libnpp=11.7.4.75=0 52 | - libnvjpeg=11.8.0.2=0 53 | - libpng=1.6.39=h5eee18b_0 54 | - libstdcxx-ng=11.2.0=h1234567_1 55 | - libtasn1=4.19.0=h5eee18b_0 56 | - libtiff=4.5.0=h6a678d5_2 57 | - libunistring=0.9.10=h27cfd23_0 58 | - libwebp=1.2.4=h11a3e52_1 59 | - libwebp-base=1.2.4=h5eee18b_1 60 | - lz4-c=1.9.4=h6a678d5_0 61 | - mkl=2021.4.0=h06a4308_640 62 | - mkl-service=2.4.0=py39h7f8727e_0 63 | - mkl_fft=1.3.1=py39hd3c417c_0 64 | - mkl_random=1.2.2=py39h51133e4_0 65 | - ncurses=6.4=h6a678d5_0 66 | - nettle=3.7.3=hbbd107a_1 67 | - numpy=1.24.3=py39h14f4228_0 68 | - numpy-base=1.24.3=py39h31eccc5_0 69 | - openh264=2.1.1=h4ff587b_0 70 | - openssl=1.1.1t=h7f8727e_0 71 | - packaging=23.0=py39h06a4308_0 72 | - pillow=9.4.0=py39h6a678d5_0 73 | - pip=23.0.1=py39h06a4308_0 74 | - pooch=1.4.0=pyhd3eb1b0_0 75 | - portalocker=2.3.0=py39h06a4308_1 76 | - pycparser=2.21=pyhd3eb1b0_0 77 | - pyopenssl=23.0.0=py39h06a4308_0 78 | - pysocks=1.7.1=py39h06a4308_0 79 | - python=3.9.0=hdb3f193_2 80 | - pytorch=1.13.0=py3.9_cuda11.7_cudnn8.5.0_0 81 | - pytorch-cuda=11.7=h778d358_5 82 | - pytorch-mutex=1.0=cuda 83 | - pytorch-scatter=2.1.1=py39_torch_1.13.0_cu117 84 | - pytorch-sparse=0.6.17=py39_torch_1.13.0_cu117 85 | - readline=8.2=h5eee18b_0 86 | - requests=2.29.0=py39h06a4308_0 87 | - setuptools=66.0.0=py39h06a4308_0 88 | - six=1.16.0=pyhd3eb1b0_1 89 | - sqlite=3.41.2=h5eee18b_0 90 | - tk=8.6.12=h1ccaba5_0 91 | - torchaudio=0.13.0=py39_cu117 92 | - torchdata=0.5.0=py39 93 | - torchtext=0.14.0=py39 94 | - torchvision=0.14.0=py39_cu117 95 | - tqdm=4.65.0=py39hb070fc8_0 96 | - typing_extensions=4.5.0=py39h06a4308_0 97 | - tzdata=2023c=h04d1e81_0 98 | - urllib3=1.26.15=py39h06a4308_0 99 | - wheel=0.38.4=py39h06a4308_0 100 | - xz=5.4.2=h5eee18b_0 101 | - zlib=1.2.13=h5eee18b_0 102 | - zstd=1.5.5=hc292b87_0 103 | - pip: 104 | - contourpy==1.0.7 105 | - cycler==0.11.0 106 | - fonttools==4.39.4 107 | - gensim==4.3.1 108 | - importlib-resources==5.12.0 109 | - jinja2==3.1.2 110 | - joblib==1.2.0 111 | - kiwisolver==1.4.4 112 | - markupsafe==2.1.2 113 | - matplotlib==3.7.1 114 | - psutil==5.9.5 115 | - pyparsing==3.0.9 116 | - python-dateutil==2.8.2 117 | - scikit-learn==1.2.2 118 | - scipy==1.10.1 119 | - smart-open==6.3.0 120 | - threadpoolctl==3.1.0 121 | - torch-geometric==2.3.1 122 | - zipp==3.15.0 123 | prefix: /home/chaojie.wang/anaconda3/envs/pyDPM 124 | -------------------------------------------------------------------------------- /pydpm/sampler/model_sampler_cpu.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Model Sampler implemented on CPU 4 | =========================================== 
5 | 6 | """ 7 | 8 | # Author: Chaojie Wang ; Jiawen Wu 9 | # License: BSD-3-Clause 10 | 11 | import numpy as np 12 | import numpy.ctypeslib as npct 13 | import ctypes 14 | from ctypes import * 15 | import os 16 | 17 | class model_sampler_cpu(object): 18 | 19 | def __init__(self, system_type='Windows', seed=0): 20 | """ 21 | The basic class for sampling distribution on cpu 22 | """ 23 | super(model_sampler_cpu, self).__init__() 24 | 25 | self.system_type = system_type 26 | self.seed = seed 27 | 28 | array_2d_double = npct.ndpointer(dtype=np.double, ndim=2, flags='C') 29 | array_1d_double = npct.ndpointer(dtype=np.double, ndim=1, flags='C') 30 | array_int = npct.ndpointer(dtype=np.int32, ndim=0, flags='C') 31 | ll = ctypes.cdll.LoadLibrary 32 | 33 | if system_type == "Windows": 34 | self.Crt_lib = ll(os.path.dirname(__file__) + "\_compact\crt_cpu.dll") 35 | self.Multi_lib = ll(os.path.dirname(__file__) + "\_compact\multi_aug_cpu.dll") 36 | self.Crt_Multi_lib = ll(os.path.dirname(__file__) + "\_compact\crt_multi_aug_cpu.dll") 37 | else: 38 | self.Crt_lib = ll(os.path.dirname(__file__) + "/_compact/crt_cpu.so") 39 | self.Multi_lib = ll(os.path.dirname(__file__) + "/_compact/multi_aug_cpu.so") 40 | self.Crt_Multi_lib = ll(os.path.dirname(__file__) + "/_compact/crt_multi_aug_cpu.so") 41 | 42 | 43 | self.Multi_lib.Multi_Sample.restype = None 44 | self.Multi_lib.Multi_Sample.argtypes = [array_2d_double, array_2d_double, array_2d_double, array_2d_double, 45 | array_2d_double, c_int, c_int, c_int] 46 | 47 | self.Crt_Multi_lib.Crt_Multi_Sample.restype = None 48 | self.Crt_Multi_lib.Crt_Multi_Sample.argtypes = [array_2d_double, array_2d_double, array_2d_double, array_2d_double, 49 | array_2d_double, c_int, c_int, c_int] 50 | 51 | self.Crt_lib.Crt_Sample.restype = None 52 | self.Crt_lib.Crt_Sample.argtypes = [array_2d_double, array_2d_double, array_2d_double, c_int, c_int] 53 | 54 | def multi_aug(self, X_t, Phi_t, Theta_t): 55 | 56 | X_t = np.array(X_t, order='C').astype('double') 57 | Phi_t = np.array(Phi_t, order='C').astype('double') 58 | Theta_t = np.array(Theta_t, order='C').astype('double') 59 | 60 | V = X_t.shape[0] 61 | J = X_t.shape[1] 62 | K = Theta_t.shape[0] 63 | Xt_to_t1_t = np.zeros([K, J], order='C').astype('double') 64 | WSZS_t = np.zeros([V, K], order='C').astype('double') 65 | self.Multi_lib.Multi_Sample(X_t, Phi_t, Theta_t, WSZS_t, Xt_to_t1_t, V, K, J) 66 | 67 | return Xt_to_t1_t, WSZS_t 68 | 69 | def crt(self, Xt_to_t1_t, p): 70 | 71 | Xt_to_t1_t = np.array(Xt_to_t1_t, order='C') 72 | p = np.array(p, order='C') 73 | 74 | K_t = Xt_to_t1_t.shape[0] 75 | J = Xt_to_t1_t.shape[1] 76 | X_t1 = np.zeros([K_t, J], order='C').astype('double') 77 | 78 | self.Crt_lib.Crt_Sample(Xt_to_t1_t, p, X_t1, K_t, J) 79 | 80 | return X_t1 81 | 82 | def crt_multi_aug(self, Xt_to_t1_t, Phi_t1, Theta_t1): 83 | 84 | Xt_to_t1_t = np.array(Xt_to_t1_t, order='C').astype('double') 85 | Phi_t1 = np.array(Phi_t1, order='C').astype('double') 86 | Theta_t1 = np.array(Theta_t1, order='C').astype('double') 87 | 88 | K_t = Xt_to_t1_t.shape[0] 89 | J = Xt_to_t1_t.shape[1] 90 | K_t1 = Theta_t1.shape[0] 91 | Xt_to_t1_t1 = np.zeros([K_t1, J], order='C').astype('double') 92 | WSZS_t1 = np.zeros([K_t, K_t1], order='C').astype('double') 93 | 94 | self.Crt_Multi_lib.Crt_Multi_Sample(Xt_to_t1_t, Phi_t1, Theta_t1, WSZS_t1, Xt_to_t1_t1, K_t, K_t1, J) 95 | 96 | return Xt_to_t1_t1, WSZS_t1 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- 
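To make the sampler interface above easier to follow, here is a minimal usage sketch (not taken from the repository) of how multi_aug and crt_multi_aug are typically chained to propagate latent counts through two layers of a gamma belief network. The shapes and random inputs are purely illustrative, and the compiled crt/multi_aug shared libraries must already be built for the current platform.

import numpy as np
from pydpm.sampler.model_sampler_cpu import model_sampler_cpu

sampler = model_sampler_cpu(system_type='Linux')   # any value other than 'Windows' loads the .so libraries

V, J, K1, K2 = 784, 100, 128, 64                   # vocabulary size, batch size, topics per layer
X = np.random.poisson(1.0, size=(V, J))            # toy count matrix of shape [V, J]
Phi_1, Theta_1 = np.random.rand(V, K1), np.random.rand(K1, J)
Phi_2, Theta_2 = np.random.rand(K1, K2), np.random.rand(K2, J)

# layer 1: multinomial augmentation of the observed counts
Xt_to_t1, WSZS_1 = sampler.multi_aug(X, Phi_1, Theta_1)             # [K1, J] and [V, K1]
# layer 2: CRT plus multinomial augmentation propagates the counts upward
Xt_to_t2, WSZS_2 = sampler.crt_multi_aug(Xt_to_t1, Phi_2, Theta_2)  # [K2, J] and [K1, K2]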
/pydpm/example/Bayesian_PM/Graph_Poisson_Gamma_Belief_Network/GPGBN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Deep Relational Topic Modeling via Graph Poisson Gamma Belief Network 4 | Chaojie Wang, Hao Zhang, Bo Chen, Dongsheng Wang, Zhengjue Wang 5 | Published in Advances in Neural Information Processing System 2020 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Wei Zhao <13279389260@163.com>; Jiawen Wu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | 19 | from torchvision import datasets, transforms 20 | from torch_geometric.datasets import Planetoid 21 | 22 | from pydpm.model import GPGBN 23 | from pydpm.metric import ACC 24 | from pydpm.dataloader.image_data import tensor_transforms 25 | from pydpm.dataloader.graph_data import Graph_Processer 26 | from pydpm.utils import cosine_simlarity 27 | 28 | # =========================================== ArgumentParser ===================================================================== # 29 | parser = argparse.ArgumentParser() 30 | 31 | # device 32 | parser.add_argument("--device", type=str, default='gpu') 33 | 34 | # dataset 35 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 36 | 37 | # model 38 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 39 | parser.add_argument("--load_path", type=str, default='../../save_models/GPGBN.npy', help="the path of loading model") 40 | 41 | parser.add_argument("--z_dims", type=list, default=[128, 64, 32], help="number of topics at diffrent layers in PGBN") 42 | 43 | # optim 44 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 45 | 46 | args = parser.parse_args() 47 | 48 | # =========================================== Dataset ===================================================================== # 49 | # define transform for dataset and load orginal dataset 50 | 51 | # # load dataset (Cora) cost too much memory 52 | # path = '../../dataset/Planetoid' 53 | # if not os.path.exists(path): 54 | # os.mkdir(path) 55 | # dataset = Planetoid(path, 'cora') 56 | # dataset = dataset[0] 57 | # 58 | # graph = graph_from_edges(dataset.edge_index, dataset.num_nodes, to_sparsetesor=False)[1] 59 | # # transpose the dataset to fit the model and convert a tensor to numpy array 60 | # train_data = dataset.x.T.numpy() 61 | 62 | # load dataset (MNIST) 63 | # define transform for dataset and load orginal dataset 64 | transform = transforms.Compose([transforms.ToTensor()]) 65 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 66 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 67 | 68 | # transform dataset and reshape the dataset into [batch_size, feature_num] 69 | train_data = tensor_transforms(train_dataset.data, transform) 70 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 71 | test_data = tensor_transforms(test_dataset.data, transform) 72 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 73 | train_label = train_dataset.train_labels 74 | test_label = test_dataset.test_labels 75 | 76 | # transpose the dataset to fit the model and convert a tensor to numpy array 77 | train_data = 
np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 78 | train_label = train_label.numpy()[:999] 79 | 80 | # construct the adjacency matrix 81 | graph_processer = Graph_Processer() 82 | graph = graph_processer.graph_from_node_feature(train_data.T, 0.5, binary=False) 83 | 84 | # =========================================== Model ===================================================================== # 85 | # create the model and deploy it on gpu or cpu 86 | model = GPGBN(K=args.z_dims, device=args.device) 87 | model.initial(train_data) 88 | 89 | train_local_params = model.train(train_data, graph, num_epochs=args.num_epochs) 90 | 91 | # save the model after training 92 | model.save(args.save_path) 93 | # load the model 94 | model.load(args.load_path) 95 | 96 | 97 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Deep_Convolution_Generative_Adversarial_Networks/DCGAN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | DCGAN 4 | Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks 5 | Alec Radford, Luke Metz and Soumith Chintala 6 | Publihsed in ICLR 2016 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | 17 | import torch 18 | from torch.utils.data import DataLoader 19 | 20 | import torchvision.transforms as transforms 21 | from torchvision.utils import save_image 22 | from torchvision import datasets 23 | 24 | from pydpm.model import DCGAN 25 | from pydpm.utils.utils import unnormalize_to_zero_to_one 26 | 27 | # =========================================== ArgumentParser ===================================================================== # 28 | parser = argparse.ArgumentParser() 29 | 30 | # device 31 | parser.add_argument("--gpu_id", type=int, default=0) 32 | 33 | # dataset 34 | parser.add_argument("--data_path", type=str, default='../../../dataset/cifar/', help="the path of loading data") 35 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 36 | 37 | # model 38 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 39 | parser.add_argument("--load_path", type=str, default='../../save_models/DCGAN.pth', help="the path of loading model") 40 | 41 | parser.add_argument("--z_dim", type=int, default=100, help="generator dimensionality of the noise") 42 | parser.add_argument("--in_channels", type=int, default=3, help="number of image channels") # 1 for mnist 43 | parser.add_argument("--sample_interval", type=int, default=800, help="interval betwen image samples") 44 | 45 | # optim 46 | parser.add_argument("--num_epochs", type=int, default=200, help="number of epochs of training") 47 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 48 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 49 | parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient") 50 | parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient") 51 | 52 | args = parser.parse_args() 53 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 54 | 55 | # 
=========================================== Dataset ===================================================================== # 56 | # CIFAR10 57 | transform = transforms.Compose([ 58 | transforms.RandomHorizontalFlip(), 59 | transforms.ToTensor(), 60 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 61 | ]) 62 | train_dataset = datasets.CIFAR10(root=args.data_path, train=True, transform=transform, download=True) 63 | test_dataset = datasets.CIFAR10(root=args.data_path, train=False, transform=transform, download=False) 64 | 65 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 66 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 67 | 68 | # =========================================== Model ===================================================================== # 69 | # model 70 | # Initialize generator and discriminator 71 | model = DCGAN(args, device=args.device) 72 | 73 | # Optimizers 74 | model_opt_G = torch.optim.Adam(model.generator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) 75 | model_opt_D = torch.optim.Adam(model.discriminator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) 76 | 77 | # train 78 | for epoch_idx in range(args.num_epochs): 79 | model.train_one_epoch(model_opt_G=model_opt_G, model_opt_D=model_opt_D, dataloader=train_loader, sample_interval=args.sample_interval, epoch=epoch_idx, n_epochs=args.num_epochs) 80 | if epoch_idx % 20 == 0: 81 | model.save(args.save_path) 82 | # save 83 | model.save(args.save_path) 84 | # load 85 | model.load(args.load_path) 86 | 87 | # ===================== Visualization ============================== # 88 | os.makedirs("../../output/images", exist_ok=True) 89 | print('sample image, please wait!') 90 | sample_images = model.sample(64) 91 | sample_images = unnormalize_to_zero_to_one(sample_images) 92 | save_image(sample_images, "../../output/images/DCGAN_images.png", nrow=8, normalize=True) 93 | print('complete!!!') -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Generative_Adversarial_Network/GAN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | GAN 4 | Generative Adversarial Networks 5 | IJ Goodfellow, J Pouget-Abadie, M Mirza, B Xu, D Warde-Farley, S Ozair, A Courville, Y Bengio 6 | Published in 2014 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Muyao Wang , Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | 17 | import torch 18 | from torch.utils.data import DataLoader 19 | 20 | import torchvision.transforms as transforms 21 | from torchvision.utils import save_image 22 | from torchvision import datasets 23 | 24 | from pydpm.model import GAN 25 | 26 | # =========================================== ArgumentParser ===================================================================== # 27 | parser = argparse.ArgumentParser() 28 | 29 | # device 30 | parser.add_argument("--gpu_id", type=int, default=0) 31 | 32 | # dataset 33 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 34 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 |
parser.add_argument("--load_path", type=str, default='../../save_models/GAN.pth', help="the path of loading model") 39 | 40 | parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient") 41 | parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient") 42 | parser.add_argument("--g_z_dim", type=int, default=128, help="generator dimensionality of the noise") 43 | parser.add_argument("--g_hid_dims", type=list, default=[100, 200, 400, 800], help="generator dimensionality of the latent space") 44 | parser.add_argument("--d_hid_dims", type=list, default=[256, 128], help="discriminator dimensionality of the latent space") 45 | parser.add_argument("--channels", type=int, default=1, help="number of image channels") # 1 for mnist 46 | parser.add_argument("--sample_interval", type=int, default=800, help="interval betwen image samples") 47 | 48 | # optim 49 | parser.add_argument("--num_epochs", type=int, default=200, help="number of epochs of training") 50 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 51 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 52 | 53 | args = parser.parse_args() 54 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 55 | 56 | # =========================================== Dataset ===================================================================== # 57 | # mnist 58 | transform = transforms.Compose([transforms.Resize(args.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]) 59 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transform, download=True) 60 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transform, download=False) 61 | 62 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 63 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 64 | 65 | # =========================================== Model ===================================================================== # 66 | # model 67 | # Initialize generator and discriminator 68 | img_shape = (args.channels, args.img_size, args.img_size) 69 | model = GAN(img_shape, g_z_dim=args.g_z_dim, g_hid_dims=args.g_hid_dims, d_hid_dims=args.d_hid_dims, device=args.device) 70 | 71 | # Optimizers 72 | model_opt_G = torch.optim.Adam(model.generator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) 73 | model_opt_D = torch.optim.Adam(model.discriminator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) 74 | 75 | # train 76 | for epoch_idx in range(args.num_epochs): 77 | model.train_one_epoch(model_opt_G=model_opt_G, model_opt_D=model_opt_D, dataloader=train_loader, sample_interval=args.sample_interval, epoch=epoch_idx, n_epochs=args.num_epochs) 78 | 79 | # save 80 | model.save(args.save_path) 81 | # load 82 | model.load(args.load_path) 83 | 84 | # ===================== Visualization ============================== # 85 | os.makedirs("../../output/images", exist_ok=True) 86 | print('sample image,please wait!') 87 | save_image(model.sample(64), "../../output/images/GAN_images.png", nrow=8, normalize=True) 88 | print('complete!!!') -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Wasserstein_Generative_Adversarial_Networks/WGAN_Demo.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | WGAN 4 | Wasserstein GAN 5 | Martin Arjovsky, Soumith Chintala, and Leon Bottou, 6 | Publihsed in 2017 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Bufeng Ge <20009100138@stu.xidian.edu.cn>, Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | 17 | import torch 18 | from torch.utils.data import DataLoader 19 | 20 | import torchvision.transforms as transforms 21 | from torchvision.utils import save_image 22 | from torchvision import datasets 23 | 24 | from pydpm.model import WGAN 25 | 26 | # =========================================== ArgumentParser ===================================================================== # 27 | parser = argparse.ArgumentParser() 28 | 29 | # device 30 | parser.add_argument("--gpu_id", type=int, default=0) 31 | 32 | # dataset 33 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 34 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/WGAN.pth', help="the path of loading model") 39 | 40 | parser.add_argument("--g_z_dim", type=int, default=128, help="generator dimensionality of the noise") 41 | parser.add_argument("--g_hid_dims", type=list, default=[100, 200, 400, 800], help="generator dimensionality of the latent space") 42 | parser.add_argument("--d_hid_dims", type=list, default=[256, 128], help="discriminator dimensionality of the latent space") 43 | parser.add_argument("--channels", type=int, default=1, help="number of image channels") # 1 for mnist 44 | parser.add_argument("--sample_interval", type=int, default=100, help="interval betwen image samples") 45 | parser.add_argument("--n_critic", type=int, default=100, help="number of training steps for discriminator per iter") 46 | parser.add_argument("--clip_value", type=float, default=0.01, help="lower and upper clip value for disc. 
weights") 47 | 48 | # optim 49 | parser.add_argument("--num_epochs", type=int, default=200, help="number of epochs of training") 50 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 51 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 52 | 53 | args = parser.parse_args() 54 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 55 | 56 | # =========================================== Dataset ===================================================================== # 57 | # mnist 58 | transform = transforms.Compose([transforms.Resize(args.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]) 59 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transform, download=True) 60 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transform, download=False) 61 | 62 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 63 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 64 | 65 | # =========================================== Model ===================================================================== # 66 | # model 67 | # Initialize generator and discriminator 68 | img_shape = (args.channels, args.img_size, args.img_size) 69 | model = WGAN(img_shape, g_z_dim=args.g_z_dim, g_hid_dims=args.g_hid_dims, d_hid_dims=args.d_hid_dims, device=args.device) 70 | 71 | # Optimizers 72 | model_opt_G = torch.optim.RMSprop(model.generator.parameters(), lr=args.lr) 73 | model_opt_D = torch.optim.RMSprop(model.discriminator.parameters(), lr=args.lr) 74 | 75 | # train 76 | for epoch_idx in range(args.num_epochs): 77 | model.train_one_epoch(args=args, model_opt_G=model_opt_G, model_opt_D=model_opt_D, dataloader=train_loader, epoch=epoch_idx, n_epochs=args.num_epochs) 78 | 79 | 80 | # save 81 | model.save(args.save_path) 82 | # load 83 | model.load(args.load_path) 84 | 85 | # ===================== Visualization ================= # 86 | os.makedirs("../../output/images", exist_ok=True) 87 | print('sample image, please wait!') 88 | save_image(model.sample(64), "../../output/images/WGAN_images.png", nrow=8, normalize=True) 89 | print('complete!!!') 90 | 91 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Information_Maximizing_Generative_Adversarial_Nets/InfoGAN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | InfoGAN 4 | InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets 5 | Xi Chen, Yan Duan, Rein Houthooft, John Schulman, Ilya Sutskever, Pieter Abbeel 6 | Publihsed in 2016 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | 17 | import torch 18 | from torch.utils.data import DataLoader 19 | 20 | import torchvision.transforms as transforms 21 | from torchvision.utils import save_image 22 | from torchvision import datasets 23 | 24 | from pydpm.model import InfoGAN 25 | from pydpm.utils.utils import unnormalize_to_zero_to_one 26 | # =========================================== ArgumentParser ===================================================================== # 27 | parser = argparse.ArgumentParser() 
28 | 29 | # device 30 | parser.add_argument("--gpu_id", type=int, default=0) 31 | 32 | # dataset 33 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 34 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/GAN.pth', help="the path of loading model") 39 | 40 | parser.add_argument("--z_dim", type=int, default=62, help="generator dimensionality of the noise") 41 | parser.add_argument("--dis_ch", type=int, default=1, help="generator dimensionality of the latent space") 42 | parser.add_argument("--dis_ch_dim", type=int, default=10, help="discriminator dimensionality of the latent space") 43 | parser.add_argument("--con_ch", type=int, default=2, help="discriminator dimensionality of the latent space") 44 | parser.add_argument("--channels", type=int, default=1, help="number of image channels") # 1 for mnist 45 | parser.add_argument("--sample_interval", type=int, default=800, help="interval betwen image samples") 46 | 47 | # optim 48 | parser.add_argument("--num_epochs", type=int, default=200, help="number of epochs of training") 49 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 50 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 51 | parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient") 52 | parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient") 53 | 54 | args = parser.parse_args() 55 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 56 | 57 | # =========================================== Dataset ===================================================================== # 58 | # mnist 59 | transform = transforms.Compose([transforms.Resize(args.img_size), transforms.ToTensor()]) 60 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transform, download=True) 61 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transform, download=False) 62 | 63 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 64 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 65 | 66 | # =========================================== Model ===================================================================== # 67 | # model 68 | # Initialize generator and discriminator 69 | img_shape = (args.channels, args.img_size, args.img_size) 70 | model = InfoGAN(args=args, device=args.device) 71 | 72 | # Optimizers 73 | model_opt_G = torch.optim.Adam([{'params': model.generator.parameters()}, {'params': model.netQ.parameters()}], lr=args.lr, betas=(args.b1, args.b2)) 74 | model_opt_D = torch.optim.Adam([{'params': model.discriminator.parameters()}, {'params': model.netD.parameters()}], lr=args.lr, betas=(args.b1, args.b2)) 75 | 76 | 77 | # train 78 | for epoch_idx in range(args.num_epochs): 79 | model.train_one_epoch(model_opt_G=model_opt_G, model_opt_D=model_opt_D, dataloader=train_loader, sample_interval=args.sample_interval, epoch=epoch_idx, n_epochs=args.num_epochs) 80 | 81 | # save 82 | model.save(args.save_path) 83 | # load 84 
| model.load(args.load_path) 85 | 86 | # ===================== Visualization ============================== # 87 | os.makedirs("../../output/images", exist_ok=True) 88 | print('sample image, please wait!') 89 | save_image(model.sample(64), "../../output/images/InfoGAN_images.png", nrow=8, normalize=True) 90 | print('complete!!!') -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Denoising_Diffusion_Probabilistic_Model/DDPM_Demo.py: -------------------------------------------------------------------------------- 1 | ''' 2 | =========================================== 3 | DDPM 4 | Denoising Diffusion Probabilistic Models 5 | Jonathan Ho, Ajay Jain, Pieter Abbeel 6 | Published in NIPS 2020 7 | 8 | =========================================== 9 | ''' 10 | 11 | # Author: Xinyang Liu , Muyao Wang 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import numpy as np 16 | import argparse 17 | 18 | import torch 19 | import torch.optim as optim 20 | from torch.utils.data import DataLoader 21 | from torchvision import transforms 22 | from torchvision.datasets import CIFAR10 23 | from torchvision.utils import save_image 24 | 25 | from pydpm.model import DDPM 26 | 27 | parser = argparse.ArgumentParser() 28 | 29 | # device 30 | parser.add_argument("--gpu_id", type=int, default=0, help="the id of gpu to deploy") 31 | 32 | # dataset 33 | parser.add_argument("--dataset", type=str, default='CIFAR10', help="the name of dataset") 34 | parser.add_argument("--dataset_path", type=str, default='../../dataset', help="the file path of dataset") 35 | 36 | # network settings 37 | parser.add_argument("--T", type=int, default=1000, help="Number of time steps in DDPM") 38 | parser.add_argument("--in_channel", type=int, default=3, help="Number of channels in the input image") 39 | parser.add_argument("--channel", type=int, default=128, help="Number of channels after head layer") 40 | parser.add_argument("--channel_mult", type=list, default=[1, 2, 3, 4], help="Number of mult-channels") 41 | parser.add_argument("--attn", type=list, default=[2], help="Number of attention-blocks") 42 | parser.add_argument("--num_res_blocks", type=int, default=2, help="Number of residual-blocksn") 43 | parser.add_argument("--dropout", type=list, default=0.15, help="Dropout ratio") 44 | 45 | # ddpm settings 46 | parser.add_argument("--beta_1", type=float, default=1e-4, help="the level of noise in first step of forward process") 47 | parser.add_argument("--beta_T", type=float, default=0.02, help="The level of noise in T-th step of forward process") 48 | 49 | # optimizer 50 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 51 | parser.add_argument("--grad_clip", type=float, default=1., help="grad_clip") 52 | 53 | # training 54 | parser.add_argument("--num_epochs", type=int, default=30, help="number of epochs of training") 55 | parser.add_argument("--batch_size", type=int, default=64, help="batch size of dataloader") 56 | 57 | # sampling 58 | parser.add_argument("--model_path", type=str, default="../../save_models/DDPM.pth", help="path to save/load model") 59 | parser.add_argument("--noisy_path", type=str, default="../../output/noisy.png", help="path to save noisy") 60 | parser.add_argument("--image_path", type=str, default="../../output/image.png", help="path to save sampled images") 61 | 62 | args = parser.parse_args() 63 | args.device = 'cpu' if not torch.cuda.is_available() else f'cuda:{args.gpu_id}' 64 | 65 | # dataset 66 | dataset = CIFAR10( 
67 | root=args.dataset_path, train=True, download=True, 68 | transform=transforms.Compose([ 69 | transforms.RandomHorizontalFlip(), 70 | transforms.ToTensor(), 71 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 72 | ])) 73 | dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers=4, drop_last=True, pin_memory=True) 74 | if args.dataset == 'CIFAR10': 75 | args.in_channel = 3 76 | elif args.dataset == 'mnist': 77 | args.in_channel = 1 78 | else: 79 | raise ValueError('args.in_channel must be given') 80 | 81 | net_config = {"in_channel": args.in_channel, 82 | "channel": args.channel, 83 | "channel_mult": args.channel_mult, 84 | "attn": args.attn, 85 | "num_res_blocks": args.num_res_blocks, 86 | "dropout": args.dropout} 87 | ddpm_config = {"beta_1": 1e-4, "beta_T": 0.02} 88 | 89 | model = DDPM(T=args.T, net_cfg=net_config, ddpm_cfg=ddpm_config, device=args.device) 90 | 91 | model_opt = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=1e-4) 92 | cosine_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer=model_opt, T_max=args.num_epochs, eta_min=0, last_epoch=-1) 93 | 94 | for epoch in range(args.num_epochs): 95 | model.train_one_epoch(dataloader, model_opt, epoch, args) 96 | cosine_scheduler.step() 97 | if (epoch + 1) % 10 == 0: 98 | model.save() 99 | model_test = DDPM(T=args.T, net_cfg=net_config, ddpm_cfg=ddpm_config, device=args.device) 100 | model_test.load(args.model_path) 101 | noisy, images = model_test.test_one_epoch(net_model=model_test.net, args=args) 102 | 103 | saveNoisy = torch.clamp(noisy * 0.5 + 0.5, 0, 1) 104 | save_image(saveNoisy, args.noisy_path, nrow=8) 105 | sampledImgs = images * 0.5 + 0.5 # [0 ~ 1] 106 | save_image(sampledImgs, args.image_path, nrow=8) 107 | -------------------------------------------------------------------------------- /pydpm/metric/topic_coherence.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Author: Xinyang Liu 4 | # License: BSD-3-Clause 5 | 6 | import numpy as np 7 | from gensim.test.utils import common_corpus, common_dictionary 8 | from gensim.models.coherencemodel import CoherenceModel 9 | 10 | """ 11 | Examples 12 | --------- 13 | One way of using this feature is through providing a trained topic model. A dictionary has to be explicitly provided 14 | if the model does not contain a dictionary already 15 | 16 | .. sourcecode:: pycon 17 | # 18 | # >>> from gensim.test.utils import common_corpus, common_dictionary 19 | # >>> from gensim.models.ldamodel import LdaModel 20 | # >>> from gensim.models.coherencemodel import CoherenceModel 21 | # >>> 22 | # >>> model = LdaModel(common_corpus, 5, common_dictionary) 23 | # >>> 24 | # >>> cm = CoherenceModel(model=model, corpus=common_corpus, coherence='u_mass') 25 | # >>> coherence = cm.get_coherence() # get coherence value 26 | 27 | Another way of using this feature is through providing tokenized topics such as: 28 | 29 | .. sourcecode:: pycon 30 | 31 | # >>> from gensim.test.utils import common_corpus, common_dictionary 32 | # >>> from gensim.models.coherencemodel import CoherenceModel 33 | # >>> topics = [ 34 | # ... ['human', 'computer', 'system', 'interface'], 35 | # ... ['graph', 'minors', 'trees', 'eps'] 36 | # ...
] 37 | # >>> 38 | # >>> cm = CoherenceModel(topics=topics, corpus=common_corpus, dictionary=common_dictionary, coherence='u_mass') 39 | # >>> coherence = cm.get_coherence() # get coherence value 40 | 41 | (Please visit https://radimrehurek.com/gensim/models/coherencemodel.html for more usage.) 42 | 43 | """ 44 | 45 | class Topic_Coherence(object): 46 | def __init__(self, model=None, topics=None, texts=None, corpus=None, dictionary=None, 47 | window_size=None, keyed_vectors=None, coherence='c_v', topn=20, processes=-1): 48 | ''' 49 | Inputs: 50 | model : :class:`~gensim.models.basemodel.BaseTopicModel`, optional 51 | Pre-trained topic model, should be provided if topics is not provided. 52 | Currently supports :class:`~gensim.models.ldamodel.LdaModel`, 53 | :class:`~gensim.models.ldamulticore.LdaMulticore`, :class:`~gensim.models.wrappers.ldamallet.LdaMallet` and 54 | :class:`~gensim.models.wrappers.ldavowpalwabbit.LdaVowpalWabbit`. 55 | Use `topics` parameter to plug in an as yet unsupported model. 56 | topics : list of list of str, optional 57 | List of tokenized topics, if this is preferred over model - dictionary should be provided. 58 | texts : list of list of str, optional 59 | Tokenized texts, needed for coherence models that use sliding window based (i.e. coherence=`c_something`) 60 | probability estimator . 61 | corpus : iterable of list of (int, number), optional 62 | Corpus in BoW format. 63 | dictionary : :class:`~gensim.corpora.dictionary.Dictionary`, optional 64 | Gensim dictionary mapping of id word to create corpus. 65 | If `model.id2word` is present, this is not needed. If both are provided, passed `dictionary` will be used. 66 | window_size : int, optional 67 | Is the size of the window to be used for coherence measures using boolean sliding window as their 68 | probability estimator. For 'u_mass' this doesn't matter. 69 | If None - the default window sizes are used which are: 'c_v' - 110, 'c_uci' - 10, 'c_npmi' - 10. 70 | coherence : {'u_mass', 'c_v', 'c_uci', 'c_npmi'}, optional 71 | Coherence measure to be used. 72 | Fastest method - 'u_mass', 'c_uci' also known as `c_pmi`. 73 | For 'u_mass' corpus should be provided, if texts is provided, it will be converted to corpus 74 | using the dictionary. For 'c_v', 'c_uci' and 'c_npmi' `texts` should be provided (`corpus` isn't needed) 75 | topn : int, optional 76 | Integer corresponding to the number of top words to be extracted from each topic. 77 | processes : int, optional 78 | Number of processes to use for probability estimation phase, any value less than 1 will be interpreted as 79 | num_cpus - 1. 
80 | 81 | Outputs: 82 | topic_coherence : [float], The topic coherence with model 83 | 84 | ''' 85 | self.model = model 86 | self.topics = topics 87 | self.texts = texts 88 | self.corpus = corpus 89 | self.dictionary = dictionary 90 | self.window_size = window_size 91 | self.keyed_vectors = keyed_vectors 92 | self.coherence = coherence 93 | self.topn = topn 94 | self.processes = processes 95 | 96 | self._get() 97 | print(f'The topic coherence score is: {self._topic_coherence:.4f}') 98 | 99 | def _get(self): 100 | 101 | cm = CoherenceModel(model=self.model, topics=self.topics, texts=self.texts, corpus=self.corpus, 102 | dictionary=self.dictionary, window_size=self.window_size, keyed_vectors=self.keyed_vectors, 103 | coherence=self.coherence, topn=self.topn, processes=self.processes) 104 | 105 | self._topic_coherence = cm.get_coherence() 106 | 107 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Real_NVP/Real_NVP_Demo.py: -------------------------------------------------------------------------------- 1 | """Training procedure for real NVP. 2 | """ 3 | 4 | import os 5 | import argparse 6 | import torch 7 | import torch.distributions as distributions 8 | import torch.optim as optim 9 | from torchvision.utils import save_image 10 | from torchvision import datasets, transforms 11 | 12 | import numpy as np 13 | from pydpm.model import RealNVP 14 | from pydpm.model.deep_learning_pm.realnvp import DataInfo 15 | 16 | # =========================================== ArgumentParser ===================================================================== # 17 | parser = argparse.ArgumentParser() 18 | 19 | # device 20 | parser.add_argument("--gpu_id", type=int, default=0) 21 | 22 | # dataset 23 | parser.add_argument('--dataset', type=str, default='mnist', help='dataset to be modeled.') 24 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 25 | 26 | # model 27 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 28 | parser.add_argument("--load_path", type=str, default='../../save_models/realNVP.pth', help="the path of loading model") 29 | 30 | parser.add_argument('--base_dim', type=int, default=64, help='features in residual blocks of first few layers.') 31 | parser.add_argument('--res_blocks', type=int, default=8, help='number of residual blocks per group.') 32 | parser.add_argument('--bottleneck', type=int, default=0, help='whether to use bottleneck in residual blocks.') 33 | parser.add_argument('--skip', type=int, default=1, help='whether to use skip connection in coupling layers.') 34 | parser.add_argument('--weight_norm', type=int, default=1, help='whether to apply weight normalization.') 35 | parser.add_argument('--coupling_bn', type=int, default=1, help='whether to apply batchnorm after coupling layers.') 36 | parser.add_argument('--affine', type=int, default=1, help='whether to use affine coupling.') 37 | 38 | # optim 39 | parser.add_argument('--batch_size', type=int, default=64, help='number of images in a mini-batch.') 40 | parser.add_argument('--num_epochs', type=int, default=500, help='maximum number of training epoches.') 41 | parser.add_argument('--sample_size', type=int, default=64, help='number of images to generate.') 42 | parser.add_argument('--lr', type=float, default=1e-3, help='initial learning rate.') 43 | parser.add_argument('--momentum', type=float, default=0.9, help='beta1 in Adam optimizer.') 44 | 
parser.add_argument('--decay', type=float, default=0.999, help='beta2 in Adam optimizer.') 45 | parser.add_argument('--scale_reg', type=float, default=5e-5, help='L2 regularization strength.') 46 | 47 | args = parser.parse_args() 48 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 49 | 50 | # =========================================== Dataset ===================================================================== # 51 | # mnist 52 | data_info = DataInfo(args.dataset, 1, 28) # if cifar10, channels: 1->3 53 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transforms.ToTensor(), download=True) 54 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transforms.ToTensor(), download=False) 55 | 56 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 57 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 58 | args.image_size = 28 59 | # =========================================== Model ===================================================================== # 60 | # model 61 | 62 | # Set prior 63 | prior = distributions.Normal(torch.tensor(0.).to(args.device), torch.tensor(1.).to(args.device)) 64 | 65 | # Initialize model 66 | model = RealNVP(datainfo=data_info, prior=prior, device=args.device, args=args).to(args.device) 67 | model_opt = optim.Adamax(model.parameters(), lr=args.lr, betas=(args.momentum, args.decay), eps=1e-7) 68 | 69 | # train 70 | best_log_ll = float('-inf') 71 | for epoch_idx in range(args.num_epochs): 72 | log_ll_mean = model.train_one_epoch(dataloader=train_loader, model_opt=model_opt, epoch=epoch_idx, args=args) 73 | if epoch_idx % 5 == 0: 74 | log_ll_mean = model.test_one_epoch(dataloader=test_loader, epoch=epoch_idx, args=args) 75 | if log_ll_mean > best_log_ll: 76 | best_log_ll = log_ll_mean # keep track of the best test log-likelihood and save 77 | model.save(args.save_path) 78 | 79 | # load 80 | model.load(args.load_path) 81 | 82 | # =================== Visualization ====================== # 83 | os.makedirs("../../output/images", exist_ok=True) 84 | print('sample image, please wait!') 85 | with torch.no_grad(): 86 | sample = model.sample(64) 87 | save_image(sample.view(64, 1, 28, 28), '../../output/images/realNVP_sample.png') 88 | print('complete!!!') 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /pydpm/model/deep_learning_pm/rbm.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | RBM 4 | A Practical Guide to Training 5 | Restricted Boltzmann Machines 6 | Geoffrey Hinton 7 | Published in 2010 8 | =========================================== 9 | """ 10 | # Author: Muyao Wang , Xinyang Liu 11 | # License: BSD-3-Clause 12 | 13 | import os 14 | import numpy as np 15 | 16 | import torch 17 | import torch.utils.data 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | from torch.autograd import Variable 21 | 22 | 23 | class RBM(nn.Module): 24 | def __init__(self, n_vis=784, n_hin=500, k=5): 25 | """ 26 | The basic model for RBM 27 | Inputs: 28 | n_vis : [int] number of visible units; 29 | n_hin : [int] number of latent units; 30 | k : [int] number of Gibbs sampling steps (CD-k); 31 | """ 32 | super(RBM, self).__init__() 33 | setattr(self, '_model_name', 'RBM') 34 | self.W = nn.Parameter(torch.randn(n_hin,
n_vis) * 1e-2) 35 | self.v_bias = nn.Parameter(torch.zeros(n_vis)) 36 | self.h_bias = nn.Parameter(torch.zeros(n_hin)) 37 | self.k = k 38 | 39 | def sample_from_p(self, p): 40 | """ 41 | Sample from p distribution 42 | Inputs: 43 | p : [tensor] distribution of p; 44 | Outputs: 45 | sample of p :[tensor] sample of p; 46 | """ 47 | return F.relu(torch.sign(p - Variable(torch.rand(p.size())))) 48 | 49 | def v_to_h(self, v): 50 | """ 51 | propagation from v to h 52 | Inputs: 53 | v : [tensor] distribution of v; 54 | Outputs: 55 | p_h : [tensor] prediction of h; 56 | sample_h : [tensor] sample of h; 57 | """ 58 | p_h = F.sigmoid(F.linear(v, self.W, self.h_bias)) 59 | sample_h = self.sample_from_p(p_h) 60 | return p_h, sample_h 61 | 62 | def h_to_v(self, h): 63 | """ 64 | propagation from h to v 65 | Inputs: 66 | h : [tensor] distribution of h; 67 | Outputs: 68 | p_v : [tensor] prediction of v; 69 | sample_v : [tensor] sample of v; 70 | """ 71 | p_v = F.sigmoid(F.linear(h, self.W.t(), self.v_bias)) 72 | sample_v = self.sample_from_p(p_v) 73 | return p_v, sample_v 74 | 75 | def forward(self, v): 76 | """ 77 | Forward process of RBM 78 | Inputs: 79 | v : [tensor] input of data; 80 | Outputs: 81 | v : [tensor] input of data; 82 | v_ : [tensor] prediction of v; 83 | """ 84 | pre_h1, h1 = self.v_to_h(v) 85 | 86 | h_ = h1 87 | for _ in range(self.k): 88 | pre_v_, v_ = self.h_to_v(h_) 89 | pre_h_, h_ = self.v_to_h(v_) 90 | 91 | return v, v_ 92 | 93 | def free_energy(self, v): 94 | """ 95 | Free energy of RBM 96 | Inputs: 97 | v : [tensor] distribution of v; 98 | Outputs: 99 | free_energy : [tensor] free energy of whole system 100 | """ 101 | vbias_term = v.mv(self.v_bias) 102 | wx_b = F.linear(v, self.W, self.h_bias) 103 | hidden_term = wx_b.exp().add(1).log().sum(1) 104 | return (-hidden_term - vbias_term).mean() 105 | 106 | # train and test 107 | def train_one_epoch(self, dataloader, model_opt, epoch_idx, args): 108 | loss_ = [] 109 | for batch_idx, (data, target) in enumerate(dataloader): 110 | data = Variable(data.view(-1, 784)) 111 | sample_data = data.bernoulli() 112 | 113 | v, v1 = self(sample_data) 114 | loss = self.free_energy(v) - self.free_energy(v1) 115 | loss_.append(loss.item()) 116 | model_opt.zero_grad() 117 | loss.backward() 118 | model_opt.step() 119 | print('Train Epoch: {} Loss: {:.6f}'.format(epoch_idx, np.mean(loss_))) 120 | return v, v1 121 | 122 | # save and load 123 | def save(self, model_path: str = '../save_models'): 124 | """ 125 | save model 126 | Inputs: 127 | model_path : [str] the path to save the model, default '../save_models/RBM.pth'; 128 | """ 129 | # create the directory path 130 | if not os.path.isdir(model_path): 131 | os.mkdir(model_path) 132 | 133 | # Save the model 134 | torch.save({'state_dict': self.state_dict()}, model_path + '/' + self._model_name + '.pth') 135 | print('model has been saved by ' + model_path + '/' + self._model_name + '.pth') 136 | 137 | def load(self, model_path): 138 | """ 139 | load model 140 | Inputs: 141 | model_path : [str] the path to load the model; 142 | """ 143 | assert os.path.exists(model_path), 'Path Error: can not find the path to load the model' 144 | # Load the model 145 | checkpoint = torch.load(model_path) 146 | self.load_state_dict(checkpoint['state_dict']) -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Hybrid_Autoencoding_Inference/WHAI_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
=========================================== 3 | WHAI: WEIBULL HYBRID AUTOENCODING INFERENCE FOR DEEP TOPIC MODELING (Demo) 4 | Hao Zhang, Bo Chen, Dandan Guo and Mingyuan Zhou 5 | Published as a conference paper at ICLR 2018 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | import numpy as np 17 | import scipy.io as sio 18 | from sklearn.cluster import k_means 19 | from nltk.corpus import stopwords 20 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 21 | 22 | import torch 23 | from torch.utils.data import DataLoader 24 | 25 | from torchtext.data.utils import get_tokenizer 26 | from torchtext.datasets import AG_NEWS 27 | 28 | from pydpm.model import WHAI 29 | from pydpm.utils import * 30 | from pydpm.metric import * 31 | from pydpm.dataloader.text_data import Text_Processer, build_vocab_from_iterator 32 | 33 | # =========================================== ArgumentParser ===================================================================== # 34 | parser = argparse.ArgumentParser() 35 | 36 | # device 37 | parser.add_argument("--gpu_id", type=int, default=0, help="the id of gpu to deploy") 38 | 39 | # dataset 40 | parser.add_argument("--dataset", type=str, default='AG_NEWS', help="the name of dataset") 41 | parser.add_argument("--data_path", type=str, default='../../../dataset', help="the file path of dataset") 42 | 43 | # model 44 | parser.add_argument("--voc_size", type=int, default=20000, help="the length of vocabulary") 45 | parser.add_argument("--z_dims", type=list, default=[128, 64, 32], help="the list of z dimension") 46 | parser.add_argument("--hid_dims", type=list, default=[200, 200, 200], help="the list of hidden dimension") 47 | 48 | # optim 49 | parser.add_argument("--num_epochs", type=int, default=1000, help="number of epochs of training") 50 | parser.add_argument("--batch_size", type=int, default=256, help="size of the batches") 51 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 52 | parser.add_argument("--MBratio", type=int, default=50, help="number of epochs of training") 53 | 54 | args = parser.parse_args() 55 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 56 | 57 | # =========================================== Dataset ===================================================================== # 58 | # load dataset (AG_NEWS from torchtext) 59 | train_iter, test_iter = AG_NEWS(args.data_path, split=('train', 'test')) 60 | tokenizer = get_tokenizer("basic_english") 61 | 62 | # build vocabulary 63 | stop_words = list(stopwords.words('english')) 64 | vocab = build_vocab_from_iterator(map(lambda x: tokenizer(x[1]), train_iter), specials=['', '', '', ''], special_first=True, stop_words=stop_words, max_tokens=args.voc_size) 65 | vocab.set_default_index(vocab['']) 66 | text_processer = Text_Processer(tokenizer=tokenizer, vocab=vocab) 67 | 68 | # Get train/test label and data_file(tokens) from data_iter and convert them into clean file 69 | train_files, train_labels = text_processer.file_from_iter(train_iter, tokenizer=tokenizer) 70 | test_files, test_labels = text_processer.file_from_iter(test_iter, tokenizer=tokenizer) 71 | 72 | # Take part of dataset for convenience 73 | train_idxs = np.arange(7000) 74 | np.random.shuffle(train_idxs) 75 | train_files = [train_files[i] for i in train_idxs] 76 | train_labels = [train_labels[i] for i in train_idxs] 77 | 78 | test_idxs = np.arange(3000) 
79 | np.random.shuffle(test_idxs) 80 | test_files = [test_files[i] for i in test_idxs] 81 | test_labels = [test_labels[i] for i in test_idxs] 82 | 83 | train_bows, train_labels = text_processer.bow_from_file(train_files, train_labels) 84 | test_bows, test_labels = text_processer.bow_from_file(test_files, test_labels) 85 | 86 | train_loader = DataLoader([train_data for train_data in zip(train_bows, train_labels)], batch_size=256, shuffle=False, num_workers=4, drop_last=True) 87 | test_loader = DataLoader([test_data for test_data in zip(test_bows, test_labels)], batch_size=256, shuffle=False, num_workers=4, drop_last=True) 88 | 89 | # =========================================== Model ===================================================================== # 90 | model = WHAI(in_dim=args.voc_size, z_dims=args.z_dims, hid_dims=args.hid_dims, device=args.device, encode_prior=False) 91 | model_opt = torch.optim.Adam(model.parameters()) 92 | 93 | for epoch_idx in range(args.num_epochs): 94 | train_local_params = model.train_one_epoch(dataloader=train_loader, optim=model_opt, epoch_idx=epoch_idx, args=args) 95 | 96 | if (epoch_idx+1) % 20 == 0: 97 | test_local_params = model.test_one_epoch(dataloader=test_loader) 98 | # calculate PPL 99 | x_hat = np.matmul(test_local_params.Theta[0], np.transpose(model.global_params.Phi[0])) 100 | ppl = Perplexity(test_local_params.data, x_hat) 101 | 102 | # calculate NMI with train_local_params 103 | cls_num = len(np.unique(train_labels + test_labels)) 104 | test_theta_norm = standardization(test_local_params.Theta[0]) 105 | tmp = k_means(test_theta_norm, cls_num) # N*K 106 | predict_label = tmp[1] + 1 # Some label start with '1' not '0', there should be 'tmp[1] + 1' 107 | MI = NMI(test_local_params.label, predict_label) 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Convolutional_Poisson_Gamma_Belief_Network/CPFA_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Convolutional Poisson Factor Analysis 4 | Chaojie Wang Sucheng Xiao Bo Chen and Mingyuan Zhou 5 | Published in International Conference on Machine Learning 2019 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import argparse 16 | import scipy.io as sio 17 | import _pickle as cPickle 18 | 19 | from torch.utils.data import Dataset, DataLoader 20 | from torchtext.data.utils import get_tokenizer 21 | from torchtext.datasets import AG_NEWS 22 | 23 | from pydpm.model import CPFA 24 | from pydpm.metric import ACC 25 | from pydpm.dataloader.text_data import Text_Processer, build_vocab_from_iterator 26 | 27 | # =========================================== ArgumentParser ===================================================================== # 28 | parser = argparse.ArgumentParser() 29 | 30 | # device 31 | parser.add_argument("--device", type=str, default='gpu') 32 | 33 | # dataset 34 | parser.add_argument("--data_path", type=str, default='../../../dataset/', help="the path of loading data") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/CPFA.npy', help="the path of loading model") 39 | 40 | parser.add_argument("--z_dim", 
type=int, default=200, help="dimensionality of the z latent space") 41 | 42 | # optim 43 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 44 | 45 | args = parser.parse_args() 46 | 47 | # =========================================== Dataset ===================================================================== # 48 | # define transform for dataset and load orginal dataset 49 | # load dataset (AG_NEWS from torchtext) 50 | train_iter, test_iter = AG_NEWS(args.data_path, split=('train', 'test')) 51 | tokenizer = get_tokenizer("basic_english") 52 | 53 | # build vocabulary 54 | vocab = build_vocab_from_iterator(map(lambda x: tokenizer(x[1]), train_iter), specials=['', '', '', ''], special_first=True, max_tokens=5000) 55 | vocab.set_default_index(vocab['']) 56 | text_processer = Text_Processer(tokenizer=tokenizer, vocab=vocab) 57 | 58 | # Get train/test label and data_file(tokens) from data_iter and convert them into clean file 59 | train_files, train_labels = text_processer.file_from_iter(train_iter, tokenizer=tokenizer) 60 | test_files, test_labels = text_processer.file_from_iter(test_iter, tokenizer=tokenizer) 61 | 62 | # Take part of dataset for convenience 63 | train_idxs = np.arange(3000) 64 | np.random.shuffle(train_idxs) 65 | train_files = [train_files[i] for i in train_idxs] 66 | train_labels = [train_labels[i] for i in train_idxs] 67 | 68 | test_idxs = np.arange(1000) 69 | np.random.shuffle(test_idxs) 70 | test_files = [test_files[i] for i in test_idxs] 71 | test_labels = [test_labels[i] for i in test_idxs] 72 | 73 | # ===================================== mode 1, sparse input ====================================== # 74 | # Build batch of word2index 75 | train_sparse_batch, train_labels = text_processer.word_index_from_file(train_files, train_labels, to_sparse=True) 76 | test_sparse_batch, test_labels = text_processer.word_index_from_file(test_files, test_labels, to_sparse=True) 77 | print('Data has been processed!') 78 | 79 | # create the model and deploy it on gpu or cpu 80 | model = CPFA(K=args.z_dim, device=args.device) 81 | model.initial(train_sparse_batch, is_sparse=True) # use the shape of train_data to initialize the params of model 82 | 83 | # train and evaluation 84 | train_local_params = model.train(data=train_sparse_batch, is_sparse=True, num_epochs=args.num_epochs) 85 | train_local_params = model.test(data=train_sparse_batch, is_sparse=True, num_epochs=args.num_epochs) 86 | test_local_params = model.test(data=test_sparse_batch, is_sparse=True, num_epochs=args.num_epochs) 87 | 88 | # save the model after training 89 | model.save(args.save_path) 90 | # load the model 91 | model.load(args.load_path) 92 | 93 | # evaluate the model with classification accuracy 94 | # the demo accuracy can achieve 0.628 95 | train_theta = np.sum(np.sum(train_local_params.W_nk, axis=3), axis=2).T 96 | test_theta = np.sum(np.sum(test_local_params.W_nk, axis=3), axis=2).T 97 | results = ACC(train_theta, test_theta, train_labels, test_labels, 'SVM') 98 | 99 | 100 | # # Use custom dataset 101 | # DATA = cPickle.load(open("../../dataset/TREC.pkl", "rb"), encoding='iso-8859-1') 102 | # 103 | # data_vab_list = DATA['Vocabulary'] 104 | # data_vab_count_list = DATA['Vab_count'] 105 | # data_vab_length = DATA['Vab_Size'] 106 | # data_label = DATA['Label'] 107 | # data_train_list = DATA['Train_Origin'] 108 | # data_train_label = np.array(DATA['Train_Label']) 109 | # data_train_split = DATA['Train_Word_Split'] 110 | # data_train_list_index = 
DATA['Train_Word2Index'] 111 | # data_test_list = DATA['Test_Origin'] 112 | # data_test_label = np.array(DATA['Test_Label']) 113 | # data_test_split = DATA['Test_Word_Split'] 114 | # data_test_list_index = DATA['Test_Word2Index'] 115 | 116 | # train_sparse_batch, train_labels = text_processer.word_index_from_file(data_train_list_index, data_train_label, to_sparse=True) 117 | # test_sparse_batch, test_labels = text_processer.word_index_from_file(data_test_list_index, data_test_label, to_sparse=True) 118 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Convolutional_Poisson_Gamma_Belief_Network/CPGBN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================================ 3 | Convolutional Poisson Gamma Belief Network Demo 4 | Chaojie Wang Sucheng Xiao Bo Chen and Mingyuan Zhou 5 | Published in International Conference on Machine Learning 2019 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | import _pickle as cPickle 19 | 20 | from torchtext.data.utils import get_tokenizer 21 | from torchtext.datasets import AG_NEWS 22 | 23 | from pydpm.model import CPGBN 24 | from pydpm.metric import ACC 25 | from pydpm.dataloader.text_data import Text_Processer, build_vocab_from_iterator 26 | 27 | # =========================================== ArgumentParser ===================================================================== # 28 | parser = argparse.ArgumentParser() 29 | 30 | # device 31 | parser.add_argument("--device", type=str, default='gpu') 32 | 33 | # dataset 34 | parser.add_argument("--data_path", type=str, default='../../../dataset/', help="the path of loading data") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/CPGBN.npy', help="the path of loading model") 39 | 40 | parser.add_argument("--z_dims", type=list, default=[200, 100, 50], help="number of topics at different layers in CPGBN") 41 | 42 | args = parser.parse_args() 43 | 44 | # =========================================== Dataset ===================================================================== # 45 | # define transform for dataset and load the original dataset 46 | # Load dataset (AG_NEWS from torchtext) 47 | train_iter, test_iter = AG_NEWS(args.data_path, split=('train', 'test')) 48 | tokenizer = get_tokenizer("basic_english") 49 | 50 | # build vocabulary 51 | vocab = build_vocab_from_iterator(map(lambda x: tokenizer(x[1]), train_iter), specials=['', '', '', ''], special_first=True, max_tokens=5000) 52 | vocab.set_default_index(vocab['']) 53 | text_processer = Text_Processer(tokenizer=tokenizer, vocab=vocab) 54 | 55 | # Get train/test label and data_file(tokens) from data_iter and convert them into clean file 56 | # stop_words = [''] # Defined by the user, with as many stop words as needed 57 | train_files, train_labels = text_processer.file_from_iter(train_iter, tokenizer=tokenizer) 58 | test_files, test_labels = text_processer.file_from_iter(test_iter, tokenizer=tokenizer) 59 | 60 | # Take part of dataset for convenience 61 | train_idxs = np.arange(5000) 62 | np.random.shuffle(train_idxs) 63 | train_files = [train_files[i] for i in train_idxs] 64 | train_labels =
[train_labels[i] for i in train_idxs] 65 | 66 | test_idxs = np.arange(1000) 67 | np.random.shuffle(test_idxs) 68 | test_files = [test_files[i] for i in test_idxs] 69 | test_labels = [test_labels[i] for i in test_idxs] 70 | 71 | # ===================================== mode 1, sparse input ====================================== # 72 | # Build batch of word2index 73 | train_sparse_batch, train_labels = text_processer.word_index_from_file(train_files, train_labels, to_sparse=True) 74 | test_sparse_batch, test_labels = text_processer.word_index_from_file(test_files, test_labels, to_sparse=True) 75 | print('Data has been processed!') 76 | 77 | # =========================================== Model ===================================================================== # 78 | # create the model and deploy it on gpu or cpu 79 | model = CPGBN(K=args.z_dims, device=args.device) 80 | model.initial(train_sparse_batch, is_sparse=True) # use the shape of train_data to initialize the params of model 81 | 82 | # train and evaluation 83 | train_local_params = model.train(train_sparse_batch, is_sparse=True, iter_all=100) 84 | train_local_params = model.test(train_sparse_batch, is_sparse=True, iter_all=100) 85 | test_local_params = model.test(test_sparse_batch, is_sparse=True, iter_all=100) 86 | 87 | # save the model after training 88 | model.save(args.save_path) 89 | # load the model 90 | model.load(args.load_path) 91 | 92 | # evaluate the model with classification accuracy 93 | # the demo accuracy can achieve 0.631 94 | train_theta = np.sum(np.sum(train_local_params.W_nk, axis=3), axis=2).T 95 | test_theta = np.sum(np.sum(test_local_params.W_nk, axis=3), axis=2).T 96 | results = ACC(train_theta, test_theta, train_labels, test_labels, 'SVM') 97 | 98 | 99 | 100 | # # Custom dataset 101 | 102 | # DATA = cPickle.load(open("../../dataset/TREC.pkl", "rb"), encoding='iso-8859-1') 103 | # 104 | # data_vab_list = DATA['Vocabulary'] 105 | # data_vab_count_list = DATA['Vab_count'] 106 | # data_vab_length = DATA['Vab_Size'] 107 | # data_label = DATA['Label'] 108 | # data_train_list = DATA['Train_Origin'] 109 | # data_train_label = np.array(DATA['Train_Label']) 110 | # data_train_split = DATA['Train_Word_Split'] 111 | # data_train_list_index = DATA['Train_Word2Index'] 112 | # data_test_list = DATA['Test_Origin'] 113 | # data_test_label = np.array(DATA['Test_Label']) 114 | # data_test_split = DATA['Test_Word_Split'] 115 | # data_test_list_index = DATA['Test_Word2Index'] 116 | 117 | # train_sparse_batch, train_labels = text_processer.word_index_from_file(data_train_list_index, data_train_label, to_sparse=True) 118 | # test_sparse_batch, test_labels = text_processer.word_index_from_file(data_test_list_index, data_test_label, to_sparse=True) 119 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Word_Embeddings_Deep_Topic_Model/WEDTM_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | WEDTM Demo 4 | Inter and Intra Topic Structure Learning with Word Embeddings 5 | He Zhao, Lan Du, Wray Buntine, Mingyuan Zhou 6 | Published in International Conference on Machine Learning 2018 7 | 8 | =========================================== 9 | 10 | """ 11 | 12 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 13 | # License: BSD-3-Clause 14 | 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | 19 | import nltk 20 | from nltk.corpus import
stopwords 21 | 22 | import torch 23 | from torch.utils.data import Dataset, DataLoader 24 | from torchtext.data.utils import get_tokenizer 25 | from torchtext.vocab import GloVe 26 | from torchtext.datasets import AG_NEWS 27 | 28 | from pydpm.model import WEDTM 29 | from pydpm.metric import ACC 30 | from pydpm.dataloader.text_data import Text_Processer, build_vocab_from_iterator 31 | 32 | # =========================================== ArgumentParser ===================================================================== # 33 | parser = argparse.ArgumentParser() 34 | 35 | # device 36 | parser.add_argument("--device", type=str, default='gpu') 37 | 38 | # dataset 39 | parser.add_argument("--data_path", type=str, default='../dataset/', help="the path of loading data") 40 | 41 | # model 42 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 43 | parser.add_argument("--load_path", type=str, default='../../save_models/WEDTM.npy', help="the path of loading model") 44 | 45 | parser.add_argument("--z_dim", type=int, default=100, help="number of topics in each layers") 46 | parser.add_argument("--T", type=int, default=3, help="number of vertical layers") 47 | parser.add_argument("--S", type=int, default=3, help="number of sub topics") 48 | 49 | # optim 50 | parser.add_argument("--num_epochs", type=int, default=300, help="number of epochs of training") 51 | 52 | args = parser.parse_args() 53 | 54 | # =========================================== Dataset ===================================================================== # 55 | # define transform for dataset and load orginal dataset 56 | # Load dataset (AG_NEWS from torchtext) 57 | train_iter, test_iter = AG_NEWS(args.data_path, split=('train', 'test')) 58 | tokenizer = get_tokenizer("basic_english") 59 | 60 | # build vocabulary 61 | # nltk.download('stopwords') 62 | stop_words = list(stopwords.words('english')) 63 | vocab_size = 7000 64 | vocab = build_vocab_from_iterator(map(lambda x: tokenizer(x[1]), train_iter), special_first=False, stop_words=stop_words, max_tokens=vocab_size) 65 | text_processer = Text_Processer(tokenizer=tokenizer, vocab=vocab) 66 | 67 | # Get train/test label and data_file(tokens) from data_iter and convert them into clean file 68 | train_files, train_labels = text_processer.file_from_iter(train_iter, tokenizer=tokenizer) 69 | test_files, test_labels = text_processer.file_from_iter(test_iter, tokenizer=tokenizer) 70 | 71 | # Take part of dataset for convenience 72 | train_idxs = np.arange(5000) 73 | np.random.shuffle(train_idxs) 74 | train_files = [train_files[i] for i in train_idxs] 75 | train_labels = [train_labels[i] for i in train_idxs] 76 | 77 | test_idxs = np.arange(1000) 78 | np.random.shuffle(test_idxs) 79 | test_files = [test_files[i] for i in test_idxs] 80 | test_labels = [test_labels[i] for i in test_idxs] 81 | 82 | # Build word embedding 83 | vector = GloVe(name='6B', dim=50) 84 | voc_embedding = vector.get_vecs_by_tokens(vocab.get_itos(), lower_case_backup=True) 85 | 86 | # Dataloader 87 | train_bow, train_labels = text_processer.bow_from_file(train_files, train_labels, to_sparse=False) 88 | test_bow, test_labels = text_processer.bow_from_file(test_files, test_labels, to_sparse=False) 89 | 90 | # Transpose dataset to fit the model and convert a tensor to numpy 91 | train_data = np.asarray(train_bow).T.astype(int) 92 | test_data = np.asarray(test_bow).T.astype(int) 93 | voc_embedding = voc_embedding.numpy() 94 | 95 | print('Data has been processed!') 96 | 97 | # 
=========================================== Model ===================================================================== # 98 | # create the model and deploy it on gpu or cpu 99 | model = WEDTM(K=[args.z_dim] * args.T, device=args.device) 100 | model.initial(train_data) # use the shape of train_data to initialize the params of model 101 | 102 | # train and evaluation 103 | train_local_params = model.train(voc_embedding, train_data, args.S, num_epochs=args.num_epochs, is_initial_local=False) 104 | train_local_params = model.test(voc_embedding, train_data, args.S, num_epochs=args.num_epochs) 105 | test_local_params = model.test(voc_embedding, test_data, args.S, num_epochs=args.num_epochs) 106 | 107 | # save the model after training 108 | model.save(args.save_path) 109 | # load the model 110 | model.load(args.load_path) 111 | 112 | # evaluate the model with classification accuracy 113 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_labels, test_labels, 'SVM') 114 | 115 | 116 | # # load dataset (WS.mat from paper) 117 | # dataset = sio.loadmat('../../dataset/WS.mat') 118 | # train_data = np.asarray(dataset['doc'].todense()[:, dataset['train_idx'][0]-1])[:, ::10].astype(int) 119 | # test_data = np.asarray(dataset['doc'].todense()[:, dataset['test_idx'][0]-1])[:, ::5].astype(int) 120 | # train_label = dataset['labels'][dataset['train_idx'][0]-1][::10, :] 121 | # test_label = dataset['labels'][dataset['test_idx'][0]-1][::5, :] 122 | 123 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Sawtooth_Embedding_Topic_Model/SawETM_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Sawtooth Factorial Topic Embeddings Guided Gamma Belief Network 4 | Zhibin Duan, Dongsheng Wang, Bo Chen, Chaojie Wang, Wenchao Chen, Yewen Li, Jie Ren and Mingyuan Zhou 5 | Published as a conference paper at ICML 2021 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | import numpy as np 17 | import scipy.io as sio 18 | from sklearn.cluster import k_means 19 | from nltk.corpus import stopwords 20 | 21 | import torch 22 | from torch.utils.data import DataLoader 23 | 24 | from torchtext.data.utils import get_tokenizer 25 | from torchtext.datasets import AG_NEWS 26 | 27 | from pydpm.model import SawETM 28 | from pydpm.utils import * 29 | from pydpm.metric import * 30 | from pydpm.dataloader.text_data import Text_Processer, build_vocab_from_iterator 31 | 32 | # =========================================== ArgumentParser ===================================================================== # 33 | parser = argparse.ArgumentParser() 34 | 35 | # device 36 | parser.add_argument("--gpu_id", type=int, default=0, help="the id of gpu to deploy") 37 | 38 | # dataset 39 | parser.add_argument("--dataset", type=str, default='AG_NEWS', help="the name of dataset") 40 | parser.add_argument("--data_path", type=str, default='../../../dataset', help="the file path of dataset") 41 | 42 | # model 43 | parser.add_argument("--embed_size", type=int, default=64, help="the length of vocabulary") 44 | parser.add_argument("--vocab_size", type=int, default=8000, help="the length of vocabulary") 45 | parser.add_argument("--num_topics_list", type=list, default=[128, 64, 32], help="the list of z dimension") 46 | parser.add_argument("--num_hiddens_list", type=list, 
default=[200, 200, 200], help="the list of hidden dimension") 47 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 48 | parser.add_argument("--load_path", type=str, default='../../save_models/SawETM.pth', help="the path of loading model") 49 | 50 | # optim 51 | parser.add_argument("--num_epochs", type=int, default=500, help="number of epochs of training") 52 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 53 | parser.add_argument("--lr", type=float, default=0.01, help="adam: learning rate") 54 | parser.add_argument("--weight_decay", type=float, default=1e-5, help="l2 regularization strength") 55 | 56 | args = parser.parse_args() 57 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 58 | 59 | # =========================================== Dataset ===================================================================== # 60 | # load dataset (AG_NEWS from torchtext) 61 | train_iter, test_iter = AG_NEWS(args.data_path, split=('train', 'test')) 62 | tokenizer = get_tokenizer("basic_english") 63 | 64 | # build vocabulary 65 | stop_words = list(stopwords.words('english')) 66 | vocab = build_vocab_from_iterator(map(lambda x: tokenizer(x[1]), train_iter), specials=['', '', '', ''], special_first=True, stop_words=stop_words, max_tokens=args.vocab_size) 67 | vocab.set_default_index(vocab['']) 68 | text_processer = Text_Processer(tokenizer=tokenizer, vocab=vocab) 69 | 70 | # Get train/test label and data_file(tokens) from data_iter and convert them into clean file 71 | train_files, train_labels = text_processer.file_from_iter(train_iter, tokenizer=tokenizer) 72 | test_files, test_labels = text_processer.file_from_iter(test_iter, tokenizer=tokenizer) 73 | 74 | # Take part of dataset for convenience 75 | train_idxs = np.arange(7000) 76 | np.random.shuffle(train_idxs) 77 | train_files = [train_files[i] for i in train_idxs] 78 | train_labels = [train_labels[i] for i in train_idxs] 79 | 80 | test_idxs = np.arange(3000) 81 | np.random.shuffle(test_idxs) 82 | test_files = [test_files[i] for i in test_idxs] 83 | test_labels = [test_labels[i] for i in test_idxs] 84 | 85 | train_bows, train_labels = text_processer.bow_from_file(train_files, train_labels) 86 | test_bows, test_labels = text_processer.bow_from_file(test_files, test_labels) 87 | 88 | train_loader = DataLoader([train_data for train_data in zip(train_bows, train_labels)], batch_size=256, shuffle=False, num_workers=4, drop_last=True) 89 | test_loader = DataLoader([test_data for test_data in zip(test_bows, test_labels)], batch_size=256, shuffle=False, num_workers=4, drop_last=True) 90 | 91 | # if args.pretrained_embeddings: 92 | # print('Using pretrained glove embeddings') 93 | # initial_embeddings = load_glove_embeddings(args.embed_size, vocab) 94 | # else: 95 | # initial_embeddings = None 96 | initial_embeddings = None 97 | # =========================================== Model ===================================================================== # 98 | model = SawETM(embed_size=args.embed_size, vocab_size=args.vocab_size, num_hiddens_list=args.num_hiddens_list, num_topics_list=args.num_topics_list, word_embeddings=initial_embeddings, device=args.device) 99 | model.to(args.device) 100 | model_opt = torch.optim.Adam(params=model.parameters(), 101 | lr=args.lr, 102 | weight_decay=args.weight_decay) 103 | 104 | 105 | ############### Training ################ 106 | 107 | for epoch_idx in 
range(args.num_epochs): 108 | _, _ = model.train_one_epoch(dataloader=train_loader, model_opt=model_opt, epoch_idx=epoch_idx, args=args) 109 | 110 | if (epoch_idx+1) % 20 == 0: 111 | theta, labels = model.test_one_epoch(dataloader=test_loader) 112 | 113 | # calculate NMI with train_local_params 114 | cls_num = len(np.unique(train_labels + test_labels)) 115 | test_theta_norm = standardization(theta) 116 | tmp = k_means(test_theta_norm, cls_num) # N*K 117 | predict_label = tmp[1] + 1 # Some label start with '1' not '0', there should be 'tmp[1] + 1' 118 | MI = NMI(labels, predict_label) 119 | purity = Purity(labels, predict_label) 120 | 121 | # save 122 | model.save(args.save_path) 123 | # load 124 | model.load(args.load_path) 125 | 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Graph_Attention_Autoencoder/WGAAE_Demo.py: -------------------------------------------------------------------------------- 1 | # Author: Xinyang Liu 2 | # License: BSD-3-Clause 3 | 4 | import os 5 | import argparse 6 | import random 7 | import numpy as np 8 | import scipy.sparse as sp 9 | 10 | import torch 11 | import torch_geometric.transforms as T 12 | from torch_geometric.datasets import Planetoid 13 | 14 | from pydpm.model import WGAAE 15 | from pydpm.utils import * 16 | from pydpm.dataloader.graph_data import Graph_Processer 17 | from pydpm.metric.roc_score import ROC_AP_SCORE 18 | 19 | # =========================================== ArgumentParser ===================================================================== # 20 | parser = argparse.ArgumentParser() 21 | 22 | # device 23 | parser.add_argument("--gpu_id", type=int, default=0, help="the id of gpu to deploy") 24 | parser.add_argument('--seed', type=int, default=123, help='Setting random seed') 25 | 26 | # dataset 27 | parser.add_argument('--dataset', type=str, default='cora', help='Dataset string') 28 | parser.add_argument('--dataset_path', type=str, default='../../../dataset/Planetoid', help="the file path of dataset") 29 | 30 | # network settings 31 | parser.add_argument('--z_dims', type=list, default=[64, 64, 64], help='Output dimension list') 32 | parser.add_argument('--hid_dims', type=list, default=[128, 128, 128], help='Hidden dimension list') 33 | parser.add_argument('--out_dim', type=int, default=32, help='Dimension of output') 34 | parser.add_argument('--num_heads', type=int, default=4, help='Number of heads in GAT') 35 | 36 | # optimizer 37 | parser.add_argument("--lr", type=float, default=0.001, help="Adam: learning rate") 38 | 39 | # training 40 | parser.add_argument('--task', type=str, default='prediction', help='Prediction, clustering or classification') 41 | parser.add_argument("--num_epochs", type=int, default=30000, help="Number of epochs of training") 42 | parser.add_argument('--is_subgraph', type=bool, default=False, help='Whether subgraph') 43 | parser.add_argument('--is_sample', type=bool, default=True, help='Whether sample nodes') 44 | parser.add_argument('--num_sample', type=int, default=1500, help='Number of sampling nodes') 45 | # parser.add_argument("--batch_size", type=int, default=1000, help="Size of the batches") 46 | parser.add_argument("--MBratio", type=int, default=100, help="number of epochs of training") 47 | parser.add_argument('--graph_lh', type=str, default='Laplacian', help='Graph likelihood') 48 | parser.add_argument('--lambda', type=float, default=1.0, help='lamda') 49 | parser.add_argument('--theta_norm', type=bool, 
default=False, help='Whether theta norm') 50 | 51 | args = parser.parse_args() 52 | args.device = 'cpu' if not torch.cuda.is_available() else f'cuda:{args.gpu_id}' 53 | 54 | seed_everything(args.seed) 55 | 56 | # =========================================== Dataset ===================================================================== # 57 | # Prepare for dataset 58 | dataset = Planetoid(args.dataset_path, args.dataset) 59 | data = dataset[0].to(args.device) 60 | data.edge_index = data.edge_index[[1, 0]] 61 | graph_processer = Graph_Processer() 62 | 63 | adj_csc = graph_processer.graph_from_edges(data.edge_index, data.num_nodes).tocsc() 64 | adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = graph_processer.edges_split_from_graph(adj_csc) 65 | 66 | # For encoder input and graph likelihood 67 | adj_train = adj_train + sp.eye(adj_train.shape[0]) 68 | data.edge_index = graph_processer.edges_from_graph(adj_train.tocoo(), args.device) 69 | 70 | # =========================================== Model ===================================================================== # 71 | model = WGAAE(in_dim=dataset.num_features, out_dim=args.out_dim, z_dims=args.z_dims, hid_dims=args.hid_dims, num_heads=args.num_heads, device=args.device) 72 | optim = torch.optim.Adam(model.parameters()) 73 | 74 | # Training 75 | best_AUC = best_AP = 0 76 | for epoch_index in range(args.num_epochs): 77 | if epoch_index <= 200: 78 | for i in range(20): 79 | _, _ = model.train_one_epoch(data=data, optim=optim, epoch_index=epoch_index, is_sample=args.is_sample, is_subgraph=args.is_subgraph, args=args, is_train=False) 80 | else: 81 | for i in range(5): 82 | _, _ = model.train_one_epoch(data=data, optim=optim, epoch_index=epoch_index, is_sample=args.is_sample, is_subgraph=args.is_subgraph, args=args, is_train=False) 83 | train_local_params, Loss = model.train_one_epoch(data=data, optim=optim, epoch_index=epoch_index, is_sample=args.is_sample, is_subgraph=args.is_subgraph, args=args) 84 | 85 | if args.task == 'classification': 86 | [train_loss, train_loss_cls, train_recon_llh, train_graph_llh] = Loss 87 | else: 88 | [train_loss, train_recon_llh, train_graph_llh] = Loss 89 | 90 | if epoch_index % 1 == 0: 91 | test_local_params, Loss = model.test_one_epoch(data, is_sample=args.is_sample, is_subgraph=args.is_subgraph, args=args) 92 | 93 | # if args.task == 'classification': 94 | # [test_loss, test_loss_cls, test_recon_llh, test_graph_llh] = Loss 95 | # else: 96 | # [test_loss, test_recon_llh, test_graph_llh] = Loss 97 | # On classification task 98 | # accs = [] 99 | # for mask in [dataset.train_mask, dataset.val_mask, dataset.test_mask]: 100 | # accs.append(int((pred[mask] == dataset.y[mask]).sum()) / int(mask.sum())) 101 | # [train_acc, val_acc, tmp_test_acc] = accs 102 | # best_test_acc = np.maximum(best_test_acc, tmp_test_acc) 103 | 104 | # On prediction task 105 | theta = test_local_params[1] 106 | # Construct theta_concat for prediction 107 | theta_concat = None 108 | for layer in range(model._model_setting.num_layers): 109 | if layer == 0: 110 | theta_concat = model.u[layer] * theta[layer] 111 | else: 112 | theta_concat = torch.cat([theta_concat, model.u[layer] * theta[layer]], 0) 113 | theta_concat = theta_concat.cpu().detach().numpy() 114 | 115 | metric = ROC_AP_SCORE(test_edges, test_edges_false, adj_csc, emb=theta_concat.T) 116 | best_AUC = np.maximum(best_AUC, metric._AUC) 117 | best_AP = np.maximum(best_AP, metric._AP) 118 | 119 | print(f'Epoch[{epoch_index}|{args.num_epochs}]: 
loss:{train_loss}, graph_lh:{train_graph_llh}, recon_lh:{train_recon_llh}.' 120 | f' best_AUC:{best_AUC}, best_AP: {best_AP}') 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /pydpm/model/deep_learning_pm/dcgan.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | DCGAN 4 | Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks 5 | Alec Radford, Luke Metz and Soumith Chintala 6 | Publihsed in ICLR 2016 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import torch.nn as nn 16 | import torch 17 | from torch.autograd import Variable 18 | from torchvision.utils import save_image 19 | from pydpm.utils.utils import unnormalize_to_zero_to_one 20 | from tqdm import tqdm 21 | import os 22 | 23 | class DCGAN(nn.Module): 24 | def __init__(self, args, device='cuda:0'): 25 | super(DCGAN, self).__init__() 26 | setattr(self, '_model_name', 'DCGAN') 27 | self.z_dim = args.z_dim 28 | self.generator = Generator(args.in_channels, z_dim=self.z_dim).to(device) 29 | self.discriminator = Discriminator(args.in_channels).to(device) 30 | self.adversarial_loss = torch.nn.BCELoss().to(device) 31 | self.in_channel = args.in_channels 32 | self.device = device 33 | self.Tensor = torch.FloatTensor if self.device == 'cpu' else torch.cuda.FloatTensor 34 | 35 | def sample(self, batch_size): 36 | """ 37 | Sample from generator 38 | Inputs: 39 | batch_size : [int] number of img which you want; 40 | Outputs: 41 | gen_imgs : [tensor] a batch of images 42 | """ 43 | # Sample noise as generator input 44 | z = torch.tensor(np.random.normal(0, 1, (batch_size, self.z_dim, 1, 1))).type(self.Tensor).to(self.device) 45 | # Generate a batch of images 46 | gen_imgs = self.generator(z) 47 | return gen_imgs 48 | 49 | def train_one_epoch(self, model_opt_G, model_opt_D, dataloader, sample_interval, epoch, n_epochs): 50 | ''' 51 | Train for one epoch 52 | Inputs: 53 | model_opt_G : Optimizer for generator 54 | model_opt_D : Optimizer for discriminator 55 | dataloader : Train dataset with form of dataloader 56 | sample_interval : interval betwen image samples while training 57 | epoch : Current epoch on training stage 58 | n_epoch : Total number of epochs on training stage 59 | ''' 60 | G_loss_t, D_loss_t = 0, 0 61 | train_bar = tqdm(iterable=dataloader) 62 | for i, (imgs, _) in enumerate(train_bar): 63 | train_bar.set_description(f'Epoch [{epoch}/{n_epochs}]') 64 | train_bar.set_postfix(G_loss=G_loss_t / (i + 1), D_loss=D_loss_t / (i + 1)) 65 | 66 | imgs = imgs.to(self.device) 67 | # Adversarial ground truths 68 | valid = Variable(self.Tensor(imgs.size(0), 1).fill_(1.0), requires_grad=False).to(self.device) 69 | fake = Variable(self.Tensor(imgs.size(0), 1).fill_(0.0), requires_grad=False).to(self.device) 70 | real_imgs = Variable(imgs.type(self.Tensor)) 71 | 72 | gen_imgs = self.sample(imgs.shape[0]) 73 | 74 | # Train Discriminator 75 | model_opt_D.zero_grad() 76 | real_loss = self.adversarial_loss(self.discriminator(real_imgs).view(imgs.shape[0], 1), valid) 77 | fake_loss = self.adversarial_loss(self.discriminator(gen_imgs.detach()).view(imgs.shape[0], 1), fake) 78 | d_loss = (real_loss + fake_loss) / 2 79 | d_loss.backward() 80 | model_opt_D.step() 81 | D_loss_t += d_loss.item() 82 | 83 | # Train Generator 84 | model_opt_G.zero_grad() 85 | g_loss = 
self.adversarial_loss(self.discriminator(gen_imgs).view(imgs.shape[0], 1), valid) 86 | g_loss.backward() 87 | model_opt_G.step() 88 | G_loss_t += g_loss.item() 89 | 90 | batches_done = epoch * len(dataloader) + i 91 | if batches_done % sample_interval == 0: 92 | sample_images = gen_imgs.data.cpu()[:25] 93 | sample_images = unnormalize_to_zero_to_one(sample_images) 94 | save_image(sample_images, "../../output/images/dcgan_%d.png" % batches_done, nrow=5, normalize=True) 95 | 96 | def save(self, model_path: str = '../save_models'): 97 | """ 98 | save model 99 | Inputs: 100 | model_path : [str] the path to save the model, default '../save_models/DCGAN.pth'; 101 | """ 102 | # Save the model 103 | torch.save({'state_dict': self.state_dict()}, model_path + '/' + self._model_name + '.pth') 104 | print('model has been saved by ' + model_path + '/' + self._model_name + '.pth') 105 | 106 | 107 | def load(self, model_path): 108 | """ 109 | load model 110 | Inputs: 111 | model_path : [str] the path to load the model; 112 | """ 113 | assert os.path.exists(model_path), 'Path Error: can not find the path to load the model' 114 | # Load the model 115 | checkpoint = torch.load(model_path) 116 | self.load_state_dict(checkpoint['state_dict']) 117 | 118 | class Generator(torch.nn.Module): 119 | def __init__(self, in_channels, z_dim=100): 120 | ''' 121 | in_channels : channels of input data 122 | z_dim : dimension of latent vector 123 | ''' 124 | super().__init__() 125 | self.convT_layers = nn.Sequential( 126 | nn.ConvTranspose2d(in_channels=z_dim, out_channels=1024, kernel_size=4, stride=1, padding=0), 127 | nn.BatchNorm2d(num_features=1024), 128 | nn.ReLU(True), 129 | 130 | # [bs, 1024, 4, 4] 131 | nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=4, stride=2, padding=1), 132 | nn.BatchNorm2d(num_features=512), 133 | nn.ReLU(True), 134 | 135 | # [bs, 512, 8, 8] 136 | nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1), 137 | nn.BatchNorm2d(num_features=256), 138 | nn.ReLU(True), 139 | 140 | # [bs, 256, 16, 16] 141 | nn.ConvTranspose2d(in_channels=256, out_channels=in_channels, kernel_size=4, stride=2, padding=1)) 142 | # [bs, c, 32, 32] 143 | 144 | self.output = nn.Tanh() 145 | 146 | def forward(self, x): 147 | x = self.convT_layers(x) 148 | x = self.output(x) 149 | return x 150 | 151 | 152 | class Discriminator(torch.nn.Module): 153 | def __init__(self, in_channels): 154 | ''' 155 | in_channels : channels of input data 156 | ''' 157 | super().__init__() 158 | self.conv_layers = nn.Sequential( 159 | # [bs, c, 32, 32] 160 | nn.Conv2d(in_channels=in_channels, out_channels=256, kernel_size=4, stride=2, padding=1), 161 | nn.LeakyReLU(0.2, inplace=True), 162 | 163 | # [bs, 256, 16, 16] 164 | nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1), 165 | nn.BatchNorm2d(512), 166 | nn.LeakyReLU(0.2, inplace=True), 167 | 168 | # [bs, 512, 8, 8] 169 | nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=4, stride=2, padding=1), 170 | nn.BatchNorm2d(1024), 171 | nn.LeakyReLU(0.2, inplace=True)) 172 | # [bs, 1024, 4, 4] 173 | 174 | self.output = nn.Sequential( 175 | nn.Conv2d(in_channels=1024, out_channels=1, kernel_size=4, stride=1, padding=0), 176 | # [bs, 1, 1, 1] 177 | nn.Sigmoid()) 178 | 179 | def forward(self, x): 180 | x = self.conv_layers(x) 181 | x = self.output(x) 182 | return x 183 | --------------------------------------------------------------------------------
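Note: the DCGAN class above exposes the same train_one_epoch / sample / save interface as the other deep_learning_pm models. The sketch below is a minimal, illustrative usage example and is not the bundled DCGAN_Demo.py (whose contents are not shown here); it assumes DCGAN is re-exported from pydpm.model as the other demos do for their models, the argument names (--in_channels, --z_dim) mirror the attributes the constructor reads, and the dataset, optimizer settings and paths are assumed choices.

# Minimal DCGAN training sketch (illustrative; settings and paths are assumptions)
import argparse
import os

import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10

from pydpm.model import DCGAN  # assumed to be re-exported by pydpm.model, as in the other demos

parser = argparse.ArgumentParser()
parser.add_argument("--in_channels", type=int, default=3, help="channels of the input images")
parser.add_argument("--z_dim", type=int, default=100, help="dimensionality of the latent vector")
parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate")
parser.add_argument("--num_epochs", type=int, default=30, help="number of epochs of training")
parser.add_argument("--batch_size", type=int, default=64, help="batch size of dataloader")
parser.add_argument("--sample_interval", type=int, default=400, help="interval between image samples")
args = parser.parse_args()
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# the generator/discriminator above are built for 32x32 inputs, so CIFAR10 fits directly
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
dataset = CIFAR10(root='../../dataset', train=True, download=True, transform=transform)
dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, drop_last=True)

# train_one_epoch writes intermediate samples to ../../output/images/dcgan_*.png,
# and save() writes DCGAN.pth into the given directory, so create both folders first
os.makedirs('../../output/images', exist_ok=True)
os.makedirs('../../save_models', exist_ok=True)

model = DCGAN(args, device=device)
opt_G = torch.optim.Adam(model.generator.parameters(), lr=args.lr, betas=(0.5, 0.999))
opt_D = torch.optim.Adam(model.discriminator.parameters(), lr=args.lr, betas=(0.5, 0.999))

for epoch in range(args.num_epochs):
    model.train_one_epoch(opt_G, opt_D, dataloader, args.sample_interval, epoch, args.num_epochs)

model.save('../../save_models')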