├── pydpm ├── model │ ├── hybrid_pm │ │ └── __init__.py │ ├── bayesian_pm │ │ └── __init__.py │ ├── deep_learning_pm │ │ ├── __init__.py │ │ ├── rbm.py │ │ └── dcgan.py │ ├── __init__.py │ └── basic_model.py ├── example │ ├── Bayesian_PM │ │ ├── __init__.py │ │ ├── Factor_Analysis │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── FA_demo.py │ │ ├── Dirchilet_Belief_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── DirBN_Demo.py │ │ ├── Gaussian_Mixture_Model │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── GMM_Demo.py │ │ ├── Poisson_Factor_Analysis │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── PFA_Demo.py │ │ ├── Deep_Poisson_Factor_Analysis │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── DPFA_Demo.py │ │ ├── Latent_Dirchilet_Allocation │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── LDA_Demo.py │ │ ├── Poisson_Gamma_Belief_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── PGBN_Demo.py │ │ ├── Poisson_Gamma_Dynamic_System │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── PGDS_Demo.py │ │ ├── Deep_Poisson_Gamma_Dynamic_System │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── DPGDS_Demo.py │ │ ├── Graph_Poisson_Gamma_Belief_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── GPGBN_Demo.py │ │ ├── Word_Embeddings_Deep_Topic_Model │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── WEDTM_Demo.py │ │ ├── Multimodal_Poisson_Gamma_Belief_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── MPGBN_Demo.py │ │ └── Convolutional_Poisson_Gamma_Belief_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ ├── CPFA_Demo.py │ │ │ └── CPGBN_Demo.py │ ├── Hybrid_PM │ │ ├── __init__.py │ │ ├── HyperMiner │ │ │ ├── __init__.py │ │ │ ├── HyperMiner.py │ │ │ └── readme.md │ │ ├── TopicNet │ │ │ ├── __init__.py │ │ │ ├── TopicNet.py │ │ │ └── readme.md │ │ ├── Sawtooth_Embedding_Topic_Model │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── SawETM_Demo.py │ │ ├── Deep_Coupling_Embedding_Topic_Model │ │ │ ├── __init__.py │ │ │ ├── dcETM.py │ │ │ └── readme.md │ │ ├── Weibull_Graph_Attention_Autoencoder │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── WGAAE_Demo.py │ │ ├── Weibull_Hybrid_Autoencoding_Inference │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── WHAI_Demo.py │ │ ├── Knowledge_Aware_Bayesian_Deep_Topic_Model │ │ │ ├── __init__.py │ │ │ ├── KG_ETM.py │ │ │ └── readme.md │ │ ├── Multimodal_Weibull_Variational_Autoencoder │ │ │ ├── __init__.py │ │ │ ├── MWVAE.py │ │ │ └── readme.md │ │ ├── Variational_Temporal_Deep_Generative_Model │ │ │ ├── __init__.py │ │ │ ├── RGBN.py │ │ │ └── readme.md │ │ ├── Generative_Text_Convolutional_Neural_Network │ │ │ ├── GTCNN.py │ │ │ ├── __init__.py │ │ │ └── readme.md │ │ └── Variational_Edge_Parition_Graph_Neural_Network │ │ │ ├── VEPM.py │ │ │ ├── __init__.py │ │ │ └── readme.md │ ├── Deep_Learning_PM │ │ ├── __init__.py │ │ ├── Real_NVP │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── Real_NVP_Demo.py │ │ ├── Normlizing_Flow │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── NFlow_Demo.py │ │ ├── Variational_Autoencoder │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── VAE_Demo.py │ │ ├── Generative_Adversarial_Network │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── GAN_Demo.py │ │ ├── Restricted_Boltzmann_Machine │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── RBM_Demo.py │ │ ├── VQ_Variational_Autoencoder │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── VQ_VAE_Demo.py │ │ ├── Conditional_Variational_Auto-encoder │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── CVAE_Demo.py │ │ 
├── Denoising_Diffusion_Probabilistic_Model │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── DDPM_Demo.py │ │ ├── Wasserstein_Generative_Adversarial_Networks │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── WGAN_Demo.py │ │ ├── Deep_Convolution_Generative_Adversarial_Networks │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── DCGAN_Demo.py │ │ └── Information_Maximizing_Generative_Adversarial_Nets │ │ │ ├── __init__.py │ │ │ ├── readme.md │ │ │ └── InfoGAN_Demo.py │ └── __init__.py ├── utils │ ├── __init__.py │ └── utils.py ├── __init__.py ├── sampler │ ├── __init__.py │ ├── _compact │ │ ├── model_sampler_linux.h │ │ ├── distribution_sampler_linux.h │ │ ├── model_sampler_win.h │ │ ├── distribution_sampler_win.h │ │ ├── crt_cpu.c │ │ ├── crt_multi_aug_cpu.c │ │ └── multi_aug_cpu.c │ ├── distribution_sampler_cpu.py │ ├── basic_sampler.py │ ├── pre_process.py │ └── model_sampler_cpu.py ├── dataloader │ ├── __init__.py │ └── image_data.py └── metric │ ├── __init__.py │ ├── reconstruction.py │ ├── purity.py │ ├── perplexity.py │ ├── accuracy.py │ ├── roc_score.py │ ├── cluster_acc.py │ ├── normalized_mutual_information.py │ └── topic_coherence.py ├── .gitignore ├── docs └── imgs │ ├── intro.png │ ├── compare_numpy.png │ ├── dpm_tutorial.png │ ├── pydpm_logo_1.png │ ├── pydpm_logo_2.png │ ├── pydpm_framework.png │ ├── pydpm_workflow.png │ ├── pydpm_workflow1.png │ ├── pydpm_workflow2.png │ ├── compare_tf2_torch.png │ ├── pydpm_framework_3.png │ ├── pydpm_framework_4.png │ ├── compare_cupy_pycuda.png │ ├── pydpm_framework_new.png │ └── pydpm_framework_old.png ├── CONTRIBUTING.md ├── MANIFEST.in ├── setup.py └── enviroment.yaml /pydpm/model/hybrid_pm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | */.DS_Store -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/model/bayesian_pm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/model/deep_learning_pm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/HyperMiner/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
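Orientation note: the tree above maps directly onto the package's public import surface — models are re-exported from pydpm.model, metrics from pydpm.metric, the CPU/GPU samplers from pydpm.sampler, and helpers from pydpm.utils (see the __init__.py files reproduced later in this dump). A minimal import sketch under that layout follows; only the names below are confirmed by those __init__ files, everything else is illustrative.

```python
# Minimal import sketch based on the __init__.py files shown later in this dump.
# The class/function names are confirmed there; no constructor arguments are assumed here.
from pydpm.model import PGBN, VAE, WHAI        # bayesian_pm / deep_learning_pm / hybrid_pm models
from pydpm.metric import ACC, Perplexity, NMI  # evaluation metrics
from pydpm.sampler import Basic_Sampler        # CPU/GPU distribution sampler
from pydpm.utils import seed_everything        # reproducibility helper from utils.py

seed_everything(2023)                          # seeds random, numpy and torch in one call
```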
/pydpm/example/Hybrid_PM/TopicNet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Factor_Analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Real_NVP/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/HyperMiner/HyperMiner.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/TopicNet/TopicNet.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Normlizing_Flow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/__init__.py: -------------------------------------------------------------------------------- 1 | '''need '__init__.py' to package pydpm''' 2 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Dirchilet_Belief_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Gaussian_Mixture_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Factor_Analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Factor_Analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Latent_Dirchilet_Allocation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Belief_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Dynamic_System/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Variational_Autoencoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Sawtooth_Embedding_Topic_Model/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from .basic_sampler import Basic_Sampler -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Gamma_Dynamic_System/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Graph_Poisson_Gamma_Belief_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Word_Embeddings_Deep_Topic_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Generative_Adversarial_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Restricted_Boltzmann_Machine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/VQ_Variational_Autoencoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Deep_Coupling_Embedding_Topic_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Deep_Coupling_Embedding_Topic_Model/dcETM.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Graph_Attention_Autoencoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Hybrid_Autoencoding_Inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Multimodal_Poisson_Gamma_Belief_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Knowledge_Aware_Bayesian_Deep_Topic_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Multimodal_Weibull_Variational_Autoencoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
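pydpm/sampler/__init__.py above exposes Basic_Sampler, which sits on top of the CPU C kernels and CUDA headers listed under sampler/_compact. A hedged sketch of how it might be instantiated follows; only the Basic_Sampler name is confirmed by this dump, while the 'gpu' device string and the gamma(...) call are assumptions to verify against basic_sampler.py and the distribution demos.

```python
# Hedged sketch of the sampler exported by pydpm/sampler/__init__.py.
# Basic_Sampler is the only name confirmed by this dump; the device string and the
# gamma(shape, scale) method signature are assumptions -- check basic_sampler.py.
import numpy as np
from pydpm.sampler import Basic_Sampler

sampler = Basic_Sampler('gpu')                 # assumed: 'cpu' would fall back to the C kernels
shape = np.ones(10000, dtype=np.float32) * 2.0
samples = sampler.gamma(shape, 1.0)            # assumed: element-wise Gamma(2, 1) draws
print(samples.mean())                          # should be close to the Gamma mean of 2.0
```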
/pydpm/example/Hybrid_PM/Variational_Temporal_Deep_Generative_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Convolutional_Poisson_Gamma_Belief_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Conditional_Variational_Auto-encoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Denoising_Diffusion_Probabilistic_Model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Generative_Text_Convolutional_Neural_Network/GTCNN.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Generative_Text_Convolutional_Neural_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Knowledge_Aware_Bayesian_Deep_Topic_Model/KG_ETM.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Multimodal_Weibull_Variational_Autoencoder/MWVAE.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Variational_Edge_Parition_Graph_Neural_Network/VEPM.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Variational_Edge_Parition_Graph_Neural_Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Variational_Temporal_Deep_Generative_Model/RGBN.py: -------------------------------------------------------------------------------- 1 | #TODO -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Wasserstein_Generative_Adversarial_Networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/imgs/intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/intro.png -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Deep_Convolution_Generative_Adversarial_Networks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/pydpm/example/Deep_Learning_PM/Information_Maximizing_Generative_Adversarial_Nets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/imgs/compare_numpy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/compare_numpy.png -------------------------------------------------------------------------------- /docs/imgs/dpm_tutorial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/dpm_tutorial.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_logo_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_logo_1.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_logo_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_logo_2.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_framework.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_workflow.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_workflow1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_workflow1.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_workflow2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_workflow2.png -------------------------------------------------------------------------------- /pydpm/dataloader/__init__.py: -------------------------------------------------------------------------------- 1 | from .graph_data import * 2 | from .image_data import * 3 | from .text_data import * -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Factor_Analysis/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Factor Analysis (FA). 
2 | 3 | -------------------------------------------------------------------------------- /docs/imgs/compare_tf2_torch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/compare_tf2_torch.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_framework_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_framework_3.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_framework_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_framework_4.png -------------------------------------------------------------------------------- /docs/imgs/compare_cupy_pycuda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/compare_cupy_pycuda.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_framework_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_framework_new.png -------------------------------------------------------------------------------- /docs/imgs/pydpm_framework_old.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BoChenGroup/PyDPM/HEAD/docs/imgs/pydpm_framework_old.png -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Gaussian_Mixture_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Gaussian Mixture Model (GMM). 2 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Belief_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Poisson Gamma Belief Network (PGBN), from the 2015 NeurIPS paper with the title "The Poisson Gamma Belief Network". 2 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Normlizing_Flow/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the RealNVP_2D, one of the traditional flow-based models from the 2017 paper with the title "Density Estimation Using Real NVP". 2 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/VQ_Variational_Autoencoder/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Neural Discrete Representation Learning (VQ-VAE), from the 2017 paper with the title "Neural Discrete Representation Learning". 2 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Factor_Analysis/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Poisson Factor Analysis (PFA), from the 2012 AISTATS paper with the title "Poisson Factor Analysis". 
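The Bayesian_PM demos indexed by these readme files all follow the construct → initial → train → evaluate pattern implied by Basic_Model (defined later in this dump). Below is a sketch of that workflow for PFA; the class name comes from pydpm/model/__init__.py, but the constructor arguments and method signatures are assumptions, so treat PFA_Demo.py as the authoritative reference.

```python
# Sketch of the common Bayesian_PM demo workflow, shown here for PFA.
# Only the PFA class name is confirmed by pydpm/model/__init__.py; K, device and the
# initial()/train() signatures are assumptions -- see PFA_Demo.py for the real ones.
import numpy as np
from pydpm.model import PFA

train_data = np.random.poisson(1.0, size=(500, 2000))  # toy V*N count matrix (shape convention assumed)

model = PFA(K=100, device='gpu')                 # 100 latent factors (argument names assumed)
model.initial(train_data)                        # allocates global_params/local_params (see Basic_Model)
local_params = model.train(100, train_data)      # run 100 Gibbs-sampling iterations (signature assumed)
model.save()                                     # assumed: persists the learned global parameters
```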
2 | 3 | The source project can be found in https://github.com/yxnchen/PFA 4 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Conditional_Variational_Auto-encoder/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Conditional Variational Autoencoder (CVAE), from the 2015 paper with the title "Learning Structured Output Representation using Deep Conditional Generative Models". 2 | 3 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Real_NVP/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the more powerful RealNVP, one of the traditional flow-based models from the 2017 paper with the title "Density Estimation Using Real NVP". 2 | Reference code can be found in https://github.com/fmu2/realNVP -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Variational_Autoencoder/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Variational Autoencoder (VAE), from the 2014 paper with the title "Auto-Encoding Variational Bayes". 2 | 3 | The source project can be found in https://github.com/AntixK/PyTorch-VAE 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Dynamic_System/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Poisson-Gamma Dynamical Systems (PGDS), from the 2016 NeurIPS paper with the title "Poisson-Gamma Dynamical Systems". 2 | 3 | The source project can be found in https://github.com/aschein/pgds 4 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Generative_Adversarial_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Generative Adversarial Networks (GAN), from the 2014 paper with the title "Generative Adversarial Networks". 2 | 3 | The source project can be found in https://github.com/yfeng95/GAN 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Latent_Dirchilet_Allocation/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Latent Dirichlet Allocation (LDA), from the 2003 JMLR paper with the title "Latent Dirichlet Allocation". 2 | 3 | The source project can be found in https://github.com/lda-project/lda 4 | -------------------------------------------------------------------------------- /pydpm/example/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Examples of models and the GPU distribution sampler.
3 | To run the model demos, you need to download the dataset files 'mnist_gray.mat' & 'TREC.pkl' and put them under pydpm.example.dataset 4 | dataset url: https://1drv.ms/u/s!AlkDawhaUUBWtHRWuNESEdOsDz7V?e=LQlGLW 5 | ''' 6 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Dirchilet_Belief_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Dirichlet Belief Networks (DirBN), from the 2018 NeurIPS paper with the title "Dirichlet belief networks for topic structure learning". 2 | 3 | The source project can be found in https://github.com/ethanhezhao/DirBN 4 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Deep_Convolution_Generative_Adversarial_Networks/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Deep Convolutional Generative Adversarial Networks (DCGAN), from the 2016 paper with the title "Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks". 2 | 3 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Wasserstein_Generative_Adversarial_Networks/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Wasserstein Generative Adversarial Networks (WGAN), from the 2017 paper with the title "Wasserstein GAN". 2 | 3 | The source project can be found in https://github.com/martinarjovsky/WassersteinGAN 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Factor_Analysis/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Deep Poisson Factor Analysis (DPFA), from the 2015 ICML paper with the title "Scalable Deep Poisson Factor Analysis for Topic Modeling". 2 | 3 | The source project can be found in https://github.com/zhegan27/dpfa_icml2015 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Gamma_Dynamic_System/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Deep Poisson Gamma Dynamical Systems (DPGDS), from the 2018 NeurIPS paper with the title "Deep Poisson Gamma Dynamical Systems". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/DPGDS 4 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/TopicNet/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the TopicNet, from the 2021 NeurIPS paper with the title "TopicNet: Semantic Graph-Guided Topic Discovery". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/TopicNet 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/HyperMiner/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the HyperMiner, from the paper with the title "HyperMiner: Topic Taxonomy Mining with Hyperbolic Embedding".
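As noted in pydpm/example/__init__.py above, the demos expect 'mnist_gray.mat' and 'TREC.pkl' to be downloaded into pydpm/example/dataset. A minimal loading sketch under that assumption follows; the variable names stored inside the .mat file and the structure of the pickle are not documented here, so inspect them before use.

```python
# Minimal sketch for loading the demo datasets mentioned in pydpm/example/__init__.py.
# The internal keys of mnist_gray.mat and the structure of TREC.pkl are not documented
# in this dump, so they are inspected rather than assumed.
import pickle
import scipy.io as sio

mat = sio.loadmat('pydpm/example/dataset/mnist_gray.mat')
print([k for k in mat.keys() if not k.startswith('__')])   # list the stored variable names

with open('pydpm/example/dataset/TREC.pkl', 'rb') as f:
    trec = pickle.load(f)
print(type(trec))                                           # check the container type before indexing
```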
2 | 3 | The source project can be found in https://github.com/BoChenGroup/HyperMiner 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Word_Embeddings_Deep_Topic_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Word Embeddings Deep Topic Model (WEDTM), from the 2018 ICML paper with the title "Inter and Intra Topic Structure Learning with Word Embeddings". 2 | 3 | The source project can be found in https://github.com/ethanhezhao/WEDTM 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Multimodal_Poisson_Gamma_Belief_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Mutlimodal Poisson Gamma Belief Network (MPGBN), from the 2022 NeurIPS paper with the title "Mutlimodal Poisson Gamma Belief Network". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/Multimodal_PGBN 4 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Restricted_Boltzmann_Machine/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Restricted Boltzmann Machines (RBM), from the 2010 paper with the title "A Practical guide to training restricted Boltzmann machines". 2 | 3 | The source project can be found in https://github.com/echen/restricted-boltzmann-machines 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Convolutional_Poisson_Gamma_Belief_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Convolutional Poisson Gamma Belief Network (CPGBN), from the 2019 ICML paper with the title "Convolutional Poisson Gamma Belief Network Demo". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/CPGBN 4 | 5 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Hybrid_Autoencoding_Inference/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Weibull Hybrid Autoencoding Inference (WHAI), from the 2018 ICLR paper with the title "WHAI: Weibull Hybrid Autoencoding Inference for Deep Topic Modeling". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/WHAI 4 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Graph_Poisson_Gamma_Belief_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Graph Poisson Gamma Belief Network (GPGBN), from the 2020 NeurIPS paper with the title "Deep Relational Topic Modeling via Graph Poisson Gamma Belief Network". 
2 | 3 | The source project can be found in https://github.com/BoChenGroup/GPGBN 4 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Graph_Attention_Autoencoder/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Weibull Graph Attention Autoencoder (WGAAE), from the 2020 NeurIPS paper with the title "Bayesian Attention Modules". 2 | 3 | The source project can be found in https://github.com/chaojiewang94/WGAAE 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Multimodal_Weibull_Variational_Autoencoder/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Multimodal Weibull Variational Autoencoder (MEVAE), from the 2022 IEEE Transactions on Cybernetics paper with the title "Multimodal Weibull Variational Autoencoder for Jointly Modeling Image-Text Data". 2 | 3 | The details of this model will be coming soon. 4 | -------------------------------------------------------------------------------- /pydpm/metric/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import ACC 2 | from .reconstruction import Poisson_Likelihood, Reconstruct_Error 3 | from .cluster_acc import Cluster_ACC 4 | from .topic_coherence import Topic_Coherence 5 | from .perplexity import Perplexity 6 | from .normalized_mutual_information import NMI 7 | from .roc_score import ROC_AP_SCORE 8 | from .purity import Purity 9 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Denoising_Diffusion_Probabilistic_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Denoising Diffusion Probabilistic Models (DDPM), from the 2020 NeurIPS paper with the title "Denoising Diffusion Probabilistic Models". 2 | 3 | The source project can be found in https://github.com/abarankab/DDPM 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Knowledge_Aware_Bayesian_Deep_Topic_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Knowledge Graph Embedding Topic Model (KG-ETM), from the 2022 NeurIPS paper with the title "Knowledge-Aware Bayesian Deep Topic Model". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/TopicKG 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Variational_Temporal_Deep_Generative_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Recurrent Gamma Belief Network (RGBN), from the 2020 ICLR paper with the title "Recurrent Hierarchical Topic-Guided Neural Language Models". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/rGBN_RNN 4 | 5 | The details of this model will be coming soon. 
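pydpm/metric/__init__.py above re-exports the evaluation classes used by the demos (ACC, Perplexity, NMI, Purity, and so on). As most of the class bodies later in this dump show, these metrics compute and print their score when constructed; a small toy-data usage sketch under that assumption:

```python
# Toy usage sketch for the metric classes exported by pydpm/metric/__init__.py.
# Shapes follow the docstrings shown in this dump: ACC expects K*N feature matrices,
# Perplexity expects N*V count/reconstruction matrices. The random data is illustrative only.
import numpy as np
from pydpm.metric import ACC, Purity, Perplexity

x_tr, x_te = np.random.rand(50, 200), np.random.rand(50, 80)   # K*N latent features
y_tr, y_te = np.random.randint(0, 4, 200), np.random.randint(0, 4, 80)

ACC(x_tr, x_te, y_tr, y_te, model='SVM')      # fits an SVM on the features and prints accuracy
Purity(y_te, np.random.randint(0, 4, 80))     # prints the cluster purity of a (random) assignment

x = np.random.poisson(1.0, size=(80, 500)).astype(float)       # N*V heldout counts
Perplexity(x, x + 1e-3)                       # prints the per-heldout-word perplexity
```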
6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Deep_Coupling_Embedding_Topic_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Deep Coupling Embedding Topic Model (dc-ETM), from the 2022 NeurIPS paper with the title "Alleviating ''Posterior Collapse'' in Deep Topic Models via Policy Gradient". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/dc-ETM 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Information_Maximizing_Generative_Adversarial_Nets/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Information Maximizing Generative Adversarial Nets (infoGAN), from the 2014 paper with the title "InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets". 2 | 3 | The source project can be found in https://github.com/Natsu6767/InfoGAN-PyTorch 4 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Variational_Edge_Parition_Graph_Neural_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Variational Edge Parition Graph Neural Network (VEPM), from the 2022 NeurIPS paper with the title "A Variational Edge Partition Model for Supervised Graph Representation Learning". 2 | 3 | The source project can be found in https://github.com/YH-UtMSB/VEPM 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Generative_Text_Convolutional_Neural_Network/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Generative Text Convolutional Neural Network (GTCNN), from the paper with the title "Generative Text Convolutional Neural Network for Hierarchial Document Representation Learning". 2 | 3 | The source project can be found in https://github.com/BoChenGroup/GTCNN 4 | 5 | The details of this model will be coming soon. 6 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Sawtooth_Embedding_Topic_Model/readme.md: -------------------------------------------------------------------------------- 1 | This is the demo code for the Sawtooth Embedding Topic Model (SawETM), from the 2021 paper with the title "Sawtooth Factorial Topic Embeddings Guided Gamma Belief Network". 
2 | 3 | The source project can be found in https://github.com/BoChenGroup/SawETM and the implementation in PyDPM references https://github.com/NoviceStone/HyperMiner/blob/main/models/sawetm.py 4 | -------------------------------------------------------------------------------- /pydpm/sampler/_compact/model_sampler_linux.h: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // status 9 | #define blockDimX 32 10 | #define blockDimY 4 // blockDimX * Y should be multiples of 32, and no more than 1024 11 | #define gridDimX 128 12 | #define nStatus (blockDimX * blockDimY * gridDimX) 13 | 14 | // const 15 | #define one_third 0.333333333333333 16 | #define Pi 3.141592654 17 | 18 | // gamma 19 | #define nThreads_x 32 20 | #define nThreads_y 4 21 | #define nThreads (nThreads_x * nThreads_y) 22 | 23 | -------------------------------------------------------------------------------- /pydpm/metric/reconstruction.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Metric 4 | =========================================== 5 | 6 | """ 7 | 8 | # Author: Chaojie Wang ; Jiawen Wu 9 | # License: BSD-3-Clause 10 | 11 | import numpy as np 12 | 13 | from scipy.special import gamma 14 | from ..utils import * 15 | 16 | def Poisson_Likelihood(X, X_re): 17 | 18 | # X[np.where(X>100)] = 100 19 | # X_re[np.where(X>100)] = 100 20 | 21 | Likelihood = np.sum(X*log_max(X_re) - X_re - log_max(gamma(X_re + 1))) 22 | return Likelihood 23 | 24 | def Reconstruct_Error(X, X_re): 25 | return np.power(X - X_re, 2).sum() -------------------------------------------------------------------------------- /pydpm/sampler/_compact/distribution_sampler_linux.h: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // status 9 | #define blockDimX 32 10 | #define blockDimY 4 // blockDimX * Y should be multiples of 32, and no more than 1024 11 | #define gridDimX 128 12 | #define nStatus (blockDimX * blockDimY * gridDimX) 13 | 14 | // const 15 | #define one_third 0.333333333333333 16 | #define Pi 3.141592654 17 | 18 | // gamma 19 | #define nThreads_gamma_x 32 20 | #define nThreads_gamma_y 4 21 | #define nThreads_gamma (nThreads_gamma_x * nThreads_gamma_y) 22 | 23 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to PyDPM 2 | 3 | If you are interested in contributing to PyDPM, your contributions will likely fall into one of the following two categories: 4 | 5 | 1. You want to implement a new feature: 6 | - In general, we accept any features as long as they fit the scope of this package. If you are unsure about this or need help on the design/implementation of your feature, post about it in an issue. 7 | 2. You want to fix a bug: 8 | - Feel free to send a Pull Request any time you encounter a bug. Please provide a clear and concise description of what the bug was. If you are unsure about if this is a bug at all or how to fix, post about it in an issue. 9 | 10 | Once you finish implementing a feature or bug-fix, please send a Pull Request to https://github.com/BoChenGroup/PyDPM. 
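Unlike the class-based metrics, pydpm/metric/reconstruction.py (reproduced above) exposes two plain functions, Poisson_Likelihood and Reconstruct_Error, both re-exported by pydpm.metric. A short toy usage sketch:

```python
# Toy usage sketch for the two functions defined in pydpm/metric/reconstruction.py.
# X is an observed count matrix and X_re its model reconstruction; the values are illustrative.
import numpy as np
from pydpm.metric import Poisson_Likelihood, Reconstruct_Error

X = np.random.poisson(3.0, size=(100, 50)).astype(float)
X_re = X + 0.1 * np.random.rand(100, 50)          # pretend reconstruction close to X

print(Poisson_Likelihood(X, X_re))                # summed element-wise Poisson log-likelihood terms
print(Reconstruct_Error(X, X_re))                 # squared error, sum((X - X_re)**2)
```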
11 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pydpm/_sampler/_compact/crt_cpu.c 2 | include pydpm/_sampler/_compact/crt_cpu.dll 3 | include pydpm/_sampler/_compact/crt_cpu.so 4 | include pydpm/_sampler/_compact/crt_multi_aug_cpu.c 5 | include pydpm/_sampler/_compact/crt_multi_aug_cpu.dll 6 | include pydpm/_sampler/_compact/crt_multi_aug_cpu.so 7 | include pydpm/_sampler/_compact/multi_aug_cpu.c 8 | include pydpm/_sampler/_compact/multi_aug_cpu.dll 9 | include pydpm/_sampler/_compact/multi_aug_cpu.so 10 | include pydpm/_sampler/_compact/sampler_kernel_win.cu 11 | include pydpm/_sampler/_compact/sampler_kernel_win.h 12 | include pydpm/_sampler/_compact/sampler_kernel_linux.cu 13 | include pydpm/_sampler/_compact/sampler_kernel_linux.h 14 | include compare_numpy.jpg 15 | include compare_tf2_torch.jpg 16 | include pydpm_framework.png 17 | -------------------------------------------------------------------------------- /pydpm/sampler/_compact/model_sampler_win.h: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // DLL export function 9 | // if not use DLLEXPORT, the function will be unable to be transferred on Windows 10 | #define DLLEXPORT extern "C" __declspec(dllexport) 11 | 12 | // status 13 | #define blockDimX 32 14 | #define blockDimY 4 // blockDimX * Y should be multiples of 32, and no more than 1024 15 | #define gridDimX 128 16 | #define nStatus (blockDimX * blockDimY * gridDimX) 17 | 18 | // const 19 | #define one_third 0.333333333333333 20 | #define Pi 3.141592654 21 | 22 | // gamma 23 | #define nThreads_x 32 24 | #define nThreads_y 4 25 | #define nThreads (nThreads_x * nThreads_y) 26 | 27 | -------------------------------------------------------------------------------- /pydpm/sampler/_compact/distribution_sampler_win.h: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "device_launch_parameters.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // DLL export function 9 | // if not use DLLEXPORT, the function will be unable to be transferred on Windows 10 | #define DLLEXPORT extern "C" __declspec(dllexport) 11 | 12 | // status 13 | #define blockDimX 32 14 | #define blockDimY 4 // blockDimX * Y should be multiples of 32, and no more than 1024 15 | #define gridDimX 128 16 | #define nStatus (blockDimX * blockDimY * gridDimX) 17 | 18 | // const 19 | #define one_third 0.333333333333333 20 | #define Pi 3.141592654 21 | 22 | // gamma 23 | #define nThreads_gamma_x 32 24 | #define nThreads_gamma_y 4 25 | #define nThreads_gamma (nThreads_gamma_x * nThreads_gamma_y) 26 | 27 | -------------------------------------------------------------------------------- /pydpm/metric/purity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Author: Xinyang Liu 4 | # License: BSD-3-Clause 5 | 6 | import numpy as np 7 | 8 | class Purity(object): 9 | 10 | def __init__(self, y, ypred): 11 | """ 12 | Inputs: 13 | y: the ground_true, shape:(n_sample,) 14 | ypred: pred_label, shape:(n_sample,) 15 | Output: 16 | accuracy of cluster, in [0, 1] 17 | """ 18 | self.y = y 19 | self.ypred = ypred 20 | 21 | self._get() 22 | 23 | print(f'The 
cluster purity is: {self._purity:.4f}') 24 | 25 | def _get(self): 26 | 27 | clusters = np.unique(self.ypred) 28 | counts = [] 29 | for c in clusters: 30 | indices = np.where(self.ypred == c)[0] 31 | max_votes = np.bincount(self.y[indices]).max() 32 | counts.append(max_votes) 33 | self._purity = sum(counts) / self.y.shape[0] 34 | -------------------------------------------------------------------------------- /pydpm/metric/perplexity.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | per-held-word perplexity 4 | =========================================== 5 | 6 | """ 7 | #!/usr/bin/python3 8 | # -*- coding: utf-8 -*- 9 | # Author: Xinyang Liu 10 | # License: BSD-3-Clause 11 | 12 | import numpy as np 13 | from ..utils import * 14 | 15 | class Perplexity(object): 16 | def __init__(self, x, x_hat): 17 | ''' 18 | Inputs: 19 | x: [float] np.ndarray, N*V test matrix, the observations for test_data 20 | x_hat: [float] np.ndarray, N*V reconstructed matrix 21 | Outputs: 22 | PPL: [float], the perplexity score 23 | ''' 24 | 25 | self.x = x 26 | self.x_hat = x_hat 27 | 28 | self._get() 29 | 30 | print(f'The PPL is: {self._PPL:.4f}') 31 | 32 | def _get(self): 33 | 34 | self.x_hat = self.x_hat / (np.sum(self.x_hat, axis=1, keepdims=True) + realmin) 35 | ppl = -1.0 * self.x * np.log(self.x_hat + realmin) / np.sum(self.x) 36 | ppl = np.exp(ppl.sum()) 37 | 38 | self._PPL = ppl 39 | -------------------------------------------------------------------------------- /pydpm/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .basic_model import Params, Basic_Model 2 | from .bayesian_pm.lda import LDA 3 | from .bayesian_pm.pfa import PFA 4 | from .bayesian_pm.pgbn import PGBN 5 | from .bayesian_pm.cpfa import CPFA 6 | from .bayesian_pm.cpgbn import CPGBN 7 | from .bayesian_pm.pgds import PGDS 8 | from .bayesian_pm.dpgds import DPGDS 9 | from .bayesian_pm.wedtm import WEDTM 10 | from .bayesian_pm.dirbn import DirBN 11 | from .bayesian_pm.dpfa import DPFA 12 | from .bayesian_pm.gpgbn import GPGBN 13 | from .bayesian_pm.mpgbn import MPGBN 14 | from .bayesian_pm.fa import FA 15 | from .bayesian_pm.gmm import GMM 16 | from .deep_learning_pm.vae import VAE 17 | from .deep_learning_pm.cvae import CVAE 18 | from .deep_learning_pm.vqvae import VQVAE 19 | from .deep_learning_pm.gan import GAN 20 | from .deep_learning_pm.wgan import WGAN 21 | from .deep_learning_pm.dcgan import DCGAN 22 | from .deep_learning_pm.infogan import InfoGAN 23 | from .deep_learning_pm.rbm import RBM 24 | from .deep_learning_pm.ddpm import DDPM 25 | from .deep_learning_pm.nflow import NFlow 26 | from .deep_learning_pm.realnvp import RealNVP 27 | from .hybrid_pm.whai import WHAI 28 | from .hybrid_pm.wgaae import WGAAE 29 | from .hybrid_pm.sawetm import SawETM 30 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from setuptools import setup, find_packages 5 | 6 | 7 | setup( 8 | name='pydpm', 9 | version='5.0.0', 10 | description='A Python library that focuses on constructing deep probabilistic models on GPU.', 11 | py_modules=['pydpm'], 12 | long_description=open('README.md').read(), 13 | long_description_content_type='text/markdown', 14 | author='Chaojie Wang, Wei Zhao, Xinyang Liu, Jiawen Wu', 15 | author_email='xd_silly@163.com',
16 | maintainer='BoChenGroup', 17 | maintainer_email='13279389260@163.com', 18 | license='Apache License Version 2.0', 19 | packages=find_packages(), 20 | # package_data={'pydpm': c_package_data}, 21 | # data_files=c_package_data, 22 | include_package_data=True, # include all files 23 | platforms=["Windows", "Linux"], 24 | url='https://github.com/BoChenGroup/Pydpm', 25 | requires=['numpy', 'scipy', 'sklearn', 'PyTorch', 'ctypes', 'subprocess', ], 26 | classifiers=[ 27 | 'Environment :: GPU :: NVIDIA CUDA', 28 | 'Intended Audience :: Developers', 29 | 'Programming Language :: Python :: 3', 30 | 'Topic :: Software Development :: Libraries' 31 | ], 32 | ) 33 | -------------------------------------------------------------------------------- /pydpm/sampler/_compact/crt_cpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // Chaojie 2017_10_12 4 | // No Check 5 | 6 | int Binary_Search(double *probvec, double prob, int K) 7 | { 8 | int kstart, kend, kmid; 9 | // K : the length of probvec 10 | if (prob <= probvec[0]) 11 | return(0); 12 | else 13 | { 14 | for(kstart = 1, kend = K-1;;) 15 | { 16 | if (kstart >= kend) 17 | return(kend); 18 | else 19 | { 20 | kmid = (kstart + kend)/2; 21 | if (probvec[kmid-1]>=prob) 22 | kend = kmid - 1; 23 | else if (probvec[kmid]; Jiawen Wu ; Wei Zhao <13279389260@163.com> 3 | # License: BSD-3-Claus 4 | 5 | class Params(object): 6 | def __init__(self): 7 | """ 8 | The basic class for storing the parameters in the probabilistic model 9 | """ 10 | super(Params, self).__init__() 11 | 12 | 13 | class Basic_Model(object): 14 | def __init__(self, *args, **kwargs): 15 | """ 16 | The basic model for all probabilistic models in this package 17 | Attributes: 18 | @public: 19 | global_params : [Params] the global parameters of the probabilistic model 20 | local_params : [Params] the local parameters of the probabilistic model 21 | 22 | @private: 23 | _model_setting : [Params] the model settings of the probabilistic model 24 | _hyper_params : [Params] the hyper parameters of the probabilistic model 25 | 26 | """ 27 | super(Basic_Model, self).__init__() 28 | 29 | setattr(self, 'global_params', Params()) 30 | setattr(self, 'local_params', Params()) 31 | 32 | setattr(self, '_model_setting', Params()) 33 | setattr(self, '_hyper_params', Params()) 34 | 35 | -------------------------------------------------------------------------------- /pydpm/metric/accuracy.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Metric to evaluate the performance of the classification 4 | =========================================== 5 | 6 | """ 7 | 8 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 9 | # License: BSD-3-Clause 10 | 11 | import numpy as np 12 | from sklearn import svm 13 | 14 | 15 | class ACC(object): 16 | 17 | def __init__(self, x_tr: np.ndarray, x_te: np.ndarray, y_tr: np.ndarray, y_te: np.ndarray, model='SVM'): 18 | ''' 19 | Inputs: 20 | x_tr : [np.ndarray] K*N_train matrix, N_train latent features of length K 21 | x_te : [np.ndarray] K*N_test matrix, N_test latent features of length K 22 | y_tr : [np.ndarray] N_train vector, labels of N_train latent features 23 | y_te : [np.ndarray] N_test vector, labels of N_test latent features 24 | 25 | Outputs: 26 | accuracy: [float] scalar, the accuracy score 27 | 28 | ''' 29 | self.x_tr = x_tr 30 | self.x_te = x_te 31 | self.y_tr = y_tr 32 | self.y_te = y_te 33 | 34 | 
if model == 'SVM': 35 | self._svm() 36 | else: 37 | print("Please input metric model correctly. Options: 'SVM'") 38 | 39 | print(f'The classification accuracy with {model} is: {self._accuracy:.4f}') 40 | 41 | 42 | def _svm(self): 43 | 44 | self.model = svm.SVC() 45 | self.model.fit(self.x_tr.T, self.y_tr) 46 | print(f'Optimization Finished') 47 | self._accuracy = self.model.score(self.x_te.T, self.y_te) 48 | 49 | -------------------------------------------------------------------------------- /pydpm/utils/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Metric 4 | =========================================== 5 | 6 | """ 7 | 8 | # Author: Chaojie Wang ; Jiawen Wu; Xinyang Liu 9 | # License: BSD-3-Clause 10 | import os 11 | import random 12 | import numpy as np 13 | 14 | import torch 15 | 16 | realmin = 2.2e-10 17 | 18 | # randomness 19 | def seed_everything(seed_value): 20 | random.seed(seed_value) 21 | np.random.seed(seed_value) 22 | torch.manual_seed(seed_value) 23 | os.environ['PYTHONHASHSEED'] = str(seed_value) 24 | 25 | if torch.cuda.is_available(): 26 | torch.cuda.manual_seed(seed_value) 27 | torch.cuda.manual_seed_all(seed_value) 28 | torch.backends.cudnn.deterministic = True 29 | torch.backends.cudnn.benchmark = True 30 | 31 | # math 32 | def log_max(x): 33 | return np.log(np.maximum(x, realmin)) 34 | 35 | def cosine_simlarity(A, B): 36 | # A: N*D, B: N*D 37 | [N, D] = A.shape 38 | inter_product = np.matmul(A, np.transpose(B)) # N*N 39 | len_A = np.sqrt(np.sum(A * A, axis=1, keepdims=True)) 40 | len_B = np.sqrt(np.sum(B * B, axis=1, keepdims=True)) 41 | len_AB = np.matmul(len_A, np.transpose(len_B)) 42 | cos_AB = inter_product / (len_AB + realmin) 43 | cos_AB[(np.arange(N), np.arange(N))] = 1 44 | return cos_AB 45 | 46 | def standardization(data): 47 | mu = np.mean(data, axis=1, keepdims=True) 48 | sigma = np.std(data, axis=1, keepdims=True) 49 | return (data - mu) / (sigma + 2.2e-8) 50 | 51 | def normalize_to_neg_one_to_one(img): 52 | return img * 2 - 1 53 | 54 | def unnormalize_to_zero_to_one(t): 55 | return (t + 1) * 0.5 -------------------------------------------------------------------------------- /pydpm/sampler/_compact/crt_multi_aug_cpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // Chaojie 2017_10_12 4 | // No Check 5 | 6 | int Binary_Search(double *probvec, double prob, int K) 7 | { 8 | int kstart, kend, kmid; 9 | // K : the length of probvec 10 | if (prob <= probvec[0]) 11 | return(0); 12 | else 13 | { 14 | for(kstart = 1, kend = K-1;;) 15 | { 16 | if (kstart >= kend) 17 | return(kend); 18 | else 19 | { 20 | kmid = (kstart + kend)/2; 21 | if (probvec[kmid-1]>=prob) 22 | kend = kmid - 1; 23 | else if (probvec[kmid] 4 | # License: BSD-3-Clause 5 | 6 | import numpy as np 7 | from sklearn.metrics import roc_auc_score 8 | from sklearn.metrics import average_precision_score 9 | 10 | class ROC_AP_SCORE(object): 11 | 12 | def __init__(self, edges_pos, edges_neg, adj_orig, emb=None): 13 | 14 | self.edges_pos = edges_pos 15 | self.edges_neg = edges_neg 16 | self.adj_orig = adj_orig 17 | self.emb = emb 18 | 19 | self._get() 20 | 21 | # print(f'The AUC is: {self._AUC:.4f} and AP is: {self._AP:.4f}') 22 | 23 | def _get(self): 24 | # if emb is None: 25 | # feed_dict.update({placeholders['dropout']: 0}) 26 | # emb = sess.run(model.z_decoder_a, feed_dict=feed_dict) 27 | 28 | def sigmoid(x): 29 | return 1 / (1 + 
np.exp(-x)) 30 | 31 | def beta(x): 32 | return 1 - np.exp(-x) 33 | 34 | # Predict on test set of edges 35 | adj_rec = np.dot(self.emb, self.emb.T) 36 | preds = [] 37 | pos = [] 38 | # print(adj_rec,'**************') 39 | for e in self.edges_pos: 40 | # preds.append(sigmoid(adj_rec[e[0], e[1]])) 41 | # preds.append(adj_rec[e[0], e[1]]) 42 | preds.append(beta(adj_rec[e[0], e[1]])) 43 | pos.append(self.adj_orig[e[0], e[1]]) 44 | 45 | preds_neg = [] 46 | neg = [] 47 | for e in self.edges_neg: 48 | # preds_neg.append(sigmoid(adj_rec[e[0], e[1]])) 49 | # preds_neg.append(adj_rec[e[0], e[1]]) 50 | preds_neg.append(beta(adj_rec[e[0], e[1]])) 51 | neg.append(self.adj_orig[e[0], e[1]]) 52 | 53 | preds_all = np.hstack([preds, preds_neg]) 54 | labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))]) 55 | roc_score = roc_auc_score(labels_all, preds_all) 56 | ap_score = average_precision_score(labels_all, preds_all) 57 | 58 | self._AUC = roc_score 59 | self._AP = ap_score 60 | # return roc_score, ap_score 61 | -------------------------------------------------------------------------------- /pydpm/metric/cluster_acc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Author: Xinyang Liu 4 | # License: BSD-3-Clause 5 | 6 | import numpy as np 7 | from scipy.optimize import linear_sum_assignment 8 | 9 | # from sklearn.utils.linear_assignment_ import linear_assignment 10 | # from sklearn.metrics.cluster import normalized_mutual_info_score as NMI, \ 11 | # adjusted_mutual_info_score as AMI, adjusted_rand_score as AR, silhouette_score as SI, calinski_harabasz_score as CH 12 | 13 | class Cluster_ACC(object): 14 | 15 | def __init__(self, y, ypred): 16 | ''' 17 | Inputs: 18 | y: the ground_true, shape:(n_sample,) 19 | ypred: pred_label, shape:(n_sample,) 20 | 21 | Outputs: 22 | accuracy of cluster, in [0, 1] 23 | ''' 24 | self.y = y 25 | self.ypred = ypred 26 | 27 | self._get() 28 | 29 | print(f'The cluster accuracy is: {self._cluster_acc:.4f}') 30 | 31 | def _get(self): 32 | s = np.unique(self.ypred) 33 | t = np.unique(self.y) 34 | 35 | N = len(np.unique(self.ypred)) 36 | C = np.zeros((N, N), dtype=np.int32) 37 | for i in range(N): 38 | for j in range(N): 39 | idx = np.logical_and(self.ypred == s[i], self.y == t[j]) 40 | C[i][j] = np.count_nonzero(idx) 41 | 42 | # convert the C matrix to the 'true' cost 43 | Cmax = np.amax(C) 44 | C = Cmax - C 45 | indices = linear_sum_assignment(C) 46 | row = indices[:][:, 0] 47 | col = indices[:][:, 1] 48 | # calculating the accuracy according to the optimal assignment 49 | count = 0 50 | for i in range(N): 51 | idx = np.logical_and(self.ypred == s[row[i]], self.y == t[col[i]]) 52 | count += np.count_nonzero(idx) 53 | 54 | self._cluster_acc = 1.0 * count / len(self.y) 55 | 56 | # y_true = y_true.astype(np.int64) 57 | # assert y_pred.size == y_true.size 58 | # D = max(y_pred.max(), y_true.max()) + 1 59 | # w = np.zeros((D, D), dtype=np.int64) 60 | # for i in range(y_pred.size): 61 | # w[y_pred[i], y_true[i]] += 1 62 | # from sklearn.utils.linear_assignment_ import linear_assignment 63 | # ind = linear_assignment(w.max() - w) # Optimal label mapping based on the Hungarian algorithm 64 | # return sum([w[i, j] for i, j in ind]) * 1.0 / y_pred.size 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /pydpm/metric/normalized_mutual_information.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Author: Xinyang Liu 4 | # License: BSD-3-Clause 5 | 6 | import numpy as np 7 | import copy 8 | 9 | class NMI(object): 10 | def __init__(self, A, B): 11 | ''' 12 | Inputs: 13 | A: [int], ground_truth, shape:(n_sample,) 14 | B: [int], pred_label, shape:(n_sample,) 15 | 16 | Outputs: 17 | NMI: [float], Normalized Mutual information of A and B 18 | 19 | ''' 20 | self.A = copy.deepcopy(A) 21 | self.B = copy.deepcopy(B) 22 | 23 | self._get() 24 | 25 | print(f'The NMI is: {self._NMI:.4f}') 26 | 27 | def _get(self): 28 | n_gnd = self.A.shape[0] 29 | n_label = self.B.shape[0] 30 | # assert n_gnd == n_label 31 | 32 | LabelA = np.unique(self.A) 33 | nClassA = len(LabelA) 34 | LabelB = np.unique(self.B) 35 | nClassB = len(LabelB) 36 | 37 | if nClassB < nClassA: 38 | self.A = np.concatenate((self.A, LabelA)) 39 | self.B = np.concatenate((self.B, LabelA)) 40 | else: 41 | self.A = np.concatenate((self.A, LabelB)) 42 | self.B = np.concatenate((self.B, LabelB)) 43 | 44 | G = np.zeros([nClassA, nClassA]) 45 | for i in range(nClassA): 46 | for j in range(nClassA): 47 | G[i, j] = np.sum((self.A == LabelA[i]) * (self.B == LabelA[j])) 48 | 49 | sum_G = np.sum(G) 50 | PA = np.sum(G, axis=1) 51 | PA = PA/sum_G 52 | PB = np.sum(G, axis=0) 53 | PB = PB/sum_G 54 | PAB = G/sum_G 55 | 56 | if np.sum((PA == 0)) > 0 or np.sum((PB == 0)): 57 | print('error ! Smooth fail !') 58 | self._NMI = np.nan 59 | else: 60 | HA = np.sum(-PA * np.log2(PA)) 61 | HB = np.sum(-PB * np.log2(PB)) 62 | PPP = PAB / np.tile(PB, (nClassA, 1)) / np.tile(PA.reshape(-1, 1), (1, nClassA)) 63 | PPP[np.where(abs(PPP) < 1e-12)] = 1 # avoid 'log 0' 64 | MI = np.sum(PAB * np.log2(PPP)) 65 | NMI = MI / np.max((HA, HB)) 66 | # optional 67 | # NMI = 2.0 * MI / (HA + HB) 68 | # NMI = MI / np.sqrt(HA * HB) 69 | self._NMI = NMI 70 | 71 | -------------------------------------------------------------------------------- /pydpm/sampler/distribution_sampler_cpu.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import ctypes 4 | 5 | class distribution_sampler_cpu(object): 6 | 7 | def __init__(self): 8 | """ 9 | The basic class for sampling distribution on cpu 10 | """ 11 | super(distribution_sampler_cpu, self).__init__() 12 | 13 | # sampler for basic distributions 14 | setattr(self, 'standard_normal', np.random.standard_normal) 15 | setattr(self, 'normal', np.random.normal) 16 | setattr(self, 'standard_gamma', np.random.standard_gamma) 17 | setattr(self, 'gamma', np.random.gamma) 18 | setattr(self, 'standard_cauchy', np.random.standard_cauchy) 19 | # setattr(self, 'cauchy', np.random.cauchy) # numpy doesnot has this distribution 20 | setattr(self, 'chisquare', np.random.chisquare) 21 | setattr(self, 'beta', np.random.beta) 22 | # setattr(self, 'crt', np.random.crt) # numpy doesnot has this distribution 23 | setattr(self, 'dirichlet', np.random.dirichlet) 24 | setattr(self, 'poisson', np.random.poisson) 25 | setattr(self, 'weibull', np.random.weibull) 26 | setattr(self, 'negative_binomial', np.random.negative_binomial) 27 | setattr(self, 'lognormal', np.random.lognormal) 28 | setattr(self, 'binomial', np.random.binomial) 29 | setattr(self, 'multinomial', np.random.multinomial) 30 | setattr(self, 'laplace', np.random.laplace) 31 | setattr(self, 'logistic', np.random.logistic) 32 | setattr(self, 'exponential', np.random.exponential) 33 | setattr(self, 'standard_exponential', np.random.standard_exponential) 34 | setattr(self, 
'noncentral_chisquare', np.random.noncentral_chisquare) 35 | setattr(self, 'zipf', np.random.zipf) 36 | setattr(self, 'triangular', np.random.triangular) 37 | setattr(self, 'noncentral_f', np.random.noncentral_f) 38 | setattr(self, '_f', np.random.f) 39 | # setattr(self, 't', np.random.t) # numpy doesnot has this distribution 40 | setattr(self, 'geometric', np.random.geometric) 41 | setattr(self, 'hypergeometric', np.random.hypergeometric) 42 | setattr(self, 'gumbel', np.random.gumbel) 43 | setattr(self, 'pareto', np.random.pareto) 44 | setattr(self, 'power', np.random.power) 45 | setattr(self, 'rayleigh', np.random.rayleigh) 46 | -------------------------------------------------------------------------------- /pydpm/sampler/_compact/multi_aug_cpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // Chaojie 2017_10_12 4 | 5 | int Binary_Search(double *probvec, double prob, int K) 6 | { 7 | int kstart, kend, kmid; 8 | // K : the length of probvec 9 | if (prob <= probvec[0]) 10 | return(0); 11 | else 12 | { 13 | for(kstart = 1, kend = K-1;;) 14 | { 15 | if (kstart >= kend) 16 | return(kend); 17 | else 18 | { 19 | kmid = (kstart + kend)/2; 20 | if (probvec[kmid-1]>=prob) 21 | kend = kmid - 1; 22 | else if (probvec[kmid] 4 | # License: BSD-3-Clause 5 | 6 | import os 7 | import numpy as np 8 | from PIL import Image 9 | import matplotlib.pyplot as plt 10 | 11 | import torch 12 | from torch.utils.data import Dataset, DataLoader 13 | 14 | import torchvision 15 | import torchvision.datasets as datasets 16 | 17 | def tensor_transforms(data, transforms=lambda x:x): 18 | data = data.numpy() 19 | data = transforms(data) 20 | return data 21 | 22 | # ======================================== CustomDataset ======================================================== # 23 | 24 | class CustomDataset(Dataset): 25 | def __init__(self, file_path, mode='train', transform=None, target_transform=None): 26 | super(CustomDataset, self).__init__() 27 | self.file_path = os.path.join(file_path, mode) 28 | self.transform = transform 29 | self.target_transform = target_transform 30 | self.classes = [] 31 | self.classes_num = 0 32 | self.class_to_idx = {} 33 | self.image_names = [] 34 | self.image_classes = [] 35 | self.classes_file = os.path.join(file_path, 'label.txt') 36 | 37 | with open(self.classes_file, 'r') as classes_list: 38 | for line in classes_list: 39 | self.classes_num += 1 40 | self.classes.append(line) 41 | self.class_to_idx[line] = self.classes_num - 1 42 | 43 | with open(self.file_path, 'r') as image_class_file: 44 | for line in image_class_file: 45 | image_class_pair = line.split('\t') 46 | self.image_names.append(image_class_pair[0]) 47 | self.image_classes.append(image_class_pair[1]) 48 | 49 | def __getitem__(self, idx): 50 | image_path, target = self.image_names[idx], self.class_to_idx[self.image_classes[idx]] 51 | 52 | # Return a PIL Image 53 | image = Image.open(image_path) 54 | 55 | if self.transform: 56 | image = self.transform(image) 57 | if self.target_transform is not None: 58 | target = self.target_transform(target) 59 | 60 | return image, target 61 | 62 | def __len__(self): 63 | return len(self.image_names) 64 | 65 | 66 | def image_dataloader(root='../dataset/mnist', transform=None, target_transform=None, 67 | batch_size=500, shuffle=True, drop_last=True, num_workers=4): 68 | dataset = CustomDataset(root, transform=transform, target_transform=target_transform) 69 | 70 | return DataLoader(dataset, batch_size=batch_size, 
shuffle=shuffle, num_workers=num_workers, drop_last=drop_last), dataset.classes 71 | 72 | -------------------------------------------------------------------------------- /pydpm/sampler/basic_sampler.py: -------------------------------------------------------------------------------- 1 | 2 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 3 | # License: BSD-3-Claus 4 | 5 | import platform 6 | 7 | 8 | class Basic_Sampler(object): 9 | def __init__(self, device='cpu', seed=0, *args, **kwargs): 10 | """ 11 | The basic sampler model for training all probabilistic models in this package 12 | Attributes: 13 | @public: 14 | 15 | 16 | @private: 17 | _model_setting : [Params] the model settings of the probabilistic model 18 | _hyper_params : [Params] the hyper parameters of the probabilistic model 19 | 20 | """ 21 | super(Basic_Sampler, self).__init__() 22 | 23 | assert device in ['cpu', 'gpu'], 'Device Error, device should be "cpu" or "gpu" ' 24 | self.device = device 25 | self.seed = seed 26 | 27 | system_type = platform.system() 28 | assert system_type in ['Windows', 'Linux'], 'System Error, system should be "Windows" or "Linux" ' 29 | self.system_type = system_type 30 | 31 | if self.device == 'cpu': 32 | self._cpu_sampler_initial() 33 | 34 | elif self.device == 'gpu': 35 | self._gpu_sampler_initial() 36 | 37 | def _cpu_sampler_initial(self): 38 | 39 | from .distribution_sampler_cpu import distribution_sampler_cpu 40 | sampler = distribution_sampler_cpu() 41 | for distribution_name in dir(sampler): 42 | if distribution_name[0] != '_': 43 | setattr(self, distribution_name, getattr(sampler, distribution_name)) 44 | else: 45 | continue 46 | 47 | from .model_sampler_cpu import model_sampler_cpu 48 | sampler = model_sampler_cpu(self.system_type) 49 | for distribution_name in dir(sampler): 50 | if distribution_name[0] != '_': 51 | setattr(self, distribution_name, getattr(sampler, distribution_name)) 52 | else: 53 | continue 54 | 55 | 56 | 57 | def _gpu_sampler_initial(self): 58 | 59 | from .distribution_sampler_gpu import distribution_sampler_gpu 60 | sampler = distribution_sampler_gpu(self.system_type) 61 | for distribution_name in dir(sampler): 62 | if distribution_name[0] != '_': 63 | setattr(self, distribution_name, getattr(sampler, distribution_name)) 64 | else: 65 | continue 66 | 67 | from .model_sampler_gpu import model_sampler_gpu 68 | sampler = model_sampler_gpu(self.system_type) 69 | for distribution_name in dir(sampler): 70 | if distribution_name[0] != '_': 71 | setattr(self, distribution_name, getattr(sampler, distribution_name)) 72 | else: 73 | continue 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Gaussian_Mixture_Model/GMM_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Gaussian Mixture Model 4 | =========================================== 5 | 6 | """ 7 | 8 | # Author: Xinyang Liu ; 9 | # License: BSD-3-Claus 10 | 11 | import numpy as np 12 | import argparse 13 | from torchvision import datasets, transforms 14 | from pydpm.model import GMM 15 | from pydpm.metric import Cluster_ACC, NMI 16 | from pydpm.dataloader.image_data import tensor_transforms 17 | 18 | # =========================================== ArgumentParser ===================================================================== # 19 | parser = argparse.ArgumentParser() 20 | 21 | # device 22 | parser.add_argument("--device", 
type=str, default='gpu') 23 | 24 | # dataset 25 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 26 | 27 | # model 28 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 29 | parser.add_argument("--load_path", type=str, default='../../save_models/GMM.npy', help="the path of loading model") 30 | 31 | parser.add_argument("--n_components", type=int, default=10, help="number of components according dataset") 32 | 33 | # optim 34 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 35 | 36 | args = parser.parse_args() 37 | # =========================================== Dataset ===================================================================== # 38 | # define transform for dataset and load orginal dataset 39 | transform = transforms.Compose([transforms.ToTensor()]) 40 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 41 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 42 | 43 | # transform dataset and reshape the dataset into [batch_size, feature_num] 44 | train_data = tensor_transforms(train_dataset.data, transform) 45 | train_data = train_data.permute([1, 2, 0]).reshape([len(train_dataset), -1]) # len(train_dataset, 28*28) 46 | test_data = tensor_transforms(test_dataset.data, transform) 47 | test_data = test_data.permute([1, 2, 0]).reshape([len(test_dataset), -1]) 48 | train_label = train_dataset.train_labels 49 | test_label = test_dataset.test_labels 50 | 51 | # transpose the dataset to fit the model and convert a tensor to numpy array 52 | train_data = np.array(np.ceil(train_data[:999, :].numpy()), order='C') 53 | test_data = np.array(np.ceil(test_data[:999, :].numpy()), order='C') 54 | train_label = train_label.numpy()[:999] 55 | test_label = test_label.numpy()[:999] 56 | 57 | # =========================================== Model ===================================================================== # 58 | # create the model and deploy it on gpu or cpu 59 | model = GMM(K=args.n_components, device=args.device) 60 | model.initial(train_data) # use the shape of train_data to initialize the params of model 61 | 62 | # train and evaluation 63 | cluster, train_local_params = model.train(data=train_data, num_epochs=args.num_epochs) 64 | 65 | # Evaluation on dataset using accuracy of cluster and NMI 66 | res_acc = Cluster_ACC(train_label, cluster) 67 | res_nmi = NMI(train_label, cluster) 68 | 69 | model.save() 70 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Factor_Analysis/FA_demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Factor Analysis 4 | =========================================== 5 | 6 | """ 7 | 8 | # Author: Xinyang Liu ; 9 | # License: BSD-3-Claus 10 | 11 | import numpy as np 12 | import scipy.io as sio 13 | import argparse 14 | from torchvision import datasets, transforms 15 | import matplotlib.pyplot as plt 16 | from pydpm.model import FA 17 | from pydpm.utils.utils import * 18 | from pydpm.dataloader.image_data import tensor_transforms 19 | 20 | # # load data 21 | # data = sio.loadmat('../../../dataset/FA_data.mat') 22 | # train_data = np.array(data['x1']) 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 
25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--device", type=str, default='gpu') 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | 33 | # model 34 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 35 | parser.add_argument("--load_path", type=str, default='../../save_models/FA.npy', help="the path of loading model") 36 | 37 | parser.add_argument("--z_dim", type=int, default=128, help="number of components according dataset") 38 | 39 | # optim 40 | parser.add_argument("--num_epochs", type=int, default=1000, help="number of epochs of training") 41 | 42 | args = parser.parse_args() 43 | # =========================================== Dataset ===================================================================== # 44 | # define transform for dataset and load orginal dataset 45 | transform = transforms.Compose([transforms.ToTensor()]) 46 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 47 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 48 | 49 | # transform dataset and reshape the dataset into [batch_size, feature_num] 50 | train_data = tensor_transforms(train_dataset.data, transform) 51 | train_data = train_data.permute([1, 2, 0]).reshape([len(train_dataset), -1]) # len(train_dataset, 28*28) 52 | test_data = tensor_transforms(test_dataset.data, transform) 53 | test_data = test_data.permute([1, 2, 0]).reshape([len(test_dataset), -1]) 54 | train_label = train_dataset.train_labels 55 | test_label = test_dataset.test_labels 56 | 57 | # transpose the dataset to fit the model and convert a tensor to numpy array 58 | # !!! 
Transposition, data: [D, N] 59 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy()), order='C') 60 | # test_data = np.array(np.ceil(test_data[:999, :].T.numpy()), order='C') 61 | train_data = standardization(train_data) 62 | # test_data = standardization(test_data) 63 | train_label = train_label.numpy()[:999] 64 | # test_label = test_label.numpy()[:999] 65 | 66 | # create the model and deploy it on gpu or cpu 67 | model =FA(args.z_dim, 'gpu') 68 | model.initial(train_data) # use the shape of train_data to initialize the params of model 69 | 70 | # train and evaluation 71 | train_local_params = model.train(train_data, args.num_epochs) 72 | 73 | x_hat = np.matmul(train_local_params.w, train_local_params.z) 74 | 75 | # visualization for one sample 76 | plt.plot(train_data[:, 880], 'ro', marker='*', label="train data") 77 | plt.plot(x_hat[:, 880], 'bo', marker='v', label="reconstruction") 78 | plt.legend(loc="best") 79 | plt.show() 80 | 81 | # save the model after training 82 | model.save() 83 | -------------------------------------------------------------------------------- /pydpm/sampler/pre_process.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import ctypes 4 | 5 | 6 | def para_preprocess(times=1, in_type=np.float32, out_type=np.float32, *args): 7 | """ 8 | preprocess the input parameters in sampling the distribution with gpu 9 | Inputs: 10 | times : [int] repeat times 11 | in_type : [np.dtype] or list of np.dtype the dtype of the input parameters 12 | out_type: [np.dtype] the dtype of the output sampling results 13 | args[0] : [np.ndarray] the first variable in the target distribution 14 | args[1] : [np.ndarray] the second variable in the target distribution 15 | Outputs: 16 | para_a : [pycuda.gpuarray] the input matrix for the first parameter 17 | para_b : [pycuda.gpuarray] the input matrix for the second parameter 18 | output : [pycuda.gpuarray] the matrix on gpu to store the sampling result 19 | para_scale : [list] a list including the number of element and repeat times in the resulting matrix 20 | para_seed : [pycuda.gpuarray] seed matrix on gpu 21 | partition : [list] a list including 22 | scalar_flag : [bool] if the resulting matrix is a scalar 23 | """ 24 | assert len(args) <= 2, 'Value Error: the number of the input parameter in the sampling distribution should not be larger than 2' 25 | 26 | if len(args) == 1: 27 | para_a = np.array(args[0], dtype=in_type, order='C') 28 | # assert len(para_a.shape) <= 2, 'Shape Error: the dimension of the input parameter a in the sampling distribution shoud not be larger than 2' 29 | 30 | # obtain the output_scale and judge if the para_a is a scalar 31 | if times > 1: 32 | output_scale = para_a.shape + (times,) 33 | scalar_flag = False 34 | else: 35 | output_scale = (1,) if para_a.shape == () else para_a.shape 36 | scalar_flag = True if para_a.shape == () else False 37 | 38 | elif len(args) == 2: 39 | if type(in_type) == type: 40 | in_type = [in_type, in_type] 41 | assert (type(in_type) == list and len(in_type) == 2) 42 | 43 | para_a = np.array(args[0], dtype=in_type[0], order='C') 44 | para_b = np.array(args[1], dtype=in_type[1], order='C') 45 | # assert len(para_a.shape) <= 2, 'Shape Error: the dimension of the input parameter a in the sampling distirbution shoud not be larger than 2' 46 | # assert len(para_b.shape) <= 2, 'Shape Error: the dimension of the input parameter b in the sampling distirbution shoud not be larger than 2' 47 | 48 | # make sure the 
sizes of para_a and para_b are equal 49 | if para_a.size == 1 and para_b.size != 1: 50 | para_a = np.array(np.full(para_b.shape, para_a), dtype=in_type[0], order='C') 51 | if para_b.size == 1 and para_a.size != 1: 52 | para_b = np.array(np.full(para_a.shape, para_b), dtype=in_type[1], order='C') 53 | 54 | # obtain the output_scale and judge if the para_a is a scalar 55 | if times > 1: 56 | output_scale = para_a.shape + (times,) 57 | scalar_flag = False 58 | else: 59 | output_scale = (1,) if para_a.shape == () else para_a.shape 60 | scalar_flag = True if para_a.shape == () else False 61 | 62 | matrix_scale = para_a.size 63 | nElems = para_a.size * times # output_scale multi. 64 | 65 | # output 66 | output = np.empty(output_scale, dtype=out_type, order='C') 67 | 68 | if len(args) == 1: 69 | return matrix_scale, nElems, para_a, output, output_scale, scalar_flag 70 | elif len(args) == 2: 71 | return matrix_scale, nElems, para_a, para_b, output, output_scale, scalar_flag -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Dynamic_System/PGDS_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Poisson Gamma Dynamical Systems Demo 4 | Aaron Schein, Hanna Wallach and Mingyuan Zhou 5 | Published in Neural Information Processing Systems 2016 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import argparse 16 | import scipy.io as sio 17 | 18 | from torchvision import datasets, transforms 19 | 20 | from pydpm.model import PGDS 21 | from pydpm.metric import ACC 22 | from pydpm.dataloader.image_data import tensor_transforms 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--device", type=str, default='gpu') 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | 33 | # model 34 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 35 | parser.add_argument("--load_path", type=str, default='../../save_models/PGDS.npy', help="the path of loading model") 36 | 37 | parser.add_argument("--z_dim", type=int, default=100, help="dimensionality of the z latent space") 38 | 39 | # optim 40 | parser.add_argument("--num_epochs", type=int, default=200, help="number of epochs of training") 41 | 42 | args = parser.parse_args() 43 | 44 | # =========================================== Dataset ===================================================================== # 45 | # define transform for dataset and load orginal dataset 46 | transform = transforms.Compose([transforms.ToTensor()]) 47 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 48 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 49 | 50 | # transform dataset and reshape the dataset into [batch_size, feature_num] 51 | train_data = tensor_transforms(train_dataset.data, transform) 52 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 53 | test_data = tensor_transforms(test_dataset.data, transform) 54 | test_data = 
test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 55 | train_label = train_dataset.train_labels 56 | test_label = test_dataset.test_labels 57 | 58 | # transpose the dataset to fit the model and convert a tensor to numpy array 59 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 60 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 61 | train_label = train_label.numpy()[:999] 62 | test_label = test_label.numpy()[:999] 63 | 64 | # =========================================== Dataset ===================================================================== # 65 | # define transform for dataset and load orginal dataset 66 | model = PGDS(K=args.z_dim, device=args.device) 67 | model.initial(train_data) 68 | 69 | # train and evaluation 70 | train_local_params = model.train(data=train_data, num_epochs=args.num_epochs) 71 | train_local_params = model.test(data=train_data, num_epochs=args.num_epochs) 72 | test_local_params = model.test(data=test_data, num_epochs=args.num_epochs) 73 | 74 | # save the model after training 75 | model.save(args.save_path) 76 | # load the model 77 | model.load(args.load_path) 78 | 79 | # evaluate the model with classification accuracy 80 | # the demo accuracy can achieve 0.8739 81 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_label, test_label, 'SVM') 82 | 83 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Gamma_Belief_Network/PGBN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Poisson Gamma Belief Network 4 | Mingyuan Zhou, Yulai Cong and Bo Chen 5 | Published in Advances in Neural Information Processing System 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import argparse 16 | import scipy.io as sio 17 | 18 | from torchvision import datasets, transforms 19 | 20 | from pydpm.model import PGBN 21 | from pydpm.metric import ACC 22 | from pydpm.dataloader.image_data import tensor_transforms 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--device", type=str, default='gpu') 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | 33 | # model 34 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 35 | parser.add_argument("--load_path", type=str, default='../../save_models/PGBN.npy', help="the path of loading model") 36 | 37 | parser.add_argument("--z_dims", type=list, default=[128, 64, 32], help="number of topics at diffrent layers in PGBN") 38 | 39 | # optim 40 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 41 | 42 | args = parser.parse_args() 43 | 44 | # =========================================== Dataset ===================================================================== # 45 | # define transform for dataset and load orginal dataset 46 | transform = transforms.Compose([transforms.ToTensor()]) 47 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 48 | test_dataset 
= datasets.MNIST(root=args.data_path, train=False, download=False) 49 | 50 | # transform dataset and reshape the dataset into [batch_size, feature_num] 51 | train_data = tensor_transforms(train_dataset.data, transform) 52 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 53 | test_data = tensor_transforms(test_dataset.data, transform) 54 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 55 | train_label = train_dataset.train_labels 56 | test_label = test_dataset.test_labels 57 | 58 | # transpose the dataset to fit the model and convert a tensor to numpy array 59 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 60 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 61 | train_label = train_label.numpy()[:999] 62 | test_label = test_label.numpy()[:999] 63 | 64 | # =========================================== Model ===================================================================== # 65 | # create the model and deploy it on gpu or cpu 66 | model = PGBN(K=args.z_dims, device=args.device) 67 | model.initial(train_data) 68 | 69 | # train and evaluation 70 | train_local_params = model.train(data=train_data, num_epochs=args.num_epochs) 71 | train_local_params = model.test(data=train_data, num_epochs=args.num_epochs) 72 | test_local_params = model.test(data=test_data, num_epochs=args.num_epochs) 73 | 74 | # save the model after training 75 | model.save(args.save_path) 76 | # load the model 77 | model.load(args.load_path) 78 | 79 | # evaluate the model with classification accuracy 80 | # the demo accuracy can achieve 0.8088 81 | results = ACC(train_local_params.Theta[0], test_local_params.Theta[0], train_label, test_label, 'SVM') 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Variational_Autoencoder/VAE_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | VAE 4 | Auto-Encoding Variational Bayes 5 | Diederik P. 
Kingma, Max Welling 6 | Publihsed in 2014 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Muyao Wang , Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | import sys 17 | import torch 18 | import torch.optim as optim 19 | 20 | from torchvision import datasets, transforms 21 | from torchvision.utils import save_image 22 | from pydpm.model import VAE 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--gpu_id", type=int, default=0) 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/VAE.pth', help="the path of loading model") 37 | 38 | parser.add_argument("--z_dim", type=int, default=2, help="dimensionality of the z latent space") 39 | parser.add_argument("--encoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 40 | parser.add_argument("--decoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 41 | 42 | # optim 43 | parser.add_argument("--num_epochs", type=int, default=2, help="number of epochs of training") 44 | parser.add_argument("--batch_size", type=int, default=256, help="size of the batches") 45 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 46 | 47 | args = parser.parse_args() 48 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 49 | 50 | # =========================================== Dataset ===================================================================== # 51 | # mnist 52 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transforms.ToTensor(), download=True) 53 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transforms.ToTensor(), download=False) 54 | 55 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 56 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 57 | 58 | # =========================================== Model ===================================================================== # 59 | # model 60 | model = VAE(in_dim=args.img_size**2, z_dim=args.z_dim, encoder_hid_dims=args.encoder_hid_dims, decoder_hid_dims=args.decoder_hid_dims, device=args.device) 61 | model_opt = optim.Adam(model.parameters(), lr=args.lr) 62 | 63 | # train 64 | for epoch_idx in range(args.num_epochs): 65 | local_mu, local_log_var = model.train_one_epoch(dataloader=train_loader, model_opt=model_opt, epoch_idx=epoch_idx, args=args) 66 | if epoch_idx % 25 == 0: 67 | test_mu, test_log_var = model.test_one_epoch(dataloader=test_loader) 68 | 69 | # save 70 | model.save(args.save_path) 71 | # load 72 | model.load(args.load_path) 73 | 74 | # =================== Visualization ====================== # 75 | os.makedirs("../../output/images", exist_ok=True) 76 | print('sample image,please wait!') 77 | with torch.no_grad(): 78 | sample = model.sample(64) 79 
| save_image(sample.view(64, 1, 28, 28), '../../output/images/VAE_sample.png') 80 | print('complete!!!') 81 | 82 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Restricted_Boltzmann_Machine/RBM_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | RBM 4 | A Practical Guide to Training 5 | Restricted Boltzmann Machines 6 | Geoffrey Hinton 7 | Publihsed in 2010 8 | =========================================== 9 | """ 10 | # Author: Muyao Wang , Xinyang Liu 11 | # License: BSD-3-Clause 12 | 13 | import os 14 | import argparse 15 | import numpy as np 16 | 17 | import torch.utils.data 18 | import torch.optim as optim 19 | from torch.autograd import Variable 20 | 21 | from torchvision import datasets, transforms 22 | from torchvision.utils import save_image 23 | 24 | from pydpm.model import RBM 25 | 26 | # =========================================== ArgumentParser ===================================================================== # 27 | parser = argparse.ArgumentParser() 28 | 29 | # device 30 | parser.add_argument("--gpu_id", type=int, default=0) 31 | 32 | # dataset 33 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 34 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/RBM.pth', help="the path of loading model") 39 | 40 | parser.add_argument("--n_vis", type=int, default=784, help="dimensionality of visible units") 41 | parser.add_argument("--n_hin", type=int, default=500, help="dimensionality of latent units") 42 | parser.add_argument("--k", type=int, default=1, help="layers of RBM") 43 | 44 | # optim 45 | parser.add_argument("--num_epochs", type=int, default=10, help="number of epochs of training") 46 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 47 | parser.add_argument("--lr", type=float, default=0.1, help="adam: learning rate") 48 | 49 | args = parser.parse_args() 50 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 51 | 52 | # =========================================== Dataset ===================================================================== # 53 | # mnist 54 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transforms.Compose([transforms.ToTensor()]), download=True) 55 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transforms.Compose([transforms.ToTensor()]), download=False) 56 | 57 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 58 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 59 | 60 | # =========================================== Model ===================================================================== # 61 | # model 62 | model = RBM(n_vis=args.n_vis, n_hin=args.n_hin, k=args.k) 63 | model_opt = optim.SGD(model.parameters(), lr=args.lr) 64 | 65 | # train 66 | for epoch_idx in range(args.num_epochs): 67 | v, v1 = model.train_one_epoch(dataloader=train_loader, model_opt=model_opt, epoch_idx=epoch_idx, args=args) 68 | 69 | # save 70 | 
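# (model.save stores the trained RBM parameters under save_path; model.load below restores them from load_path)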
model.save(args.save_path) 71 | # load 72 | model.load(args.load_path) 73 | 74 | # =========================================== Visualization ===================================================================== # 75 | # visualize 76 | os.makedirs("../../output/images", exist_ok=True) 77 | print('sample image,please wait!') 78 | with torch.no_grad(): 79 | save_image(v.view(-1, 1, 28, 28), '../../output/images/RBM_l_real_' + '.png') 80 | save_image(v1.view(-1, 1, 28, 28), '../../output/images/RBM_l_generate_' + '.png') 81 | print('complete!!!') 82 | 83 | 84 | # device .test_one_epoch 85 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Latent_Dirchilet_Allocation/LDA_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Latent Dirichlet Allocation 4 | David M.Blei Andrew Y.Ng and Michael I.Jordan 5 | Published in Journal of Machine Learning 2003 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | from tqdm import tqdm 19 | 20 | from torchvision import datasets, transforms 21 | 22 | from pydpm.model import LDA 23 | from pydpm.metric import ACC 24 | from pydpm.dataloader.image_data import tensor_transforms 25 | 26 | # =========================================== ArgumentParser ===================================================================== # 27 | parser = argparse.ArgumentParser() 28 | 29 | # device 30 | parser.add_argument("--device", type=str, default='gpu') 31 | 32 | # dataset 33 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 34 | 35 | # model 36 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 37 | parser.add_argument("--load_path", type=str, default='../../save_models/LDA.npy', help="the path of loading model") 38 | 39 | parser.add_argument("--z_dim", type=int, default=128, help="dimensionality of the z latent space") 40 | 41 | # optim 42 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 43 | 44 | args = parser.parse_args() 45 | 46 | # =========================================== Dataset ===================================================================== # 47 | # define transform for dataset and load orginal dataset 48 | transform = transforms.Compose([transforms.ToTensor()]) 49 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 50 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 51 | 52 | # transform dataset and reshape the dataset into [batch_size, feature_num] 53 | train_data = tensor_transforms(train_dataset.data, transform) 54 | train_data = train_data.permute([1, 2, 0]).reshape([len(train_dataset), -1]) # len(train_dataset, 28*28) 55 | test_data = tensor_transforms(test_dataset.data, transform) 56 | test_data = test_data.permute([1, 2, 0]).reshape([len(test_dataset), -1]) 57 | train_label = train_dataset.train_labels 58 | test_label = test_dataset.test_labels 59 | 60 | # transpose the dataset to fit the model and convert a tensor to numpy array 61 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 62 | test_data = np.array(np.ceil(test_data[:999, 
:].T.numpy() * 5), order='C') 63 | train_label = train_label.numpy()[:999] 64 | test_label = test_label.numpy()[:999] 65 | 66 | # =========================================== Model ===================================================================== # 67 | # create the model and deploy it on gpu or cpu 68 | model = LDA(K=args.z_dim, device=args.device) 69 | model.initial(train_data) # use the shape of train_data to initialize the params of model 70 | 71 | # train and evaluation 72 | train_local_params = model.train(data=train_data, num_epochs=args.num_epochs) 73 | train_local_params = model.test(data=train_data, num_epochs=args.num_epochs) 74 | test_local_params = model.test(data=test_data, num_epochs=args.num_epochs) 75 | 76 | # save the model after training 77 | model.save(args.save_path) 78 | # load the model 79 | model.load(args.load_path) 80 | 81 | # evaluate the model with classification accuracy 82 | # the demo accuracy can achieve 0.850 83 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_label, test_label, 'SVM') 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Poisson_Factor_Analysis/PFA_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Poisson Factor Analysis 4 | Beta-Negative Binomial Process and Poisson Factor Analysis 5 | Mingyuan Zhou, Lauren Hannah, David Dunson, Lawrence Carin 6 | Publihsed in International Conference on Artificial Intelligence and Statistic 2012 7 | 8 | =========================================== 9 | 10 | """ 11 | 12 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 13 | # License: BSD-3-Clause 14 | 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | 19 | from torchvision import datasets, transforms 20 | 21 | from pydpm.model import PFA 22 | from pydpm.metric import ACC 23 | from pydpm.dataloader.image_data import tensor_transforms 24 | 25 | # =========================================== ArgumentParser ===================================================================== # 26 | parser = argparse.ArgumentParser() 27 | 28 | # device 29 | parser.add_argument("--device", type=str, default='gpu') 30 | 31 | # dataset 32 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/PFA.npy', help="the path of loading model") 37 | 38 | parser.add_argument("--z_dim", type=int, default=128, help="dimensionality of the z latent space") 39 | 40 | # optim 41 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 42 | 43 | args = parser.parse_args() 44 | 45 | # =========================================== Dataset ===================================================================== # 46 | # define transform for dataset and load orginal dataset 47 | transform = transforms.Compose([transforms.ToTensor()]) 48 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 49 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 50 | 51 | # transform dataset and reshape the dataset into [batch_size, feature_num] 52 | train_data = tensor_transforms(train_dataset.data, transform) 53 
| train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 54 | test_data = tensor_transforms(test_dataset.data, transform) 55 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 56 | train_label = train_dataset.train_labels 57 | test_label = test_dataset.test_labels 58 | 59 | # transpose the dataset to fit the model and convert a tensor to numpy array 60 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 61 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 62 | train_label = train_label.numpy()[:999] 63 | test_label = test_label.numpy()[:999] 64 | 65 | # =========================================== Model ===================================================================== # 66 | # create the model and deploy it on gpu or cpu 67 | model = PFA(K=args.z_dim, device=args.device) 68 | model.initial(train_data) # use the shape of train_data to initialize the params of model 69 | 70 | # train and evaluation 71 | train_local_params = model.train(train_data, num_epochs=args.num_epochs) 72 | train_local_params = model.test(train_data, num_epochs=args.num_epochs) 73 | test_local_params = model.test(test_data, num_epochs=args.num_epochs) 74 | 75 | # save the model after training 76 | model.save(args.save_path) 77 | # load the model 78 | model.load(args.load_path) 79 | 80 | # evaluate the model with classification accuracy 81 | # the demo accuracy can achieve 0.8008 82 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_label, test_label, 'SVM') 83 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Gamma_Dynamic_System/DPGDS_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Deep Poisson Gamma Dynamical Systems Demo 4 | Dandan Guo, Bo Chen and Hao Zhang 5 | Published in Neural Information Processing Systems 2018 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import argparse 16 | import scipy.io as sio 17 | 18 | from torchvision import datasets, transforms 19 | 20 | from pydpm.model import DPGDS 21 | from pydpm.metric import ACC 22 | from pydpm.dataloader.image_data import tensor_transforms 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--device", type=str, default='gpu') 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | 33 | # model 34 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 35 | parser.add_argument("--load_path", type=str, default='../../save_models/DPGDS.npy', help="the path of loading model") 36 | 37 | parser.add_argument("--z_dims", type=list, default=[200, 100, 50], help="number of topics in DPGDS") 38 | 39 | # optim 40 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 41 | 42 | args = parser.parse_args() 43 | 44 | # =========================================== Dataset ===================================================================== # 45 | # 
define transform for dataset and load original dataset 46 | transform = transforms.Compose([transforms.ToTensor()]) 47 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 48 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 49 | 50 | # transform dataset and reshape the dataset into [batch_size, feature_num] 51 | train_data = tensor_transforms(train_dataset.data, transform) 52 | train_data = train_data.permute(1, 2, 0).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 53 | test_data = tensor_transforms(test_dataset.data, transform) 54 | test_data = test_data.permute(1, 2, 0).reshape(len(test_dataset), -1) 55 | train_label = train_dataset.train_labels 56 | test_label = test_dataset.test_labels 57 | 58 | # transpose the dataset to fit the model and convert a tensor to numpy array 59 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 60 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 61 | train_label = train_label.numpy()[:999] 62 | test_label = test_label.numpy()[:999] 63 | 64 | # =========================================== Model ===================================================================== # 65 | # create the model and deploy it on gpu or cpu 66 | # model = DPGDS([200, 100, 50], 'gpu') # topics of each layers 67 | model = DPGDS(K=args.z_dims, device=args.device) 68 | model.initial(train_data) # use the shape of train_data to initialize the params of model 69 | 70 | # train and evaluation 71 | train_local_params = model.train(data=train_data, num_epochs=args.num_epochs) 72 | train_local_params = model.test(data=train_data, num_epochs=args.num_epochs) 73 | test_local_params = model.test(data=test_data, num_epochs=args.num_epochs) 74 | 75 | # save the model after training 76 | model.save(args.save_path) 77 | # load the model 78 | model.load(args.load_path) 79 | 80 | # evaluate the model with classification accuracy 81 | # the demo accuracy can achieve 0.8519 82 | results = ACC(train_local_params.Theta[0], test_local_params.Theta[0], train_label, test_label, 'SVM') 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Multimodal_Poisson_Gamma_Belief_Network/MPGBN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Multimodal Poisson Gamma Belief Network 4 | Chaojie Wang, Bo Chen and Mingyuan Zhou 5 | Published in AAAI Conference on Artificial Intelligence 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import argparse 16 | import scipy.io as sio 17 | 18 | from torchvision import datasets, transforms 19 | 20 | from pydpm.model import MPGBN 21 | from pydpm.metric import ACC 22 | from pydpm.dataloader.image_data import tensor_transforms 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--device", type=str, default='gpu') 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | 33 | # model 34 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of
saving model") 35 | parser.add_argument("--load_path", type=str, default='../../save_models/MPGBN.npy', help="the path of loading model") 36 | 37 | parser.add_argument("--z_dims", type=list, default=[128, 64, 32], help="number of topics at diffrent layers in MPGBN") 38 | 39 | args = parser.parse_args() 40 | 41 | # =========================================== Dataset ===================================================================== # 42 | # define transform for dataset and load orginal dataset 43 | transform = transforms.Compose([transforms.ToTensor()]) 44 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 45 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 46 | 47 | # transform dataset and reshape the dataset into [batch_size, feature_num] 48 | train_data = tensor_transforms(train_dataset.data, transform) 49 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 50 | test_data = tensor_transforms(test_dataset.data, transform) 51 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 52 | train_label = train_dataset.train_labels 53 | test_label = test_dataset.test_labels 54 | 55 | # transpose the dataset to fit the model and convert a tensor to numpy array 56 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 57 | train_data_1 = train_data[:360, :] 58 | train_data_2 = train_data[360:, :] 59 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 60 | test_data_1 = test_data[:360, :] 61 | test_data_2 = test_data[360:, :] 62 | train_label = train_label.numpy()[:999] 63 | test_label = test_label.numpy()[:999] 64 | 65 | # =========================================== Model ===================================================================== # 66 | # create the model and deploy it on gpu or cpu 67 | model = MPGBN(K=args.z_dims, device=args.device) 68 | model.initial(train_data_1, train_data_2) # use the shape of train_data_1 and train_data_2 to initialize the params of model 69 | 70 | # train and evaluation 71 | train_local_params = model.train(train_data_1, train_data_2, num_epochs=args.num_epochs) 72 | train_local_params = model.test(train_data_1, train_data_2, num_epochs=args.num_epochs) 73 | test_local_params = model.test(test_data_1, test_data_2, num_epochs=args.num_epochs) 74 | 75 | # save the model after training 76 | model.save(args.save_path) 77 | # load the model 78 | model.load(args.load_path) 79 | 80 | # evaluate the model with classification accuracy 81 | # the demo accuracy can achieve 0.8549 82 | results = ACC(train_local_params.Theta[0], test_local_params.Theta[0], train_label, test_label, 'SVM') 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Dirchilet_Belief_Network/DirBN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Poisson Factor Analysis DirBN(Dirichlet belief networks) Demo 4 | Dirichlet belief networks for topic structure learning 5 | He Zhao, Lan Du, Wray Buntine, Mingyuan Zhou 6 | Publihsed in Conference and Workshop on Neural Information Processing Systems 2018 7 | 8 | =========================================== 9 | 10 | """ 11 | 12 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 13 | # License: BSD-3-Clause 14 | 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 
| 19 | from torchvision import datasets, transforms 20 | 21 | from pydpm.model import DirBN 22 | from pydpm.metric import ACC 23 | from pydpm.dataloader.image_data import tensor_transforms 24 | 25 | # =========================================== ArgumentParser ===================================================================== # 26 | parser = argparse.ArgumentParser() 27 | 28 | # device 29 | parser.add_argument("--device", type=str, default='gpu') 30 | 31 | # dataset 32 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/DirBN.npy', help="the path of loading model") 37 | 38 | parser.add_argument("--z_dims", type=list, default=[100, 100], help="number of topics of 2 layers in DirBN") 39 | 40 | # optim 41 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 42 | 43 | args = parser.parse_args() 44 | 45 | # =========================================== Dataset ===================================================================== # 46 | # define transform for dataset and load orginal dataset 47 | transform = transforms.Compose([transforms.ToTensor()]) 48 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 49 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 50 | 51 | # transform dataset and reshape the dataset into [batch_size, feature_num] 52 | train_data = tensor_transforms(train_dataset.data, transform) 53 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 54 | test_data = tensor_transforms(test_dataset.data, transform) 55 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 56 | train_label = train_dataset.train_labels 57 | test_label = test_dataset.test_labels 58 | 59 | # transpose the dataset to fit the model and convert a tensor to numpy array 60 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 61 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 62 | train_label = train_label.numpy()[:999] 63 | test_label = test_label.numpy()[:999] 64 | 65 | # =========================================== Model ===================================================================== # 66 | # create the model and deploy it on gpu or cpu 67 | model = DirBN(K=args.z_dims, device=args.device) 68 | model.initial(train_data) # use the shape of train_data to initialize the params of model 69 | 70 | # train and evaluation 71 | for i in range(100): 72 | train_local_params = model.train(data=train_data, num_epochs=args.num_epochs, is_initial_local=False) 73 | train_local_params = model.test(data=train_data, num_epochs=args.num_epochs) 74 | test_local_params = model.test(data=test_data, num_epochs=args.num_epochs) 75 | 76 | # save the model after training 77 | model.save(args.save_path) 78 | # load the model 79 | model.load(args.load_path) 80 | 81 | # evaluate the model with classification accuracy 82 | # the demo accuracy can achieve 0.78 83 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_label, test_label, 'SVM') 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Conditional_Variational_Auto-encoder/CVAE_Demo.py: 
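The Bayesian demos above (MPGBN, DirBN) all report classification accuracy through pydpm.metric.ACC on the inferred Theta features. For readers who want to cross-check that number outside pydpm, the following is a minimal stand-alone sketch using scikit-learn; it is not the implementation of pydpm.metric.ACC, the helper name svm_accuracy is ours, and it assumes Theta is stored as [n_topics, n_samples] as in the demos above.

import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

def svm_accuracy(train_theta, test_theta, train_label, test_label):
    # Theta comes out of these demos as [n_topics, n_samples]; transpose to [n_samples, n_topics]
    clf = SVC(kernel='linear')
    clf.fit(np.asarray(train_theta).T, train_label)
    return accuracy_score(test_label, clf.predict(np.asarray(test_theta).T))

# e.g. svm_accuracy(train_local_params.Theta, test_local_params.Theta, train_label, test_label)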
-------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | CVAE 4 | Learning Structured Output Representation using Deep Conditional Generative Models 5 | Kihyuk Sohn, Xinchen Yan and Honglak Lee 6 | Published in 2015 7 | 8 | =========================================== 9 | """ 10 | # Author: Bufeng Ge <20009100138@stu.xidian.edu.cn>, Xinyang Liu 11 | # License: BSD-3-Clause 12 | 13 | import os 14 | import argparse 15 | import sys 16 | import torch 17 | import numpy as np 18 | import torch.optim as optim 19 | 20 | from torchvision import datasets, transforms 21 | from torchvision.utils import save_image 22 | from pydpm.model import CVAE 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--gpu_id", type=int, default=0) 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/CVAE.pth', help="the path of loading model") 37 | 38 | parser.add_argument("--z_dim", type=int, default=64, help="dimensionality of the z latent space") 39 | parser.add_argument("--encoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 40 | parser.add_argument("--decoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 41 | 42 | # optim 43 | parser.add_argument("--num_epochs", type=int, default=1, help="number of epochs of training") 44 | parser.add_argument("--batch_size", type=int, default=256, help="size of the batches") 45 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 46 | 47 | args = parser.parse_args() 48 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 49 | 50 | # =========================================== Dataset ===================================================================== # 51 | # mnist 52 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transforms.ToTensor(), download=True) 53 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transforms.ToTensor(), download=False) 54 | args.cond_dim = len(train_dataset.classes) 55 | 56 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 57 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 58 | 59 | model = CVAE(cond_dim=args.cond_dim, in_dim=args.img_size**2, z_dim=args.z_dim, encoder_hid_dims=args.encoder_hid_dims, decoder_hid_dims=args.decoder_hid_dims, device=args.device) 60 | model_opt = optim.Adam(model.parameters(), lr=args.lr) 61 | 62 | for epoch in range(args.num_epochs): 63 | local_mu, local_log_var = model.train_one_epoch(model_opt=model_opt, dataloader=train_loader, epoch=epoch, n_epochs=args.num_epochs) 64 | if epoch % 25 == 0: 65 | test_mu, test_log_var = model.test_one_epoch(dataloader=test_loader) 66 | 67 | # Save model 68 | model.save(args.save_path) 69 | # Load model 70 |
model.load(args.load_path) 71 | 72 | # =================== Visualization ====================== # 73 | os.makedirs("../../output/images", exist_ok=True) 74 | # sample image 75 | print('sample image, please wait!') 76 | with torch.no_grad(): 77 | sample = model.sample(64, torch.tensor([7]*64))#[random.randint(0,9) for i in range(64)]) 78 | save_image(sample.view(64,1,28, 28), '../../output/images/VAE_sample_7.png') 79 | print('complete!!!') 80 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/VQ_Variational_Autoencoder/VQ_VAE_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | VQ-VAE 4 | Neural Discrete Representation Learning 5 | Aaron van den Oord, Oriol Vinyals, Koray Kavukcuoglu 6 | Publihsed in 2017 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Muyao Wang , Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | import sys 17 | import torch 18 | import torch.optim as optim 19 | 20 | from torchvision import datasets, transforms 21 | from torchvision.utils import save_image 22 | from pydpm.model import VQVAE 23 | 24 | # =========================================== ArgumentParser ===================================================================== # 25 | parser = argparse.ArgumentParser() 26 | 27 | # device 28 | parser.add_argument("--gpu_id", type=int, default=0) 29 | 30 | # dataset 31 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 32 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/VQVAE.pth', help="the path of loading model") 37 | parser.add_argument("--embed_dim", type=int, default=2, help="dimensionality of the codebook the same as z_dim") 38 | parser.add_argument("--num_embed", type=int, default=8000, help="number of codebook") 39 | parser.add_argument("--z_dim", type=int, default=2, help="dimensionality of the z latent space") 40 | parser.add_argument("--encoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 41 | parser.add_argument("--decoder_hid_dims", type=int, default=[512, 256], help="dimensionality of the latent space") 42 | 43 | # optim 44 | parser.add_argument("--num_epochs", type=int, default=20, help="number of epochs of training") 45 | parser.add_argument("--batch_size", type=int, default=256, help="size of the batches") 46 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 47 | 48 | args = parser.parse_args() 49 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 50 | 51 | # =========================================== Dataset ===================================================================== # 52 | # mnist 53 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transforms.ToTensor(), download=True) 54 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transforms.ToTensor(), download=False) 55 | 56 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 57 | test_loader = 
torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 58 | 59 | # =========================================== Model ===================================================================== # 60 | # model 61 | model = VQVAE(embed_dim=args.embed_dim, num_embed=args.num_embed, in_dim=args.img_size**2, z_dim=args.z_dim, encoder_hid_dims=args.encoder_hid_dims, decoder_hid_dims=args.decoder_hid_dims, device=args.device) 62 | model_opt = optim.Adam(model.parameters(), lr=args.lr) 63 | 64 | # train 65 | for epoch_idx in range(args.num_epochs): 66 | model.train_one_epoch(dataloader=train_loader, model_opt=model_opt, epoch_idx=epoch_idx, args=args) 67 | if epoch_idx % 25 == 0: 68 | model.test_one_epoch(dataloader=test_loader) 69 | 70 | # save 71 | model.save(args.save_path) 72 | # load 73 | model.load(args.load_path) 74 | 75 | # =================== Visualization ====================== # 76 | os.makedirs("../../output/images", exist_ok=True) 77 | print('sample image, please wait!') 78 | with torch.no_grad(): 79 | sample = model.sample(64) 80 | save_image(sample.view(64, 1, 28, 28), '../../output/images/VQVAE_sample.png') 81 | print('complete!!!') 82 | 83 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Normlizing_Flow/NFlow_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | RealNVP 4 | DENSITY ESTIMATION USING REAL NVP 5 | Laurent Dinh, Jascha Sohl-Dickstein, Samy Bengio 6 | Published in 2017 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | 17 | import torch 18 | import torch.optim as optim 19 | from torch.utils.data import DataLoader 20 | 21 | import matplotlib.pyplot as plt 22 | from sklearn.datasets import make_moons 23 | 24 | from pydpm.model import NFlow 25 | from pydpm.model.deep_learning_pm.nflow import RealNVP_2D 26 | 27 | # =========================================== ArgumentParser ===================================================================== # 28 | parser = argparse.ArgumentParser() 29 | 30 | # device 31 | parser.add_argument("--gpu_id", type=int, default=0) 32 | 33 | # dataset 34 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/NFlow.pth', help="the path of loading model") 39 | 40 | parser.add_argument("--sample_num", type=int, default=512) 41 | parser.add_argument("--flows_num", type=int, default=2) 42 | parser.add_argument("--flow_name", type=str, default="RealNVP_2D") 43 | parser.add_argument("--hid_dim", type=int, default=128) 44 | 45 | # optim 46 | parser.add_argument("--num_epochs", type=int, default=1000) 47 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 48 | parser.add_argument("--lr", type=float, default=0.001, help="adam: learning rate") 49 | 50 | args = parser.parse_args() 51 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 52 | 53 | # =========================================== Dataset ===================================================================== # 54 | # two moons (sklearn make_moons) 55 | data, label =
make_moons(n_samples=args.sample_num, noise=0.05) 56 | data = torch.tensor(data, dtype=torch.float32) 57 | # Normalization 58 | for i in range(data.shape[1]): 59 | data[:, i] = (data[:, i] - torch.mean(data[:, i])) / torch.std(data[:, i]) 60 | dataloader = torch.utils.data.DataLoader(dataset=data, batch_size=args.batch_size, shuffle=True) 61 | 62 | # =========================================== Model ===================================================================== # 63 | # model 64 | flows = [RealNVP_2D(dim=2, hidden_dim=args.hid_dim, device=args.device) for _ in range(args.flows_num)] 65 | 66 | model = NFlow(in_dim=2, flows=flows, device=args.device) 67 | model_opt = optim.Adam(model.parameters(), lr=args.lr) 68 | 69 | # train 70 | for epoch_idx in range(args.num_epochs): 71 | local_z = model.train_one_epoch(model_opt=model_opt, dataloader=dataloader, epoch=epoch_idx, num_epochs=args.num_epochs) 72 | if epoch_idx == args.num_epochs - 1: 73 | test_local_z = model.test_one_epoch(dataloader=dataloader) 74 | 75 | # save 76 | model.save(args.save_path) 77 | # load 78 | model.load(args.load_path) 79 | 80 | # =========================================== Visualization ===================================================================== # 81 | # visualize 82 | os.makedirs("../output/images", exist_ok=True) 83 | print('sample image,please wait!') 84 | 85 | plt.figure(figsize=(8, 3)) 86 | plt.subplot(1, 3, 1) 87 | plt.scatter(data[:, 0], data[:, 1], marker=".", color="b", s=10) 88 | plt.title("Training data") 89 | plt.subplot(1, 3, 2) 90 | plt.scatter(test_local_z[:, 0], test_local_z[:, 1], marker=".", color="r", s=10) 91 | plt.title("Latent space") 92 | plt.subplot(1, 3, 3) 93 | samples = model.sample(args.sample_num).cpu().detach().numpy() 94 | plt.scatter(samples[:, 0], samples[:, 1], marker=".", color="b", s=10) 95 | plt.title("Generated samples") 96 | plt.savefig("../output/images/nflow.png") 97 | plt.show() 98 | 99 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Deep_Poisson_Factor_Analysis/DPFA_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Deep Poisson Factor Analysis Demo 4 | Scalable Deep Poisson Factor Analysis for Topic Modeling 5 | Zhe Gan, Changyou Chen, Ricardo Henao, David Carlson, Lawrence Carin 6 | Publised in International Conference on Machine Learning 2015 7 | 8 | =========================================== 9 | 10 | """ 11 | 12 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 13 | # License: BSD-3-Clause 14 | 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | 19 | from torchvision import datasets, transforms 20 | 21 | from pydpm.model import DPFA 22 | from pydpm.metric import ACC 23 | from pydpm.dataloader.image_data import tensor_transforms 24 | 25 | # =========================================== ArgumentParser ===================================================================== # 26 | parser = argparse.ArgumentParser() 27 | 28 | # device 29 | parser.add_argument("--device", type=str, default='gpu') 30 | 31 | # dataset 32 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 33 | 34 | # model 35 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 36 | parser.add_argument("--load_path", type=str, default='../../save_models/DPFA.npy', help="the path 
of loading model") 37 | 38 | parser.add_argument("--z_dims", type=list, default=[128, 64, 32], help="numbers of topics of 3 layers in DPFA(PFA+DSBN+Gibbs)") 39 | 40 | # optim 41 | parser.add_argument("--burnin", type=int, default=100, help="the iterations of burnin stage") 42 | parser.add_argument("--collection", type=int, default=80, help="the iterations of collection stage") 43 | 44 | args = parser.parse_args() 45 | 46 | # =========================================== Dataset ===================================================================== # 47 | # define transform for dataset and load orginal dataset 48 | # load dataset 49 | transform = transforms.Compose([transforms.ToTensor()]) 50 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 51 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 52 | 53 | # transform dataset and reshape the dataset into [batch_size, feature_num] 54 | train_data = tensor_transforms(train_dataset.data, transform) 55 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 56 | test_data = tensor_transforms(test_dataset.data, transform) 57 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 58 | train_label = train_dataset.train_labels 59 | test_label = test_dataset.test_labels 60 | 61 | # transpose the dataset to fit the model and convert a tensor to numpy array 62 | train_data = np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 63 | test_data = np.array(np.ceil(test_data[:999, :].T.numpy() * 5), order='C') 64 | train_label = train_label.numpy()[:999] 65 | test_label = test_label.numpy()[:999] 66 | 67 | # =========================================== Model ===================================================================== # 68 | # create the model and deploy it on gpu or cpu 69 | model = DPFA(K=args.z_dims, device=args.device) 70 | model.initial(train_data) # use the shape of train_data to initialize the params of model 71 | 72 | # train and evaluation 73 | train_local_params = model.train(train_data, burnin=args.burnin, collection=args.collection) 74 | train_local_params = model.test(train_data, burnin=args.burnin, collection=args.collection) 75 | test_local_params = model.test(test_data, burnin=args.burnin, collection=args.collection) 76 | 77 | # save the model after training 78 | model.save(args.save_path) 79 | # load the model 80 | model.load(args.load_path) 81 | 82 | # evaluate the model with classification accuracy 83 | # the demo accuracy can achieve 0.9099 84 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_label, test_label, 'SVM') 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /enviroment.yaml: -------------------------------------------------------------------------------- 1 | name: pyDPM 2 | channels: 3 | - pyg 4 | - pytorch 5 | - nvidia 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=main 9 | - _openmp_mutex=5.1=1_gnu 10 | - appdirs=1.4.4=pyhd3eb1b0_0 11 | - blas=1.0=mkl 12 | - brotlipy=0.7.0=py39h27cfd23_1003 13 | - bzip2=1.0.8=h7b6447c_0 14 | - ca-certificates=2023.01.10=h06a4308_0 15 | - certifi=2023.5.7=py39h06a4308_0 16 | - cffi=1.15.1=py39h74dc2b5_0 17 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 18 | - cryptography=39.0.1=py39h9ce1e76_0 19 | - cuda-cudart=11.7.99=0 20 | - cuda-cupti=11.7.101=0 21 | - cuda-libraries=11.7.1=0 22 | - cuda-nvrtc=11.7.99=0 23 | - cuda-nvtx=11.7.91=0 24 | - cuda-runtime=11.7.1=0 25 | 
- ffmpeg=4.3=hf484d3e_0 26 | - freetype=2.12.1=h4a9f257_0 27 | - giflib=5.2.1=h5eee18b_3 28 | - gmp=6.2.1=h295c915_3 29 | - gnutls=3.6.15=he1e5248_0 30 | - idna=3.4=py39h06a4308_0 31 | - intel-openmp=2021.4.0=h06a4308_3561 32 | - jpeg=9e=h5eee18b_1 33 | - lame=3.100=h7b6447c_0 34 | - lcms2=2.12=h3be6417_0 35 | - ld_impl_linux-64=2.38=h1181459_1 36 | - lerc=3.0=h295c915_0 37 | - libcublas=11.10.3.66=0 38 | - libcufft=10.7.2.124=h4fbf590_0 39 | - libcufile=1.6.1.9=0 40 | - libcurand=10.3.2.106=0 41 | - libcusolver=11.4.0.1=0 42 | - libcusparse=11.7.4.91=0 43 | - libdeflate=1.17=h5eee18b_0 44 | - libffi=3.3=he6710b0_2 45 | - libgcc-ng=11.2.0=h1234567_1 46 | - libgfortran-ng=11.2.0=h00389a5_1 47 | - libgfortran5=11.2.0=h1234567_1 48 | - libgomp=11.2.0=h1234567_1 49 | - libiconv=1.16=h7f8727e_2 50 | - libidn2=2.3.4=h5eee18b_0 51 | - libnpp=11.7.4.75=0 52 | - libnvjpeg=11.8.0.2=0 53 | - libpng=1.6.39=h5eee18b_0 54 | - libstdcxx-ng=11.2.0=h1234567_1 55 | - libtasn1=4.19.0=h5eee18b_0 56 | - libtiff=4.5.0=h6a678d5_2 57 | - libunistring=0.9.10=h27cfd23_0 58 | - libwebp=1.2.4=h11a3e52_1 59 | - libwebp-base=1.2.4=h5eee18b_1 60 | - lz4-c=1.9.4=h6a678d5_0 61 | - mkl=2021.4.0=h06a4308_640 62 | - mkl-service=2.4.0=py39h7f8727e_0 63 | - mkl_fft=1.3.1=py39hd3c417c_0 64 | - mkl_random=1.2.2=py39h51133e4_0 65 | - ncurses=6.4=h6a678d5_0 66 | - nettle=3.7.3=hbbd107a_1 67 | - numpy=1.24.3=py39h14f4228_0 68 | - numpy-base=1.24.3=py39h31eccc5_0 69 | - openh264=2.1.1=h4ff587b_0 70 | - openssl=1.1.1t=h7f8727e_0 71 | - packaging=23.0=py39h06a4308_0 72 | - pillow=9.4.0=py39h6a678d5_0 73 | - pip=23.0.1=py39h06a4308_0 74 | - pooch=1.4.0=pyhd3eb1b0_0 75 | - portalocker=2.3.0=py39h06a4308_1 76 | - pycparser=2.21=pyhd3eb1b0_0 77 | - pyopenssl=23.0.0=py39h06a4308_0 78 | - pysocks=1.7.1=py39h06a4308_0 79 | - python=3.9.0=hdb3f193_2 80 | - pytorch=1.13.0=py3.9_cuda11.7_cudnn8.5.0_0 81 | - pytorch-cuda=11.7=h778d358_5 82 | - pytorch-mutex=1.0=cuda 83 | - pytorch-scatter=2.1.1=py39_torch_1.13.0_cu117 84 | - pytorch-sparse=0.6.17=py39_torch_1.13.0_cu117 85 | - readline=8.2=h5eee18b_0 86 | - requests=2.29.0=py39h06a4308_0 87 | - setuptools=66.0.0=py39h06a4308_0 88 | - six=1.16.0=pyhd3eb1b0_1 89 | - sqlite=3.41.2=h5eee18b_0 90 | - tk=8.6.12=h1ccaba5_0 91 | - torchaudio=0.13.0=py39_cu117 92 | - torchdata=0.5.0=py39 93 | - torchtext=0.14.0=py39 94 | - torchvision=0.14.0=py39_cu117 95 | - tqdm=4.65.0=py39hb070fc8_0 96 | - typing_extensions=4.5.0=py39h06a4308_0 97 | - tzdata=2023c=h04d1e81_0 98 | - urllib3=1.26.15=py39h06a4308_0 99 | - wheel=0.38.4=py39h06a4308_0 100 | - xz=5.4.2=h5eee18b_0 101 | - zlib=1.2.13=h5eee18b_0 102 | - zstd=1.5.5=hc292b87_0 103 | - pip: 104 | - contourpy==1.0.7 105 | - cycler==0.11.0 106 | - fonttools==4.39.4 107 | - gensim==4.3.1 108 | - importlib-resources==5.12.0 109 | - jinja2==3.1.2 110 | - joblib==1.2.0 111 | - kiwisolver==1.4.4 112 | - markupsafe==2.1.2 113 | - matplotlib==3.7.1 114 | - psutil==5.9.5 115 | - pyparsing==3.0.9 116 | - python-dateutil==2.8.2 117 | - scikit-learn==1.2.2 118 | - scipy==1.10.1 119 | - smart-open==6.3.0 120 | - threadpoolctl==3.1.0 121 | - torch-geometric==2.3.1 122 | - zipp==3.15.0 123 | prefix: /home/chaojie.wang/anaconda3/envs/pyDPM 124 | -------------------------------------------------------------------------------- /pydpm/sampler/model_sampler_cpu.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Model Sampler implemented on CPU 4 | =========================================== 
5 | 6 | """ 7 | 8 | # Author: Chaojie Wang ; Jiawen Wu 9 | # License: BSD-3-Clause 10 | 11 | import numpy as np 12 | import numpy.ctypeslib as npct 13 | import ctypes 14 | from ctypes import * 15 | import os 16 | 17 | class model_sampler_cpu(object): 18 | 19 | def __init__(self, system_type='Windows', seed=0): 20 | """ 21 | The basic class for sampling distribution on cpu 22 | """ 23 | super(model_sampler_cpu, self).__init__() 24 | 25 | self.system_type = system_type 26 | self.seed = seed 27 | 28 | array_2d_double = npct.ndpointer(dtype=np.double, ndim=2, flags='C') 29 | array_1d_double = npct.ndpointer(dtype=np.double, ndim=1, flags='C') 30 | array_int = npct.ndpointer(dtype=np.int32, ndim=0, flags='C') 31 | ll = ctypes.cdll.LoadLibrary 32 | 33 | if system_type == "Windows": 34 | self.Crt_lib = ll(os.path.dirname(__file__) + "\_compact\crt_cpu.dll") 35 | self.Multi_lib = ll(os.path.dirname(__file__) + "\_compact\multi_aug_cpu.dll") 36 | self.Crt_Multi_lib = ll(os.path.dirname(__file__) + "\_compact\crt_multi_aug_cpu.dll") 37 | else: 38 | self.Crt_lib = ll(os.path.dirname(__file__) + "/_compact/crt_cpu.so") 39 | self.Multi_lib = ll(os.path.dirname(__file__) + "/_compact/multi_aug_cpu.so") 40 | self.Crt_Multi_lib = ll(os.path.dirname(__file__) + "/_compact/crt_multi_aug_cpu.so") 41 | 42 | 43 | self.Multi_lib.Multi_Sample.restype = None 44 | self.Multi_lib.Multi_Sample.argtypes = [array_2d_double, array_2d_double, array_2d_double, array_2d_double, 45 | array_2d_double, c_int, c_int, c_int] 46 | 47 | self.Crt_Multi_lib.Crt_Multi_Sample.restype = None 48 | self.Crt_Multi_lib.Crt_Multi_Sample.argtypes = [array_2d_double, array_2d_double, array_2d_double, array_2d_double, 49 | array_2d_double, c_int, c_int, c_int] 50 | 51 | self.Crt_lib.Crt_Sample.restype = None 52 | self.Crt_lib.Crt_Sample.argtypes = [array_2d_double, array_2d_double, array_2d_double, c_int, c_int] 53 | 54 | def multi_aug(self, X_t, Phi_t, Theta_t): 55 | 56 | X_t = np.array(X_t, order='C').astype('double') 57 | Phi_t = np.array(Phi_t, order='C').astype('double') 58 | Theta_t = np.array(Theta_t, order='C').astype('double') 59 | 60 | V = X_t.shape[0] 61 | J = X_t.shape[1] 62 | K = Theta_t.shape[0] 63 | Xt_to_t1_t = np.zeros([K, J], order='C').astype('double') 64 | WSZS_t = np.zeros([V, K], order='C').astype('double') 65 | self.Multi_lib.Multi_Sample(X_t, Phi_t, Theta_t, WSZS_t, Xt_to_t1_t, V, K, J) 66 | 67 | return Xt_to_t1_t, WSZS_t 68 | 69 | def crt(self, Xt_to_t1_t, p): 70 | 71 | Xt_to_t1_t = np.array(Xt_to_t1_t, order='C') 72 | p = np.array(p, order='C') 73 | 74 | K_t = Xt_to_t1_t.shape[0] 75 | J = Xt_to_t1_t.shape[1] 76 | X_t1 = np.zeros([K_t, J], order='C').astype('double') 77 | 78 | self.Crt_lib.Crt_Sample(Xt_to_t1_t, p, X_t1, K_t, J) 79 | 80 | return X_t1 81 | 82 | def crt_multi_aug(self, Xt_to_t1_t, Phi_t1, Theta_t1): 83 | 84 | Xt_to_t1_t = np.array(Xt_to_t1_t, order='C').astype('double') 85 | Phi_t1 = np.array(Phi_t1, order='C').astype('double') 86 | Theta_t1 = np.array(Theta_t1, order='C').astype('double') 87 | 88 | K_t = Xt_to_t1_t.shape[0] 89 | J = Xt_to_t1_t.shape[1] 90 | K_t1 = Theta_t1.shape[0] 91 | Xt_to_t1_t1 = np.zeros([K_t1, J], order='C').astype('double') 92 | WSZS_t1 = np.zeros([K_t, K_t1], order='C').astype('double') 93 | 94 | self.Crt_Multi_lib.Crt_Multi_Sample(Xt_to_t1_t, Phi_t1, Theta_t1, WSZS_t1, Xt_to_t1_t1, K_t, K_t1, J) 95 | 96 | return Xt_to_t1_t1, WSZS_t1 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- 
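To make the sampler interface above easier to follow, here is a minimal usage sketch (not taken from the repository) of how multi_aug and crt_multi_aug are typically chained to propagate latent counts through two layers of a gamma belief network. The shapes and random inputs are purely illustrative, and the compiled crt/multi_aug shared libraries must already be built for the current platform.

import numpy as np
from pydpm.sampler.model_sampler_cpu import model_sampler_cpu

sampler = model_sampler_cpu(system_type='Linux')   # any value other than 'Windows' loads the .so libraries

V, J, K1, K2 = 784, 100, 128, 64                   # vocabulary size, batch size, topics per layer
X = np.random.poisson(1.0, size=(V, J))            # toy count matrix of shape [V, J]
Phi_1, Theta_1 = np.random.rand(V, K1), np.random.rand(K1, J)
Phi_2, Theta_2 = np.random.rand(K1, K2), np.random.rand(K2, J)

# layer 1: multinomial augmentation of the observed counts
Xt_to_t1, WSZS_1 = sampler.multi_aug(X, Phi_1, Theta_1)             # [K1, J] and [V, K1]
# layer 2: CRT plus multinomial augmentation propagates the counts upward
Xt_to_t2, WSZS_2 = sampler.crt_multi_aug(Xt_to_t1, Phi_2, Theta_2)  # [K2, J] and [K1, K2]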
/pydpm/example/Bayesian_PM/Graph_Poisson_Gamma_Belief_Network/GPGBN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Deep Relational Topic Modeling via Graph Poisson Gamma Belief Network 4 | Chaojie Wang, Hao Zhang, Bo Chen, Dongsheng Wang, Zhengjue Wang 5 | Published in Advances in Neural Information Processing System 2020 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Wei Zhao <13279389260@163.com>; Jiawen Wu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | 19 | from torchvision import datasets, transforms 20 | from torch_geometric.datasets import Planetoid 21 | 22 | from pydpm.model import GPGBN 23 | from pydpm.metric import ACC 24 | from pydpm.dataloader.image_data import tensor_transforms 25 | from pydpm.dataloader.graph_data import Graph_Processer 26 | from pydpm.utils import cosine_simlarity 27 | 28 | # =========================================== ArgumentParser ===================================================================== # 29 | parser = argparse.ArgumentParser() 30 | 31 | # device 32 | parser.add_argument("--device", type=str, default='gpu') 33 | 34 | # dataset 35 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 36 | 37 | # model 38 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 39 | parser.add_argument("--load_path", type=str, default='../../save_models/GPGBN.npy', help="the path of loading model") 40 | 41 | parser.add_argument("--z_dims", type=list, default=[128, 64, 32], help="number of topics at diffrent layers in PGBN") 42 | 43 | # optim 44 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 45 | 46 | args = parser.parse_args() 47 | 48 | # =========================================== Dataset ===================================================================== # 49 | # define transform for dataset and load orginal dataset 50 | 51 | # # load dataset (Cora) cost too much memory 52 | # path = '../../dataset/Planetoid' 53 | # if not os.path.exists(path): 54 | # os.mkdir(path) 55 | # dataset = Planetoid(path, 'cora') 56 | # dataset = dataset[0] 57 | # 58 | # graph = graph_from_edges(dataset.edge_index, dataset.num_nodes, to_sparsetesor=False)[1] 59 | # # transpose the dataset to fit the model and convert a tensor to numpy array 60 | # train_data = dataset.x.T.numpy() 61 | 62 | # load dataset (MNIST) 63 | # define transform for dataset and load orginal dataset 64 | transform = transforms.Compose([transforms.ToTensor()]) 65 | train_dataset = datasets.MNIST(root=args.data_path, train=True, download=True) 66 | test_dataset = datasets.MNIST(root=args.data_path, train=False, download=False) 67 | 68 | # transform dataset and reshape the dataset into [batch_size, feature_num] 69 | train_data = tensor_transforms(train_dataset.data, transform) 70 | train_data = train_data.permute([1, 2, 0]).reshape(len(train_dataset), -1) # len(train_dataset, 28*28) 71 | test_data = tensor_transforms(test_dataset.data, transform) 72 | test_data = test_data.permute([1, 2, 0]).reshape(len(test_dataset), -1) 73 | train_label = train_dataset.train_labels 74 | test_label = test_dataset.test_labels 75 | 76 | # transpose the dataset to fit the model and convert a tensor to numpy array 77 | train_data = 
np.array(np.ceil(train_data[:999, :].T.numpy() * 5), order='C') 78 | train_label = train_label.numpy()[:999] 79 | 80 | # construct the adjacency matrix 81 | graph_processer = Graph_Processer() 82 | graph = graph_processer.graph_from_node_feature(train_data.T, 0.5, binary=False) 83 | 84 | # =========================================== Model ===================================================================== # 85 | # create the model and deploy it on gpu or cpu 86 | model = GPGBN(K=args.z_dims, device=args.device) 87 | model.initial(train_data) 88 | 89 | train_local_params = model.train(train_data, graph, num_epochs=args.num_epochs) 90 | 91 | # save the model after training 92 | model.save(args.save_path) 93 | # load the model 94 | model.load(args.load_path) 95 | 96 | 97 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Deep_Convolution_Generative_Adversarial_Networks/DCGAN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | DCGAN 4 | Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks 5 | Alec Radford, Luke Metz and Soumith Chintala 6 | Publihsed in ICLR 2016 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | 17 | import torch 18 | from torch.utils.data import DataLoader 19 | 20 | import torchvision.transforms as transforms 21 | from torchvision.utils import save_image 22 | from torchvision import datasets 23 | 24 | from pydpm.model import DCGAN 25 | from pydpm.utils.utils import unnormalize_to_zero_to_one 26 | 27 | # =========================================== ArgumentParser ===================================================================== # 28 | parser = argparse.ArgumentParser() 29 | 30 | # device 31 | parser.add_argument("--gpu_id", type=int, default=0) 32 | 33 | # dataset 34 | parser.add_argument("--data_path", type=str, default='../../../dataset/cifar/', help="the path of loading data") 35 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 36 | 37 | # model 38 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 39 | parser.add_argument("--load_path", type=str, default='../../save_models/DCGAN.pth', help="the path of loading model") 40 | 41 | parser.add_argument("--z_dim", type=int, default=100, help="generator dimensionality of the noise") 42 | parser.add_argument("--in_channels", type=int, default=3, help="number of image channels") # 1 for mnist 43 | parser.add_argument("--sample_interval", type=int, default=800, help="interval betwen image samples") 44 | 45 | # optim 46 | parser.add_argument("--num_epochs", type=int, default=200, help="number of epochs of training") 47 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 48 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 49 | parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient") 50 | parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient") 51 | 52 | args = parser.parse_args() 53 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 54 | 55 | # 
=========================================== Dataset ===================================================================== # 56 | # CIFAR10 57 | transform = transforms.Compose([ 58 | transforms.RandomHorizontalFlip(), 59 | transforms.ToTensor(), 60 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 61 | ]) 62 | train_dataset = datasets.CIFAR10(root=args.data_path, train=True, transform=transform, download=True) 63 | test_dataset = datasets.CIFAR10(root=args.data_path, train=False, transform=transform, download=False) 64 | 65 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 66 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 67 | 68 | # =========================================== Model ===================================================================== # 69 | # model 70 | # Initialize generator and discriminator 71 | model = DCGAN(args, device=args.device) 72 | 73 | # Optimizers 74 | model_opt_G = torch.optim.Adam(model.generator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) 75 | model_opt_D = torch.optim.Adam(model.discriminator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) 76 | 77 | # train 78 | for epoch_idx in range(args.num_epochs): 79 | model.train_one_epoch(model_opt_G=model_opt_G, model_opt_D=model_opt_D, dataloader=train_loader, sample_interval=args.sample_interval, epoch=epoch_idx, n_epochs=args.num_epochs) 80 | if epoch_idx % 20 == 0: 81 | model.save(args.save_path) 82 | # save 83 | model.save(args.save_path) 84 | # load 85 | model.load(args.load_path) 86 | 87 | # ===================== Visualization ============================== # 88 | os.makedirs("../../output/images", exist_ok=True) 89 | print('sample image, please wait!') 90 | sample_images = model.sample(64) 91 | sample_images = unnormalize_to_zero_to_one(sample_images) 92 | save_image(sample_images, "../../output/images/DCGAN_images.png", nrow=8, normalize=True) 93 | print('complete!!!') -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Generative_Adversarial_Network/GAN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | GAN 4 | Generative Adversarial Networks 5 | IJ Goodfellow, J Pouget-Abadie, M Mirza, B Xu, D Warde-Farley, S Ozair, A Courville, Y Bengio 6 | Published in 2014 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Muyao Wang , Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | 17 | import torch 18 | from torch.utils.data import DataLoader 19 | 20 | import torchvision.transforms as transforms 21 | from torchvision.utils import save_image 22 | from torchvision import datasets 23 | 24 | from pydpm.model import GAN 25 | 26 | # =========================================== ArgumentParser ===================================================================== # 27 | parser = argparse.ArgumentParser() 28 | 29 | # device 30 | parser.add_argument("--gpu_id", type=int, default=0) 31 | 32 | # dataset 33 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 34 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 |
parser.add_argument("--load_path", type=str, default='../../save_models/GAN.pth', help="the path of loading model") 39 | 40 | parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient") 41 | parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient") 42 | parser.add_argument("--g_z_dim", type=int, default=128, help="generator dimensionality of the noise") 43 | parser.add_argument("--g_hid_dims", type=list, default=[100, 200, 400, 800], help="generator dimensionality of the latent space") 44 | parser.add_argument("--d_hid_dims", type=list, default=[256, 128], help="discriminator dimensionality of the latent space") 45 | parser.add_argument("--channels", type=int, default=1, help="number of image channels") # 1 for mnist 46 | parser.add_argument("--sample_interval", type=int, default=800, help="interval betwen image samples") 47 | 48 | # optim 49 | parser.add_argument("--num_epochs", type=int, default=200, help="number of epochs of training") 50 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 51 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 52 | 53 | args = parser.parse_args() 54 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 55 | 56 | # =========================================== Dataset ===================================================================== # 57 | # mnist 58 | transform = transforms.Compose([transforms.Resize(args.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]) 59 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transform, download=True) 60 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transform, download=False) 61 | 62 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 63 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 64 | 65 | # =========================================== Model ===================================================================== # 66 | # model 67 | # Initialize generator and discriminator 68 | img_shape = (args.channels, args.img_size, args.img_size) 69 | model = GAN(img_shape, g_z_dim=args.g_z_dim, g_hid_dims=args.g_hid_dims, d_hid_dims=args.d_hid_dims, device=args.device) 70 | 71 | # Optimizers 72 | model_opt_G = torch.optim.Adam(model.generator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) 73 | model_opt_D = torch.optim.Adam(model.discriminator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) 74 | 75 | # train 76 | for epoch_idx in range(args.num_epochs): 77 | model.train_one_epoch(model_opt_G=model_opt_G, model_opt_D=model_opt_D, dataloader=train_loader, sample_interval=args.sample_interval, epoch=epoch_idx, n_epochs=args.num_epochs) 78 | 79 | # save 80 | model.save(args.save_path) 81 | # load 82 | model.load(args.load_path) 83 | 84 | # ===================== Visualization ============================== # 85 | os.makedirs("../../output/images", exist_ok=True) 86 | print('sample image,please wait!') 87 | save_image(model.sample(64), "../../output/images/GAN_images.png", nrow=8, normalize=True) 88 | print('complete!!!') -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Wasserstein_Generative_Adversarial_Networks/WGAN_Demo.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | WGAN 4 | Wasserstein GAN 5 | Martin Arjovsky, Soumith Chintala, and Leon Bottou, 6 | Publihsed in 2017 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Bufeng Ge <20009100138@stu.xidian.edu.cn>, Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | 17 | import torch 18 | from torch.utils.data import DataLoader 19 | 20 | import torchvision.transforms as transforms 21 | from torchvision.utils import save_image 22 | from torchvision import datasets 23 | 24 | from pydpm.model import WGAN 25 | 26 | # =========================================== ArgumentParser ===================================================================== # 27 | parser = argparse.ArgumentParser() 28 | 29 | # device 30 | parser.add_argument("--gpu_id", type=int, default=0) 31 | 32 | # dataset 33 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 34 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/WGAN.pth', help="the path of loading model") 39 | 40 | parser.add_argument("--g_z_dim", type=int, default=128, help="generator dimensionality of the noise") 41 | parser.add_argument("--g_hid_dims", type=list, default=[100, 200, 400, 800], help="generator dimensionality of the latent space") 42 | parser.add_argument("--d_hid_dims", type=list, default=[256, 128], help="discriminator dimensionality of the latent space") 43 | parser.add_argument("--channels", type=int, default=1, help="number of image channels") # 1 for mnist 44 | parser.add_argument("--sample_interval", type=int, default=100, help="interval betwen image samples") 45 | parser.add_argument("--n_critic", type=int, default=100, help="number of training steps for discriminator per iter") 46 | parser.add_argument("--clip_value", type=float, default=0.01, help="lower and upper clip value for disc. 
weights") 47 | 48 | # optim 49 | parser.add_argument("--num_epochs", type=int, default=200, help="number of epochs of training") 50 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 51 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 52 | 53 | args = parser.parse_args() 54 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 55 | 56 | # =========================================== Dataset ===================================================================== # 57 | # mnist 58 | transform = transforms.Compose([transforms.Resize(args.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]) 59 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transform, download=True) 60 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transform, download=False) 61 | 62 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 63 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 64 | 65 | # =========================================== Model ===================================================================== # 66 | # model 67 | # Initialize generator and discriminator 68 | img_shape = (args.channels, args.img_size, args.img_size) 69 | model = WGAN(img_shape, g_z_dim=args.g_z_dim, g_hid_dims=args.g_hid_dims, d_hid_dims=args.d_hid_dims, device=args.device) 70 | 71 | # Optimizers 72 | model_opt_G = torch.optim.RMSprop(model.generator.parameters(), lr=args.lr) 73 | model_opt_D = torch.optim.RMSprop(model.discriminator.parameters(), lr=args.lr) 74 | 75 | # train 76 | for epoch_idx in range(args.num_epochs): 77 | model.train_one_epoch(args=args, model_opt_G=model_opt_G, model_opt_D=model_opt_D, dataloader=train_loader, epoch=epoch_idx, n_epochs=args.num_epochs) 78 | 79 | 80 | # save 81 | model.save(args.save_path) 82 | # load 83 | model.load(args.load_path) 84 | 85 | # ===================== Visualization ================= # 86 | os.makedirs("../../output/images", exist_ok=True) 87 | print('sample image, please wait!') 88 | save_image(model.sample(64), "../../output/images/WGAN_images.png", nrow=8, normalize=True) 89 | print('complete!!!') 90 | 91 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Information_Maximizing_Generative_Adversarial_Nets/InfoGAN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | InfoGAN 4 | InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets 5 | Xi Chen, Yan Duan, Rein Houthooft, John Schulman, Ilya Sutskever, Pieter Abbeel 6 | Publihsed in 2016 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | 17 | import torch 18 | from torch.utils.data import DataLoader 19 | 20 | import torchvision.transforms as transforms 21 | from torchvision.utils import save_image 22 | from torchvision import datasets 23 | 24 | from pydpm.model import InfoGAN 25 | from pydpm.utils.utils import unnormalize_to_zero_to_one 26 | # =========================================== ArgumentParser ===================================================================== # 27 | parser = argparse.ArgumentParser() 
28 | 29 | # device 30 | parser.add_argument("--gpu_id", type=int, default=0) 31 | 32 | # dataset 33 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 34 | parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/GAN.pth', help="the path of loading model") 39 | 40 | parser.add_argument("--z_dim", type=int, default=62, help="generator dimensionality of the noise") 41 | parser.add_argument("--dis_ch", type=int, default=1, help="generator dimensionality of the latent space") 42 | parser.add_argument("--dis_ch_dim", type=int, default=10, help="discriminator dimensionality of the latent space") 43 | parser.add_argument("--con_ch", type=int, default=2, help="discriminator dimensionality of the latent space") 44 | parser.add_argument("--channels", type=int, default=1, help="number of image channels") # 1 for mnist 45 | parser.add_argument("--sample_interval", type=int, default=800, help="interval betwen image samples") 46 | 47 | # optim 48 | parser.add_argument("--num_epochs", type=int, default=200, help="number of epochs of training") 49 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 50 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 51 | parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient") 52 | parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient") 53 | 54 | args = parser.parse_args() 55 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 56 | 57 | # =========================================== Dataset ===================================================================== # 58 | # mnist 59 | transform = transforms.Compose([transforms.Resize(args.img_size), transforms.ToTensor()]) 60 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transform, download=True) 61 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transform, download=False) 62 | 63 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 64 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 65 | 66 | # =========================================== Model ===================================================================== # 67 | # model 68 | # Initialize generator and discriminator 69 | img_shape = (args.channels, args.img_size, args.img_size) 70 | model = InfoGAN(args=args, device=args.device) 71 | 72 | # Optimizers 73 | model_opt_G = torch.optim.Adam([{'params': model.generator.parameters()}, {'params': model.netQ.parameters()}], lr=args.lr, betas=(args.b1, args.b2)) 74 | model_opt_D = torch.optim.Adam([{'params': model.discriminator.parameters()}, {'params': model.netD.parameters()}], lr=args.lr, betas=(args.b1, args.b2)) 75 | 76 | 77 | # train 78 | for epoch_idx in range(args.num_epochs): 79 | model.train_one_epoch(model_opt_G=model_opt_G, model_opt_D=model_opt_D, dataloader=train_loader, sample_interval=args.sample_interval, epoch=epoch_idx, n_epochs=args.num_epochs) 80 | 81 | # save 82 | model.save(args.save_path) 83 | # load 84 
| model.load(args.load_path) 85 | 86 | # ===================== Visualization ============================== # 87 | os.makedirs("../../output/images", exist_ok=True) 88 | print('sample image, please wait!') 89 | save_image(model.sample(64), "../../output/images/InfoGAN_images.png", nrow=8, normalize=True) 90 | print('complete!!!') -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Denoising_Diffusion_Probabilistic_Model/DDPM_Demo.py: -------------------------------------------------------------------------------- 1 | ''' 2 | =========================================== 3 | DDPM 4 | Denoising Diffusion Probabilistic Models 5 | Jonathan Ho, Ajay Jain, Pieter Abbeel 6 | Published in NIPS 2020 7 | 8 | =========================================== 9 | ''' 10 | 11 | # Author: Xinyang Liu , Muyao Wang 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import numpy as np 16 | import argparse 17 | 18 | import torch 19 | import torch.optim as optim 20 | from torch.utils.data import DataLoader 21 | from torchvision import transforms 22 | from torchvision.datasets import CIFAR10 23 | from torchvision.utils import save_image 24 | 25 | from pydpm.model import DDPM 26 | 27 | parser = argparse.ArgumentParser() 28 | 29 | # device 30 | parser.add_argument("--gpu_id", type=int, default=0, help="the id of gpu to deploy") 31 | 32 | # dataset 33 | parser.add_argument("--dataset", type=str, default='CIFAR10', help="the name of dataset") 34 | parser.add_argument("--dataset_path", type=str, default='../../dataset', help="the file path of dataset") 35 | 36 | # network settings 37 | parser.add_argument("--T", type=int, default=1000, help="Number of time steps in DDPM") 38 | parser.add_argument("--in_channel", type=int, default=3, help="Number of channels in the input image") 39 | parser.add_argument("--channel", type=int, default=128, help="Number of channels after head layer") 40 | parser.add_argument("--channel_mult", type=list, default=[1, 2, 3, 4], help="Number of mult-channels") 41 | parser.add_argument("--attn", type=list, default=[2], help="Number of attention-blocks") 42 | parser.add_argument("--num_res_blocks", type=int, default=2, help="Number of residual-blocksn") 43 | parser.add_argument("--dropout", type=list, default=0.15, help="Dropout ratio") 44 | 45 | # ddpm settings 46 | parser.add_argument("--beta_1", type=float, default=1e-4, help="the level of noise in first step of forward process") 47 | parser.add_argument("--beta_T", type=float, default=0.02, help="The level of noise in T-th step of forward process") 48 | 49 | # optimizer 50 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 51 | parser.add_argument("--grad_clip", type=float, default=1., help="grad_clip") 52 | 53 | # training 54 | parser.add_argument("--num_epochs", type=int, default=30, help="number of epochs of training") 55 | parser.add_argument("--batch_size", type=int, default=64, help="batch size of dataloader") 56 | 57 | # sampling 58 | parser.add_argument("--model_path", type=str, default="../../save_models/DDPM.pth", help="path to save/load model") 59 | parser.add_argument("--noisy_path", type=str, default="../../output/noisy.png", help="path to save noisy") 60 | parser.add_argument("--image_path", type=str, default="../../output/image.png", help="path to save sampled images") 61 | 62 | args = parser.parse_args() 63 | args.device = 'cpu' if not torch.cuda.is_available() else f'cuda:{args.gpu_id}' 64 | 65 | # dataset 66 | dataset = CIFAR10( 
67 | root=args.dataset_path, train=True, download=True, 68 | transform=transforms.Compose([ 69 | transforms.RandomHorizontalFlip(), 70 | transforms.ToTensor(), 71 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 72 | ])) 73 | dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers=4, drop_last=True, pin_memory=True) 74 | if args.dataset == 'CIFAR10': 75 | args.in_channel = 3 76 | elif args.dataset == 'mnist': 77 | args.in_channel = 1 78 | else: 79 | raise ValueError('args.in_channel must be given') 80 | 81 | net_config = {"in_channel": args.in_channel, 82 | "channel": args.channel, 83 | "channel_mult": args.channel_mult, 84 | "attn": args.attn, 85 | "num_res_blocks": args.num_res_blocks, 86 | "dropout": args.dropout} 87 | ddpm_config = {"beta_1": 1e-4, "beta_T": 0.02} 88 | 89 | model = DDPM(T=args.T, net_cfg=net_config, ddpm_cfg=ddpm_config, device=args.device) 90 | 91 | model_opt = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=1e-4) 92 | cosine_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer=model_opt, T_max=args.num_epochs, eta_min=0, last_epoch=-1) 93 | 94 | for epoch in range(args.num_epochs): 95 | model.train_one_epoch(dataloader, model_opt, epoch, args) 96 | cosine_scheduler.step() 97 | if (epoch + 1) % 10 == 0: 98 | model.save() 99 | model_test = DDPM(T=args.T, net_cfg=net_config, ddpm_cfg=ddpm_config, device=args.device) 100 | model_test.load(args.model_path) 101 | noisy, images = model_test.test_one_epoch(net_model=model_test.net, args=args) 102 | 103 | saveNoisy = torch.clamp(noisy * 0.5 + 0.5, 0, 1) 104 | save_image(saveNoisy, args.noisy_path, nrow=8) 105 | sampledImgs = images * 0.5 + 0.5 # [0 ~ 1] 106 | save_image(sampledImgs, args.image_path, nrow=8) 107 | -------------------------------------------------------------------------------- /pydpm/metric/topic_coherence.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Author: Xinyang Liu 4 | # License: BSD-3-Clause 5 | 6 | import numpy as np 7 | from gensim.test.utils import common_corpus, common_dictionary 8 | from gensim.models.coherencemodel import CoherenceModel 9 | 10 | """ 11 | Examples 12 | --------- 13 | One way of using this feature is through providing a trained topic model. A dictionary has to be explicitly provided 14 | if the model does not contain a dictionary already 15 | 16 | .. sourcecode:: pycon 17 | # 18 | # >>> from gensim.test.utils import common_corpus, common_dictionary 19 | # >>> from gensim.models.ldamodel import LdaModel 20 | # >>> from gensim.models.coherencemodel import CoherenceModel 21 | # >>> 22 | # >>> model = LdaModel(common_corpus, 5, common_dictionary) 23 | # >>> 24 | # >>> cm = CoherenceModel(model=model, corpus=common_corpus, coherence='u_mass') 25 | # >>> coherence = cm.get_coherence() # get coherence value 26 | 27 | Another way of using this feature is through providing tokenized topics such as: 28 | 29 | .. sourcecode:: pycon 30 | 31 | # >>> from gensim.test.utils import common_corpus, common_dictionary 32 | # >>> from gensim.models.coherencemodel import CoherenceModel 33 | # >>> topics = [ 34 | # ... ['human', 'computer', 'system', 'interface'], 35 | # ... ['graph', 'minors', 'trees', 'eps'] 36 | # ...
] 37 | # >>> 38 | # >>> cm = CoherenceModel(topics=topics, corpus=common_corpus, dictionary=common_dictionary, coherence='u_mass') 39 | # >>> coherence = cm.get_coherence() # get coherence value 40 | 41 | (Please visit https://radimrehurek.com/gensim/models/coherencemodel.html for more usage.) 42 | 43 | """ 44 | 45 | class Topic_Coherence(object): 46 | def __init__(self, model=None, topics=None, texts=None, corpus=None, dictionary=None, 47 | window_size=None, keyed_vectors=None, coherence='c_v', topn=20, processes=-1): 48 | ''' 49 | Inputs: 50 | model : :class:`~gensim.models.basemodel.BaseTopicModel`, optional 51 | Pre-trained topic model, should be provided if topics is not provided. 52 | Currently supports :class:`~gensim.models.ldamodel.LdaModel`, 53 | :class:`~gensim.models.ldamulticore.LdaMulticore`, :class:`~gensim.models.wrappers.ldamallet.LdaMallet` and 54 | :class:`~gensim.models.wrappers.ldavowpalwabbit.LdaVowpalWabbit`. 55 | Use `topics` parameter to plug in an as yet unsupported model. 56 | topics : list of list of str, optional 57 | List of tokenized topics, if this is preferred over model - dictionary should be provided. 58 | texts : list of list of str, optional 59 | Tokenized texts, needed for coherence models that use sliding window based (i.e. coherence=`c_something`) 60 | probability estimator . 61 | corpus : iterable of list of (int, number), optional 62 | Corpus in BoW format. 63 | dictionary : :class:`~gensim.corpora.dictionary.Dictionary`, optional 64 | Gensim dictionary mapping of id word to create corpus. 65 | If `model.id2word` is present, this is not needed. If both are provided, passed `dictionary` will be used. 66 | window_size : int, optional 67 | Is the size of the window to be used for coherence measures using boolean sliding window as their 68 | probability estimator. For 'u_mass' this doesn't matter. 69 | If None - the default window sizes are used which are: 'c_v' - 110, 'c_uci' - 10, 'c_npmi' - 10. 70 | coherence : {'u_mass', 'c_v', 'c_uci', 'c_npmi'}, optional 71 | Coherence measure to be used. 72 | Fastest method - 'u_mass', 'c_uci' also known as `c_pmi`. 73 | For 'u_mass' corpus should be provided, if texts is provided, it will be converted to corpus 74 | using the dictionary. For 'c_v', 'c_uci' and 'c_npmi' `texts` should be provided (`corpus` isn't needed) 75 | topn : int, optional 76 | Integer corresponding to the number of top words to be extracted from each topic. 77 | processes : int, optional 78 | Number of processes to use for probability estimation phase, any value less than 1 will be interpreted as 79 | num_cpus - 1. 
80 | 81 | Outputs: 82 | topic_coherence : [float], The topic coherence with model 83 | 84 | ''' 85 | self.model = model 86 | self.topics = topics 87 | self.texts = texts 88 | self.corpus = corpus 89 | self.dictionary = dictionary 90 | self.window_size = window_size 91 | self.keyed_vectors = keyed_vectors 92 | self.coherence = coherence 93 | self.topn = topn 94 | self.processes = processes 95 | 96 | self._get() 97 | print(f'The topic coherence score is: {self._topic_coherence:.4f}') 98 | 99 | def _get(self): 100 | 101 | cm = CoherenceModel(model=self.model, topics=self.topics, texts=self.texts, corpus=self.corpus, 102 | dictionary=self.dictionary, window_size=self.window_size, keyed_vectors=self.keyed_vectors, 103 | coherence=self.coherence, topn=self.topn, processes=self.processes) 104 | 105 | self._topic_coherence = cm.get_coherence() 106 | 107 | -------------------------------------------------------------------------------- /pydpm/example/Deep_Learning_PM/Real_NVP/Real_NVP_Demo.py: -------------------------------------------------------------------------------- 1 | """Training procedure for real NVP. 2 | """ 3 | 4 | import os 5 | import argparse 6 | import torch 7 | import torch.distributions as distributions 8 | import torch.optim as optim 9 | from torchvision.utils import save_image 10 | from torchvision import datasets, transforms 11 | 12 | import numpy as np 13 | from pydpm.model import RealNVP 14 | from pydpm.model.deep_learning_pm.realnvp import DataInfo 15 | 16 | # =========================================== ArgumentParser ===================================================================== # 17 | parser = argparse.ArgumentParser() 18 | 19 | # device 20 | parser.add_argument("--gpu_id", type=int, default=0) 21 | 22 | # dataset 23 | parser.add_argument('--dataset', type=str, default='mnist', help='dataset to be modeled.') 24 | parser.add_argument("--data_path", type=str, default='../../../dataset/mnist/', help="the path of loading data") 25 | 26 | # model 27 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 28 | parser.add_argument("--load_path", type=str, default='../../save_models/realNVP.pth', help="the path of loading model") 29 | 30 | parser.add_argument('--base_dim', type=int, default=64, help='features in residual blocks of first few layers.') 31 | parser.add_argument('--res_blocks', type=int, default=8, help='number of residual blocks per group.') 32 | parser.add_argument('--bottleneck', type=int, default=0, help='whether to use bottleneck in residual blocks.') 33 | parser.add_argument('--skip', type=int, default=1, help='whether to use skip connection in coupling layers.') 34 | parser.add_argument('--weight_norm', type=int, default=1, help='whether to apply weight normalization.') 35 | parser.add_argument('--coupling_bn', type=int, default=1, help='whether to apply batchnorm after coupling layers.') 36 | parser.add_argument('--affine', type=int, default=1, help='whether to use affine coupling.') 37 | 38 | # optim 39 | parser.add_argument('--batch_size', type=int, default=64, help='number of images in a mini-batch.') 40 | parser.add_argument('--num_epochs', type=int, default=500, help='maximum number of training epoches.') 41 | parser.add_argument('--sample_size', type=int, default=64, help='number of images to generate.') 42 | parser.add_argument('--lr', type=float, default=1e-3, help='initial learning rate.') 43 | parser.add_argument('--momentum', type=float, default=0.9, help='beta1 in Adam optimizer.') 44 | 
parser.add_argument('--decay', type=float, default=0.999, help='beta2 in Adam optimizer.') 45 | parser.add_argument('--scale_reg', type=float, default=5e-5, help='L2 regularization strength.') 46 | 47 | args = parser.parse_args() 48 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 49 | 50 | # =========================================== Dataset ===================================================================== # 51 | # mnist 52 | data_info = DataInfo(args.dataset, 1, 28) # if cifar10, channels: 1->3 53 | train_dataset = datasets.MNIST(root=args.data_path, train=True, transform=transforms.ToTensor(), download=True) 54 | test_dataset = datasets.MNIST(root=args.data_path, train=False, transform=transforms.ToTensor(), download=False) 55 | 56 | train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) 57 | test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False) 58 | args.image_size = 28 59 | # =========================================== Model ===================================================================== # 60 | # model 61 | 62 | # Set prior 63 | prior = distributions.Normal(torch.tensor(0.).to(args.device), torch.tensor(1.).to(args.device)) 64 | 65 | # Initialize model 66 | model = RealNVP(datainfo=data_info, prior=prior, device=args.device, args=args).to(args.device) 67 | model_opt = optim.Adamax(model.parameters(), lr=args.lr, betas=(args.momentum, args.decay), eps=1e-7) 68 | 69 | # train 70 | best_log_ll = float('-inf') 71 | for epoch_idx in range(args.num_epochs): 72 | log_ll_mean = model.train_one_epoch(dataloader=train_loader, model_opt=model_opt, epoch=epoch_idx, args=args) 73 | if epoch_idx % 5 == 0: 74 | log_ll_mean = model.test_one_epoch(dataloader=test_loader, epoch=epoch_idx, args=args) 75 | if log_ll_mean > best_log_ll: 76 | best_log_ll = log_ll_mean # keep track of the best test log-likelihood and save 77 | model.save(args.save_path) 78 | 79 | # load 80 | model.load(args.load_path) 81 | 82 | # =================== Visualization ====================== # 83 | os.makedirs("../../output/images", exist_ok=True) 84 | print('sample image, please wait!') 85 | with torch.no_grad(): 86 | sample = model.sample(64) 87 | save_image(sample.view(64, 1, 28, 28), '../../output/images/realNVP_sample.png') 88 | print('complete!!!') 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /pydpm/model/deep_learning_pm/rbm.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | RBM 4 | A Practical Guide to Training 5 | Restricted Boltzmann Machines 6 | Geoffrey Hinton 7 | Published in 2010 8 | =========================================== 9 | """ 10 | # Author: Muyao Wang , Xinyang Liu 11 | # License: BSD-3-Clause 12 | 13 | import os 14 | import numpy as np 15 | 16 | import torch 17 | import torch.utils.data 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | from torch.autograd import Variable 21 | 22 | 23 | class RBM(nn.Module): 24 | def __init__(self, n_vis=784, n_hin=500, k=5): 25 | """ 26 | The basic model for RBM 27 | Inputs: 28 | n_vis : [int] number of visible units; 29 | n_hin : [int] number of latent units; 30 | k : [int] number of Gibbs sampling steps (CD-k); 31 | """ 32 | super(RBM, self).__init__() 33 | setattr(self, '_model_name', 'RBM') 34 | self.W = nn.Parameter(torch.randn(n_hin,
n_vis) * 1e-2) 35 | self.v_bias = nn.Parameter(torch.zeros(n_vis)) 36 | self.h_bias = nn.Parameter(torch.zeros(n_hin)) 37 | self.k = k 38 | 39 | def sample_from_p(self, p): 40 | """ 41 | Sample from p distribution 42 | Inputs: 43 | p : [tensor] distribution of p; 44 | Outputs: 45 | sample of p :[tensor] sample of p; 46 | """ 47 | return F.relu(torch.sign(p - Variable(torch.rand(p.size())))) 48 | 49 | def v_to_h(self, v): 50 | """ 51 | propagation from v to h 52 | Inputs: 53 | v : [tensor] distribution of v; 54 | Outputs: 55 | p_h : [tensor] prediction of h; 56 | sample_h : [tensor] sample of h; 57 | """ 58 | p_h = F.sigmoid(F.linear(v, self.W, self.h_bias)) 59 | sample_h = self.sample_from_p(p_h) 60 | return p_h, sample_h 61 | 62 | def h_to_v(self, h): 63 | """ 64 | propagation from h to v 65 | Inputs: 66 | h : [tensor] distribution of h; 67 | Outputs: 68 | p_v : [tensor] prediction of v; 69 | sample_v : [tensor] sample of v; 70 | """ 71 | p_v = F.sigmoid(F.linear(h, self.W.t(), self.v_bias)) 72 | sample_v = self.sample_from_p(p_v) 73 | return p_v, sample_v 74 | 75 | def forward(self, v): 76 | """ 77 | Forward process of RBM 78 | Inputs: 79 | v : [tensor] input of data; 80 | Outputs: 81 | v : [tensor] input of data; 82 | v_ : [tensor] prediction of v; 83 | """ 84 | pre_h1, h1 = self.v_to_h(v) 85 | 86 | h_ = h1 87 | for _ in range(self.k): 88 | pre_v_, v_ = self.h_to_v(h_) 89 | pre_h_, h_ = self.v_to_h(v_) 90 | 91 | return v, v_ 92 | 93 | def free_energy(self, v): 94 | """ 95 | Free energy of RBM 96 | Inputs: 97 | v : [tensor] distribution of v; 98 | Outputs: 99 | free_energy : [tensor] free energy of whole system 100 | """ 101 | vbias_term = v.mv(self.v_bias) 102 | wx_b = F.linear(v, self.W, self.h_bias) 103 | hidden_term = wx_b.exp().add(1).log().sum(1) 104 | return (-hidden_term - vbias_term).mean() 105 | 106 | # train and test 107 | def train_one_epoch(self, dataloader, model_opt, epoch_idx, args): 108 | loss_ = [] 109 | for batch_idx, (data, target) in enumerate(dataloader): 110 | data = Variable(data.view(-1, 784)) 111 | sample_data = data.bernoulli() 112 | 113 | v, v1 = self(sample_data) 114 | loss = self.free_energy(v) - self.free_energy(v1) 115 | loss_.append(loss.item()) 116 | model_opt.zero_grad() 117 | loss.backward() 118 | model_opt.step() 119 | print('Train Epoch: {} Loss: {:.6f}'.format(epoch_idx, np.mean(loss_))) 120 | return v, v1 121 | 122 | # save and load 123 | def save(self, model_path: str = '../save_models'): 124 | """ 125 | save model 126 | Inputs: 127 | model_path : [str] the path to save the model, default '../save_models/RBM.pth'; 128 | """ 129 | # create the directory path 130 | if not os.path.isdir(model_path): 131 | os.mkdir(model_path) 132 | 133 | # Save the model 134 | torch.save({'state_dict': self.state_dict()}, model_path + '/' + self._model_name + '.pth') 135 | print('model has been saved by ' + model_path + '/' + self._model_name + '.pth') 136 | 137 | def load(self, model_path): 138 | """ 139 | load model 140 | Inputs: 141 | model_path : [str] the path to load the model; 142 | """ 143 | assert os.path.exists(model_path), 'Path Error: can not find the path to load the model' 144 | # Load the model 145 | checkpoint = torch.load(model_path) 146 | self.load_state_dict(checkpoint['state_dict']) -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Hybrid_Autoencoding_Inference/WHAI_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
=========================================== 3 | WHAI: WEIBULL HYBRID AUTOENCODING INFERENCE FOR DEEP TOPIC MODELING (Demo) 4 | Hao Zhang, Bo Chen, Dandan Guo and Mingyuan Zhou 5 | Published as a conference paper at ICLR 2018 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | import numpy as np 17 | import scipy.io as sio 18 | from sklearn.cluster import k_means 19 | from nltk.corpus import stopwords 20 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 21 | 22 | import torch 23 | from torch.utils.data import DataLoader 24 | 25 | from torchtext.data.utils import get_tokenizer 26 | from torchtext.datasets import AG_NEWS 27 | 28 | from pydpm.model import WHAI 29 | from pydpm.utils import * 30 | from pydpm.metric import * 31 | from pydpm.dataloader.text_data import Text_Processer, build_vocab_from_iterator 32 | 33 | # =========================================== ArgumentParser ===================================================================== # 34 | parser = argparse.ArgumentParser() 35 | 36 | # device 37 | parser.add_argument("--gpu_id", type=int, default=0, help="the id of gpu to deploy") 38 | 39 | # dataset 40 | parser.add_argument("--dataset", type=str, default='AG_NEWS', help="the name of dataset") 41 | parser.add_argument("--data_path", type=str, default='../../../dataset', help="the file path of dataset") 42 | 43 | # model 44 | parser.add_argument("--voc_size", type=int, default=20000, help="the length of vocabulary") 45 | parser.add_argument("--z_dims", type=list, default=[128, 64, 32], help="the list of z dimension") 46 | parser.add_argument("--hid_dims", type=list, default=[200, 200, 200], help="the list of hidden dimension") 47 | 48 | # optim 49 | parser.add_argument("--num_epochs", type=int, default=1000, help="number of epochs of training") 50 | parser.add_argument("--batch_size", type=int, default=256, help="size of the batches") 51 | parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate") 52 | parser.add_argument("--MBratio", type=int, default=50, help="number of epochs of training") 53 | 54 | args = parser.parse_args() 55 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 56 | 57 | # =========================================== Dataset ===================================================================== # 58 | # load dataset (AG_NEWS from torchtext) 59 | train_iter, test_iter = AG_NEWS(args.data_path, split=('train', 'test')) 60 | tokenizer = get_tokenizer("basic_english") 61 | 62 | # build vocabulary 63 | stop_words = list(stopwords.words('english')) 64 | vocab = build_vocab_from_iterator(map(lambda x: tokenizer(x[1]), train_iter), specials=['', '', '', ''], special_first=True, stop_words=stop_words, max_tokens=args.voc_size) 65 | vocab.set_default_index(vocab['']) 66 | text_processer = Text_Processer(tokenizer=tokenizer, vocab=vocab) 67 | 68 | # Get train/test label and data_file(tokens) from data_iter and convert them into clean file 69 | train_files, train_labels = text_processer.file_from_iter(train_iter, tokenizer=tokenizer) 70 | test_files, test_labels = text_processer.file_from_iter(test_iter, tokenizer=tokenizer) 71 | 72 | # Take part of dataset for convenience 73 | train_idxs = np.arange(7000) 74 | np.random.shuffle(train_idxs) 75 | train_files = [train_files[i] for i in train_idxs] 76 | train_labels = [train_labels[i] for i in train_idxs] 77 | 78 | test_idxs = np.arange(3000) 
79 | np.random.shuffle(test_idxs) 80 | test_files = [test_files[i] for i in test_idxs] 81 | test_labels = [test_labels[i] for i in test_idxs] 82 | 83 | train_bows, train_labels = text_processer.bow_from_file(train_files, train_labels) 84 | test_bows, test_labels = text_processer.bow_from_file(test_files, test_labels) 85 | 86 | train_loader = DataLoader([train_data for train_data in zip(train_bows, train_labels)], batch_size=256, shuffle=False, num_workers=4, drop_last=True) 87 | test_loader = DataLoader([test_data for test_data in zip(test_bows, test_labels)], batch_size=256, shuffle=False, num_workers=4, drop_last=True) 88 | 89 | # =========================================== Model ===================================================================== # 90 | model = WHAI(in_dim=args.voc_size, z_dims=args.z_dims, hid_dims=args.hid_dims, device=args.device, encode_prior=False) 91 | model_opt = torch.optim.Adam(model.parameters()) 92 | 93 | for epoch_idx in range(args.num_epochs): 94 | train_local_params = model.train_one_epoch(dataloader=train_loader, optim=model_opt, epoch_idx=epoch_idx, args=args) 95 | 96 | if (epoch_idx+1) % 20 == 0: 97 | test_local_params = model.test_one_epoch(dataloader=test_loader) 98 | # calculate PPL 99 | x_hat = np.matmul(test_local_params.Theta[0], np.transpose(model.global_params.Phi[0])) 100 | ppl = Perplexity(test_local_params.data, x_hat) 101 | 102 | # calculate NMI with train_local_params 103 | cls_num = len(np.unique(train_labels + test_labels)) 104 | test_theta_norm = standardization(test_local_params.Theta[0]) 105 | tmp = k_means(test_theta_norm, cls_num) # N*K 106 | predict_label = tmp[1] + 1 # Some label start with '1' not '0', there should be 'tmp[1] + 1' 107 | MI = NMI(test_local_params.label, predict_label) 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Convolutional_Poisson_Gamma_Belief_Network/CPFA_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Convolutional Poisson Factor Analysis 4 | Chaojie Wang Sucheng Xiao Bo Chen and Mingyuan Zhou 5 | Published in International Conference on Machine Learning 2019 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import argparse 16 | import scipy.io as sio 17 | import _pickle as cPickle 18 | 19 | from torch.utils.data import Dataset, DataLoader 20 | from torchtext.data.utils import get_tokenizer 21 | from torchtext.datasets import AG_NEWS 22 | 23 | from pydpm.model import CPFA 24 | from pydpm.metric import ACC 25 | from pydpm.dataloader.text_data import Text_Processer, build_vocab_from_iterator 26 | 27 | # =========================================== ArgumentParser ===================================================================== # 28 | parser = argparse.ArgumentParser() 29 | 30 | # device 31 | parser.add_argument("--device", type=str, default='gpu') 32 | 33 | # dataset 34 | parser.add_argument("--data_path", type=str, default='../../../dataset/', help="the path of loading data") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/CPFA.npy', help="the path of loading model") 39 | 40 | parser.add_argument("--z_dim", 
type=int, default=200, help="dimensionality of the z latent space") 41 | 42 | # optim 43 | parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs of training") 44 | 45 | args = parser.parse_args() 46 | 47 | # =========================================== Dataset ===================================================================== # 48 | # define transform for dataset and load orginal dataset 49 | # load dataset (AG_NEWS from torchtext) 50 | train_iter, test_iter = AG_NEWS(args.data_path, split=('train', 'test')) 51 | tokenizer = get_tokenizer("basic_english") 52 | 53 | # build vocabulary 54 | vocab = build_vocab_from_iterator(map(lambda x: tokenizer(x[1]), train_iter), specials=['', '', '', ''], special_first=True, max_tokens=5000) 55 | vocab.set_default_index(vocab['']) 56 | text_processer = Text_Processer(tokenizer=tokenizer, vocab=vocab) 57 | 58 | # Get train/test label and data_file(tokens) from data_iter and convert them into clean file 59 | train_files, train_labels = text_processer.file_from_iter(train_iter, tokenizer=tokenizer) 60 | test_files, test_labels = text_processer.file_from_iter(test_iter, tokenizer=tokenizer) 61 | 62 | # Take part of dataset for convenience 63 | train_idxs = np.arange(3000) 64 | np.random.shuffle(train_idxs) 65 | train_files = [train_files[i] for i in train_idxs] 66 | train_labels = [train_labels[i] for i in train_idxs] 67 | 68 | test_idxs = np.arange(1000) 69 | np.random.shuffle(test_idxs) 70 | test_files = [test_files[i] for i in test_idxs] 71 | test_labels = [test_labels[i] for i in test_idxs] 72 | 73 | # ===================================== mode 1, sparse input ====================================== # 74 | # Build batch of word2index 75 | train_sparse_batch, train_labels = text_processer.word_index_from_file(train_files, train_labels, to_sparse=True) 76 | test_sparse_batch, test_labels = text_processer.word_index_from_file(test_files, test_labels, to_sparse=True) 77 | print('Data has been processed!') 78 | 79 | # create the model and deploy it on gpu or cpu 80 | model = CPFA(K=args.z_dim, device=args.device) 81 | model.initial(train_sparse_batch, is_sparse=True) # use the shape of train_data to initialize the params of model 82 | 83 | # train and evaluation 84 | train_local_params = model.train(data=train_sparse_batch, is_sparse=True, num_epochs=args.num_epochs) 85 | train_local_params = model.test(data=train_sparse_batch, is_sparse=True, num_epochs=args.num_epochs) 86 | test_local_params = model.test(data=test_sparse_batch, is_sparse=True, num_epochs=args.num_epochs) 87 | 88 | # save the model after training 89 | model.save(args.save_path) 90 | # load the model 91 | model.load(args.load_path) 92 | 93 | # evaluate the model with classification accuracy 94 | # the demo accuracy can achieve 0.628 95 | train_theta = np.sum(np.sum(train_local_params.W_nk, axis=3), axis=2).T 96 | test_theta = np.sum(np.sum(test_local_params.W_nk, axis=3), axis=2).T 97 | results = ACC(train_theta, test_theta, train_labels, test_labels, 'SVM') 98 | 99 | 100 | # # Use custom dataset 101 | # DATA = cPickle.load(open("../../dataset/TREC.pkl", "rb"), encoding='iso-8859-1') 102 | # 103 | # data_vab_list = DATA['Vocabulary'] 104 | # data_vab_count_list = DATA['Vab_count'] 105 | # data_vab_length = DATA['Vab_Size'] 106 | # data_label = DATA['Label'] 107 | # data_train_list = DATA['Train_Origin'] 108 | # data_train_label = np.array(DATA['Train_Label']) 109 | # data_train_split = DATA['Train_Word_Split'] 110 | # data_train_list_index = 
DATA['Train_Word2Index'] 111 | # data_test_list = DATA['Test_Origin'] 112 | # data_test_label = np.array(DATA['Test_Label']) 113 | # data_test_split = DATA['Test_Word_Split'] 114 | # data_test_list_index = DATA['Test_Word2Index'] 115 | 116 | # train_sparse_batch, train_labels = text_processer.word_index_from_file(data_train_list_index, data_train_label, to_sparse=True) 117 | # test_sparse_batch, test_labels = text_processer.word_index_from_file(data_test_list_index, data_test_label, to_sparse=True) 118 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Convolutional_Poisson_Gamma_Belief_Network/CPGBN_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================================ 3 | Convolutional Poisson Gamma Belief Network Demo 4 | Chaojie Wang Sucheng Xiao Bo Chen and Mingyuan Zhou 5 | Published in International Conference on Machine Learning 2019 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Chaojie Wang ; Jiawen Wu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | import _pickle as cPickle 19 | 20 | from torchtext.data.utils import get_tokenizer 21 | from torchtext.datasets import AG_NEWS 22 | 23 | from pydpm.model import CPGBN 24 | from pydpm.metric import ACC 25 | from pydpm.dataloader.text_data import Text_Processer, build_vocab_from_iterator 26 | 27 | # =========================================== ArgumentParser ===================================================================== # 28 | parser = argparse.ArgumentParser() 29 | 30 | # device 31 | parser.add_argument("--device", type=str, default='gpu') 32 | 33 | # dataset 34 | parser.add_argument("--data_path", type=str, default='../../../dataset/', help="the path of loading data") 35 | 36 | # model 37 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 38 | parser.add_argument("--load_path", type=str, default='../../save_models/CPGBN.npy', help="the path of loading model") 39 | 40 | parser.add_argument("--z_dims", type=list, default=[200, 100, 50], help="number of topics at different layers in CPGBN") 41 | 42 | args = parser.parse_args() 43 | 44 | # =========================================== Dataset ===================================================================== # 45 | # define transform for dataset and load the original dataset 46 | # Load dataset (AG_NEWS from torchtext) 47 | train_iter, test_iter = AG_NEWS(args.data_path, split=('train', 'test')) 48 | tokenizer = get_tokenizer("basic_english") 49 | 50 | # build vocabulary 51 | vocab = build_vocab_from_iterator(map(lambda x: tokenizer(x[1]), train_iter), specials=['', '', '', ''], special_first=True, max_tokens=5000) 52 | vocab.set_default_index(vocab['']) 53 | text_processer = Text_Processer(tokenizer=tokenizer, vocab=vocab) 54 | 55 | # Get train/test label and data_file(tokens) from data_iter and convert them into clean file 56 | # stop_words = [''] # Defined by the user, with as many stop words as needed 57 | train_files, train_labels = text_processer.file_from_iter(train_iter, tokenizer=tokenizer) 58 | test_files, test_labels = text_processer.file_from_iter(test_iter, tokenizer=tokenizer) 59 | 60 | # Take part of dataset for convenience 61 | train_idxs = np.arange(5000) 62 | np.random.shuffle(train_idxs) 63 | train_files = [train_files[i] for i in train_idxs] 64 | train_labels =
[train_labels[i] for i in train_idxs] 65 | 66 | test_idxs = np.arange(1000) 67 | np.random.shuffle(test_idxs) 68 | test_files = [test_files[i] for i in test_idxs] 69 | test_labels = [test_labels[i] for i in test_idxs] 70 | 71 | # ===================================== mode 1, sparse input ====================================== # 72 | # Build batch of word2index 73 | train_sparse_batch, train_labels = text_processer.word_index_from_file(train_files, train_labels, to_sparse=True) 74 | test_sparse_batch, test_labels = text_processer.word_index_from_file(test_files, test_labels, to_sparse=True) 75 | print('Data has been processed!') 76 | 77 | # =========================================== Model ===================================================================== # 78 | # create the model and deploy it on gpu or cpu 79 | model = CPGBN(K=args.z_dims, device=args.device) 80 | model.initial(train_sparse_batch, is_sparse=True) # use the shape of train_data to initialize the params of model 81 | 82 | # train and evaluation 83 | train_local_params = model.train(train_sparse_batch, is_sparse=True, iter_all=100) 84 | train_local_params = model.test(train_sparse_batch, is_sparse=True, iter_all=100) 85 | test_local_params = model.test(test_sparse_batch, is_sparse=True, iter_all=100) 86 | 87 | # save the model after training 88 | model.save(args.save_path) 89 | # load the model 90 | model.load(args.load_path) 91 | 92 | # evaluate the model with classification accuracy 93 | # the demo accuracy can achieve 0.631 94 | train_theta = np.sum(np.sum(train_local_params.W_nk, axis=3), axis=2).T 95 | test_theta = np.sum(np.sum(test_local_params.W_nk, axis=3), axis=2).T 96 | results = ACC(train_theta, test_theta, train_labels, test_labels, 'SVM') 97 | 98 | 99 | 100 | # # Custom dataset 101 | 102 | # DATA = cPickle.load(open("../../dataset/TREC.pkl", "rb"), encoding='iso-8859-1') 103 | # 104 | # data_vab_list = DATA['Vocabulary'] 105 | # data_vab_count_list = DATA['Vab_count'] 106 | # data_vab_length = DATA['Vab_Size'] 107 | # data_label = DATA['Label'] 108 | # data_train_list = DATA['Train_Origin'] 109 | # data_train_label = np.array(DATA['Train_Label']) 110 | # data_train_split = DATA['Train_Word_Split'] 111 | # data_train_list_index = DATA['Train_Word2Index'] 112 | # data_test_list = DATA['Test_Origin'] 113 | # data_test_label = np.array(DATA['Test_Label']) 114 | # data_test_split = DATA['Test_Word_Split'] 115 | # data_test_list_index = DATA['Test_Word2Index'] 116 | 117 | # train_sparse_batch, train_labels = text_processer.word_index_from_file(data_train_list_index, data_train_label, to_sparse=True) 118 | # test_sparse_batch, test_labels = text_processer.word_index_from_file(data_test_list_index, data_test_label, to_sparse=True) 119 | -------------------------------------------------------------------------------- /pydpm/example/Bayesian_PM/Word_Embeddings_Deep_Topic_Model/WEDTM_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | WEDTM Demo 4 | Inter and Intra Topic Structure Learning with Word Embeddings 5 | He Zhao, Lan Du, Wray Buntine, Mingyuan Zhou 6 | Published in International Conference on Machine Learning 2018 7 | 8 | =========================================== 9 | 10 | """ 11 | 12 | # Author: Chaojie Wang ; Jiawen Wu ; Wei Zhao <13279389260@163.com> 13 | # License: BSD-3-Clause 14 | 15 | import numpy as np 16 | import argparse 17 | import scipy.io as sio 18 | 19 | import nltk 20 | from nltk.corpus import
stopwords 21 | 22 | import torch 23 | from torch.utils.data import Dataset, DataLoader 24 | from torchtext.data.utils import get_tokenizer 25 | from torchtext.vocab import GloVe 26 | from torchtext.datasets import AG_NEWS 27 | 28 | from pydpm.model import WEDTM 29 | from pydpm.metric import ACC 30 | from pydpm.dataloader.text_data import Text_Processer, build_vocab_from_iterator 31 | 32 | # =========================================== ArgumentParser ===================================================================== # 33 | parser = argparse.ArgumentParser() 34 | 35 | # device 36 | parser.add_argument("--device", type=str, default='gpu') 37 | 38 | # dataset 39 | parser.add_argument("--data_path", type=str, default='../dataset/', help="the path of loading data") 40 | 41 | # model 42 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 43 | parser.add_argument("--load_path", type=str, default='../../save_models/WEDTM.npy', help="the path of loading model") 44 | 45 | parser.add_argument("--z_dim", type=int, default=100, help="number of topics in each layers") 46 | parser.add_argument("--T", type=int, default=3, help="number of vertical layers") 47 | parser.add_argument("--S", type=int, default=3, help="number of sub topics") 48 | 49 | # optim 50 | parser.add_argument("--num_epochs", type=int, default=300, help="number of epochs of training") 51 | 52 | args = parser.parse_args() 53 | 54 | # =========================================== Dataset ===================================================================== # 55 | # define transform for dataset and load orginal dataset 56 | # Load dataset (AG_NEWS from torchtext) 57 | train_iter, test_iter = AG_NEWS(args.data_path, split=('train', 'test')) 58 | tokenizer = get_tokenizer("basic_english") 59 | 60 | # build vocabulary 61 | # nltk.download('stopwords') 62 | stop_words = list(stopwords.words('english')) 63 | vocab_size = 7000 64 | vocab = build_vocab_from_iterator(map(lambda x: tokenizer(x[1]), train_iter), special_first=False, stop_words=stop_words, max_tokens=vocab_size) 65 | text_processer = Text_Processer(tokenizer=tokenizer, vocab=vocab) 66 | 67 | # Get train/test label and data_file(tokens) from data_iter and convert them into clean file 68 | train_files, train_labels = text_processer.file_from_iter(train_iter, tokenizer=tokenizer) 69 | test_files, test_labels = text_processer.file_from_iter(test_iter, tokenizer=tokenizer) 70 | 71 | # Take part of dataset for convenience 72 | train_idxs = np.arange(5000) 73 | np.random.shuffle(train_idxs) 74 | train_files = [train_files[i] for i in train_idxs] 75 | train_labels = [train_labels[i] for i in train_idxs] 76 | 77 | test_idxs = np.arange(1000) 78 | np.random.shuffle(test_idxs) 79 | test_files = [test_files[i] for i in test_idxs] 80 | test_labels = [test_labels[i] for i in test_idxs] 81 | 82 | # Build word embedding 83 | vector = GloVe(name='6B', dim=50) 84 | voc_embedding = vector.get_vecs_by_tokens(vocab.get_itos(), lower_case_backup=True) 85 | 86 | # Dataloader 87 | train_bow, train_labels = text_processer.bow_from_file(train_files, train_labels, to_sparse=False) 88 | test_bow, test_labels = text_processer.bow_from_file(test_files, test_labels, to_sparse=False) 89 | 90 | # Transpose dataset to fit the model and convert a tensor to numpy 91 | train_data = np.asarray(train_bow).T.astype(int) 92 | test_data = np.asarray(test_bow).T.astype(int) 93 | voc_embedding = voc_embedding.numpy() 94 | 95 | print('Data has been processed!') 96 | 97 | # 
=========================================== Model ===================================================================== # 98 | # create the model and deploy it on gpu or cpu 99 | model = WEDTM(K=[args.z_dim] * args.T, device=args.device) 100 | model.initial(train_data) # use the shape of train_data to initialize the params of model 101 | 102 | # train and evaluation 103 | train_local_params = model.train(voc_embedding, train_data, args.S, num_epochs=args.num_epochs, is_initial_local=False) 104 | train_local_params = model.test(voc_embedding, train_data, args.S, num_epochs=args.num_epochs) 105 | test_local_params = model.test(voc_embedding, test_data, args.S, num_epochs=args.num_epochs) 106 | 107 | # save the model after training 108 | model.save(args.save_path) 109 | # load the model 110 | model.load(args.load_path) 111 | 112 | # evaluate the model with classification accuracy 113 | results = ACC(train_local_params.Theta, test_local_params.Theta, train_labels, test_labels, 'SVM') 114 | 115 | 116 | # # load dataset (WS.mat from paper) 117 | # dataset = sio.loadmat('../../dataset/WS.mat') 118 | # train_data = np.asarray(dataset['doc'].todense()[:, dataset['train_idx'][0]-1])[:, ::10].astype(int) 119 | # test_data = np.asarray(dataset['doc'].todense()[:, dataset['test_idx'][0]-1])[:, ::5].astype(int) 120 | # train_label = dataset['labels'][dataset['train_idx'][0]-1][::10, :] 121 | # test_label = dataset['labels'][dataset['test_idx'][0]-1][::5, :] 122 | 123 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Sawtooth_Embedding_Topic_Model/SawETM_Demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | Sawtooth Factorial Topic Embeddings Guided Gamma Belief Network 4 | Zhibin Duan, Dongsheng Wang, Bo Chen, Chaojie Wang, Wenchao Chen, Yewen Li, Jie Ren and Mingyuan Zhou 5 | Published as a conference paper at ICML 2021 6 | 7 | =========================================== 8 | 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import os 15 | import argparse 16 | import numpy as np 17 | import scipy.io as sio 18 | from sklearn.cluster import k_means 19 | from nltk.corpus import stopwords 20 | 21 | import torch 22 | from torch.utils.data import DataLoader 23 | 24 | from torchtext.data.utils import get_tokenizer 25 | from torchtext.datasets import AG_NEWS 26 | 27 | from pydpm.model import SawETM 28 | from pydpm.utils import * 29 | from pydpm.metric import * 30 | from pydpm.dataloader.text_data import Text_Processer, build_vocab_from_iterator 31 | 32 | # =========================================== ArgumentParser ===================================================================== # 33 | parser = argparse.ArgumentParser() 34 | 35 | # device 36 | parser.add_argument("--gpu_id", type=int, default=0, help="the id of gpu to deploy") 37 | 38 | # dataset 39 | parser.add_argument("--dataset", type=str, default='AG_NEWS', help="the name of dataset") 40 | parser.add_argument("--data_path", type=str, default='../../../dataset', help="the file path of dataset") 41 | 42 | # model 43 | parser.add_argument("--embed_size", type=int, default=64, help="the length of vocabulary") 44 | parser.add_argument("--vocab_size", type=int, default=8000, help="the length of vocabulary") 45 | parser.add_argument("--num_topics_list", type=list, default=[128, 64, 32], help="the list of z dimension") 46 | parser.add_argument("--num_hiddens_list", type=list, 
default=[200, 200, 200], help="the list of hidden dimension") 47 | parser.add_argument("--save_path", type=str, default='../../save_models', help="the path of saving model") 48 | parser.add_argument("--load_path", type=str, default='../../save_models/SawETM.pth', help="the path of loading model") 49 | 50 | # optim 51 | parser.add_argument("--num_epochs", type=int, default=500, help="number of epochs of training") 52 | parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") 53 | parser.add_argument("--lr", type=float, default=0.01, help="adam: learning rate") 54 | parser.add_argument("--weight_decay", type=float, default=1e-5, help="l2 regularization strength") 55 | 56 | args = parser.parse_args() 57 | args.device = torch.device(f"cuda:{args.gpu_id}") if torch.cuda.is_available() else torch.device("cpu") 58 | 59 | # =========================================== Dataset ===================================================================== # 60 | # load dataset (AG_NEWS from torchtext) 61 | train_iter, test_iter = AG_NEWS(args.data_path, split=('train', 'test')) 62 | tokenizer = get_tokenizer("basic_english") 63 | 64 | # build vocabulary 65 | stop_words = list(stopwords.words('english')) 66 | vocab = build_vocab_from_iterator(map(lambda x: tokenizer(x[1]), train_iter), specials=['', '', '', ''], special_first=True, stop_words=stop_words, max_tokens=args.vocab_size) 67 | vocab.set_default_index(vocab['']) 68 | text_processer = Text_Processer(tokenizer=tokenizer, vocab=vocab) 69 | 70 | # Get train/test label and data_file(tokens) from data_iter and convert them into clean file 71 | train_files, train_labels = text_processer.file_from_iter(train_iter, tokenizer=tokenizer) 72 | test_files, test_labels = text_processer.file_from_iter(test_iter, tokenizer=tokenizer) 73 | 74 | # Take part of dataset for convenience 75 | train_idxs = np.arange(7000) 76 | np.random.shuffle(train_idxs) 77 | train_files = [train_files[i] for i in train_idxs] 78 | train_labels = [train_labels[i] for i in train_idxs] 79 | 80 | test_idxs = np.arange(3000) 81 | np.random.shuffle(test_idxs) 82 | test_files = [test_files[i] for i in test_idxs] 83 | test_labels = [test_labels[i] for i in test_idxs] 84 | 85 | train_bows, train_labels = text_processer.bow_from_file(train_files, train_labels) 86 | test_bows, test_labels = text_processer.bow_from_file(test_files, test_labels) 87 | 88 | train_loader = DataLoader([train_data for train_data in zip(train_bows, train_labels)], batch_size=256, shuffle=False, num_workers=4, drop_last=True) 89 | test_loader = DataLoader([test_data for test_data in zip(test_bows, test_labels)], batch_size=256, shuffle=False, num_workers=4, drop_last=True) 90 | 91 | # if args.pretrained_embeddings: 92 | # print('Using pretrained glove embeddings') 93 | # initial_embeddings = load_glove_embeddings(args.embed_size, vocab) 94 | # else: 95 | # initial_embeddings = None 96 | initial_embeddings = None 97 | # =========================================== Model ===================================================================== # 98 | model = SawETM(embed_size=args.embed_size, vocab_size=args.vocab_size, num_hiddens_list=args.num_hiddens_list, num_topics_list=args.num_topics_list, word_embeddings=initial_embeddings, device=args.device) 99 | model.to(args.device) 100 | model_opt = torch.optim.Adam(params=model.parameters(), 101 | lr=args.lr, 102 | weight_decay=args.weight_decay) 103 | 104 | 105 | ############### Training ################ 106 | 107 | for epoch_idx in 
range(args.num_epochs): 108 | _, _ = model.train_one_epoch(dataloader=train_loader, model_opt=model_opt, epoch_idx=epoch_idx, args=args) 109 | 110 | if (epoch_idx+1) % 20 == 0: 111 | theta, labels = model.test_one_epoch(dataloader=test_loader) 112 | 113 | # calculate NMI with train_local_params 114 | cls_num = len(np.unique(train_labels + test_labels)) 115 | test_theta_norm = standardization(theta) 116 | tmp = k_means(test_theta_norm, cls_num) # N*K 117 | predict_label = tmp[1] + 1 # Some label start with '1' not '0', there should be 'tmp[1] + 1' 118 | MI = NMI(labels, predict_label) 119 | purity = Purity(labels, predict_label) 120 | 121 | # save 122 | model.save(args.save_path) 123 | # load 124 | model.load(args.load_path) 125 | 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /pydpm/example/Hybrid_PM/Weibull_Graph_Attention_Autoencoder/WGAAE_Demo.py: -------------------------------------------------------------------------------- 1 | # Author: Xinyang Liu 2 | # License: BSD-3-Clause 3 | 4 | import os 5 | import argparse 6 | import random 7 | import numpy as np 8 | import scipy.sparse as sp 9 | 10 | import torch 11 | import torch_geometric.transforms as T 12 | from torch_geometric.datasets import Planetoid 13 | 14 | from pydpm.model import WGAAE 15 | from pydpm.utils import * 16 | from pydpm.dataloader.graph_data import Graph_Processer 17 | from pydpm.metric.roc_score import ROC_AP_SCORE 18 | 19 | # =========================================== ArgumentParser ===================================================================== # 20 | parser = argparse.ArgumentParser() 21 | 22 | # device 23 | parser.add_argument("--gpu_id", type=int, default=0, help="the id of gpu to deploy") 24 | parser.add_argument('--seed', type=int, default=123, help='Setting random seed') 25 | 26 | # dataset 27 | parser.add_argument('--dataset', type=str, default='cora', help='Dataset string') 28 | parser.add_argument('--dataset_path', type=str, default='../../../dataset/Planetoid', help="the file path of dataset") 29 | 30 | # network settings 31 | parser.add_argument('--z_dims', type=list, default=[64, 64, 64], help='Output dimension list') 32 | parser.add_argument('--hid_dims', type=list, default=[128, 128, 128], help='Hidden dimension list') 33 | parser.add_argument('--out_dim', type=int, default=32, help='Dimension of output') 34 | parser.add_argument('--num_heads', type=int, default=4, help='Number of heads in GAT') 35 | 36 | # optimizer 37 | parser.add_argument("--lr", type=float, default=0.001, help="Adam: learning rate") 38 | 39 | # training 40 | parser.add_argument('--task', type=str, default='prediction', help='Prediction, clustering or classification') 41 | parser.add_argument("--num_epochs", type=int, default=30000, help="Number of epochs of training") 42 | parser.add_argument('--is_subgraph', type=bool, default=False, help='Whether subgraph') 43 | parser.add_argument('--is_sample', type=bool, default=True, help='Whether sample nodes') 44 | parser.add_argument('--num_sample', type=int, default=1500, help='Number of sampling nodes') 45 | # parser.add_argument("--batch_size", type=int, default=1000, help="Size of the batches") 46 | parser.add_argument("--MBratio", type=int, default=100, help="number of epochs of training") 47 | parser.add_argument('--graph_lh', type=str, default='Laplacian', help='Graph likelihood') 48 | parser.add_argument('--lambda', type=float, default=1.0, help='lamda') 49 | parser.add_argument('--theta_norm', type=bool, 
default=False, help='Whether theta norm') 50 | 51 | args = parser.parse_args() 52 | args.device = 'cpu' if not torch.cuda.is_available() else f'cuda:{args.gpu_id}' 53 | 54 | seed_everything(args.seed) 55 | 56 | # =========================================== Dataset ===================================================================== # 57 | # Prepare for dataset 58 | dataset = Planetoid(args.dataset_path, args.dataset) 59 | data = dataset[0].to(args.device) 60 | data.edge_index = data.edge_index[[1, 0]] 61 | graph_processer = Graph_Processer() 62 | 63 | adj_csc = graph_processer.graph_from_edges(data.edge_index, data.num_nodes).tocsc() 64 | adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = graph_processer.edges_split_from_graph(adj_csc) 65 | 66 | # For encoder input and graph likelihood 67 | adj_train = adj_train + sp.eye(adj_train.shape[0]) 68 | data.edge_index = graph_processer.edges_from_graph(adj_train.tocoo(), args.device) 69 | 70 | # =========================================== Model ===================================================================== # 71 | model = WGAAE(in_dim=dataset.num_features, out_dim=args.out_dim, z_dims=args.z_dims, hid_dims=args.hid_dims, num_heads=args.num_heads, device=args.device) 72 | optim = torch.optim.Adam(model.parameters()) 73 | 74 | # Training 75 | best_AUC = best_AP = 0 76 | for epoch_index in range(args.num_epochs): 77 | if epoch_index <= 200: 78 | for i in range(20): 79 | _, _ = model.train_one_epoch(data=data, optim=optim, epoch_index=epoch_index, is_sample=args.is_sample, is_subgraph=args.is_subgraph, args=args, is_train=False) 80 | else: 81 | for i in range(5): 82 | _, _ = model.train_one_epoch(data=data, optim=optim, epoch_index=epoch_index, is_sample=args.is_sample, is_subgraph=args.is_subgraph, args=args, is_train=False) 83 | train_local_params, Loss = model.train_one_epoch(data=data, optim=optim, epoch_index=epoch_index, is_sample=args.is_sample, is_subgraph=args.is_subgraph, args=args) 84 | 85 | if args.task == 'classification': 86 | [train_loss, train_loss_cls, train_recon_llh, train_graph_llh] = Loss 87 | else: 88 | [train_loss, train_recon_llh, train_graph_llh] = Loss 89 | 90 | if epoch_index % 1 == 0: 91 | test_local_params, Loss = model.test_one_epoch(data, is_sample=args.is_sample, is_subgraph=args.is_subgraph, args=args) 92 | 93 | # if args.task == 'classification': 94 | # [test_loss, test_loss_cls, test_recon_llh, test_graph_llh] = Loss 95 | # else: 96 | # [test_loss, test_recon_llh, test_graph_llh] = Loss 97 | # On classification task 98 | # accs = [] 99 | # for mask in [dataset.train_mask, dataset.val_mask, dataset.test_mask]: 100 | # accs.append(int((pred[mask] == dataset.y[mask]).sum()) / int(mask.sum())) 101 | # [train_acc, val_acc, tmp_test_acc] = accs 102 | # best_test_acc = np.maximum(best_test_acc, tmp_test_acc) 103 | 104 | # On prediction task 105 | theta = test_local_params[1] 106 | # Construct theta_concat for prediction 107 | theta_concat = None 108 | for layer in range(model._model_setting.num_layers): 109 | if layer == 0: 110 | theta_concat = model.u[layer] * theta[layer] 111 | else: 112 | theta_concat = torch.cat([theta_concat, model.u[layer] * theta[layer]], 0) 113 | theta_concat = theta_concat.cpu().detach().numpy() 114 | 115 | metric = ROC_AP_SCORE(test_edges, test_edges_false, adj_csc, emb=theta_concat.T) 116 | best_AUC = np.maximum(best_AUC, metric._AUC) 117 | best_AP = np.maximum(best_AP, metric._AP) 118 | 119 | print(f'Epoch[{epoch_index}|{args.num_epochs}]: 
loss:{train_loss}, graph_lh:{train_graph_llh}, recon_lh:{train_recon_llh}.' 120 | f' best_AUC:{best_AUC}, best_AP: {best_AP}') 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /pydpm/model/deep_learning_pm/dcgan.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================== 3 | DCGAN 4 | Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks 5 | Alec Radford, Luke Metz and Soumith Chintala 6 | Publihsed in ICLR 2016 7 | 8 | =========================================== 9 | """ 10 | 11 | # Author: Xinyang Liu 12 | # License: BSD-3-Clause 13 | 14 | import numpy as np 15 | import torch.nn as nn 16 | import torch 17 | from torch.autograd import Variable 18 | from torchvision.utils import save_image 19 | from pydpm.utils.utils import unnormalize_to_zero_to_one 20 | from tqdm import tqdm 21 | import os 22 | 23 | class DCGAN(nn.Module): 24 | def __init__(self, args, device='cuda:0'): 25 | super(DCGAN, self).__init__() 26 | setattr(self, '_model_name', 'DCGAN') 27 | self.z_dim = args.z_dim 28 | self.generator = Generator(args.in_channels, z_dim=self.z_dim).to(device) 29 | self.discriminator = Discriminator(args.in_channels).to(device) 30 | self.adversarial_loss = torch.nn.BCELoss().to(device) 31 | self.in_channel = args.in_channels 32 | self.device = device 33 | self.Tensor = torch.FloatTensor if self.device == 'cpu' else torch.cuda.FloatTensor 34 | 35 | def sample(self, batch_size): 36 | """ 37 | Sample from generator 38 | Inputs: 39 | batch_size : [int] number of img which you want; 40 | Outputs: 41 | gen_imgs : [tensor] a batch of images 42 | """ 43 | # Sample noise as generator input 44 | z = torch.tensor(np.random.normal(0, 1, (batch_size, self.z_dim, 1, 1))).type(self.Tensor).to(self.device) 45 | # Generate a batch of images 46 | gen_imgs = self.generator(z) 47 | return gen_imgs 48 | 49 | def train_one_epoch(self, model_opt_G, model_opt_D, dataloader, sample_interval, epoch, n_epochs): 50 | ''' 51 | Train for one epoch 52 | Inputs: 53 | model_opt_G : Optimizer for generator 54 | model_opt_D : Optimizer for discriminator 55 | dataloader : Train dataset with form of dataloader 56 | sample_interval : interval betwen image samples while training 57 | epoch : Current epoch on training stage 58 | n_epoch : Total number of epochs on training stage 59 | ''' 60 | G_loss_t, D_loss_t = 0, 0 61 | train_bar = tqdm(iterable=dataloader) 62 | for i, (imgs, _) in enumerate(train_bar): 63 | train_bar.set_description(f'Epoch [{epoch}/{n_epochs}]') 64 | train_bar.set_postfix(G_loss=G_loss_t / (i + 1), D_loss=D_loss_t / (i + 1)) 65 | 66 | imgs = imgs.to(self.device) 67 | # Adversarial ground truths 68 | valid = Variable(self.Tensor(imgs.size(0), 1).fill_(1.0), requires_grad=False).to(self.device) 69 | fake = Variable(self.Tensor(imgs.size(0), 1).fill_(0.0), requires_grad=False).to(self.device) 70 | real_imgs = Variable(imgs.type(self.Tensor)) 71 | 72 | gen_imgs = self.sample(imgs.shape[0]) 73 | 74 | # Train Discriminator 75 | model_opt_D.zero_grad() 76 | real_loss = self.adversarial_loss(self.discriminator(real_imgs).view(imgs.shape[0], 1), valid) 77 | fake_loss = self.adversarial_loss(self.discriminator(gen_imgs.detach()).view(imgs.shape[0], 1), fake) 78 | d_loss = (real_loss + fake_loss) / 2 79 | d_loss.backward() 80 | model_opt_D.step() 81 | D_loss_t += d_loss.item() 82 | 83 | # Train Generator 84 | model_opt_G.zero_grad() 85 | g_loss = 
self.adversarial_loss(self.discriminator(gen_imgs).view(imgs.shape[0], 1), valid) 86 | g_loss.backward() 87 | model_opt_G.step() 88 | G_loss_t += g_loss.item() 89 | 90 | batches_done = epoch * len(dataloader) + i 91 | if batches_done % sample_interval == 0: 92 | sample_images = gen_imgs.data.cpu()[:25] 93 | sample_images = unnormalize_to_zero_to_one(sample_images) 94 | save_image(sample_images, "../../output/images/dcgan_%d.png" % batches_done, nrow=5, normalize=True) 95 | 96 | def save(self, model_path: str = '../save_models'): 97 | """ 98 | save model 99 | Inputs: 100 | model_path : [str] the path to save the model, default '../save_models/DCGAN.pth'; 101 | """ 102 | # Save the model 103 | torch.save({'state_dict': self.state_dict()}, model_path + '/' + self._model_name + '.pth') 104 | print('model has been saved by ' + model_path + '/' + self._model_name + '.pth') 105 | 106 | 107 | def load(self, model_path): 108 | """ 109 | load model 110 | Inputs: 111 | model_path : [str] the path to load the model; 112 | """ 113 | assert os.path.exists(model_path), 'Path Error: can not find the path to load the model' 114 | # Load the model 115 | checkpoint = torch.load(model_path) 116 | self.load_state_dict(checkpoint['state_dict']) 117 | 118 | class Generator(torch.nn.Module): 119 | def __init__(self, in_channels, z_dim=100): 120 | ''' 121 | in_channels : channels of input data 122 | z_dim : dimension of latent vector 123 | ''' 124 | super().__init__() 125 | self.convT_layers = nn.Sequential( 126 | nn.ConvTranspose2d(in_channels=z_dim, out_channels=1024, kernel_size=4, stride=1, padding=0), 127 | nn.BatchNorm2d(num_features=1024), 128 | nn.ReLU(True), 129 | 130 | # [bs, 1024, 4, 4] 131 | nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=4, stride=2, padding=1), 132 | nn.BatchNorm2d(num_features=512), 133 | nn.ReLU(True), 134 | 135 | # [bs, 512, 8, 8] 136 | nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1), 137 | nn.BatchNorm2d(num_features=256), 138 | nn.ReLU(True), 139 | 140 | # [bs, 256, 16, 16] 141 | nn.ConvTranspose2d(in_channels=256, out_channels=in_channels, kernel_size=4, stride=2, padding=1)) 142 | # [bs, c, 32, 32] 143 | 144 | self.output = nn.Tanh() 145 | 146 | def forward(self, x): 147 | x = self.convT_layers(x) 148 | x = self.output(x) 149 | return x 150 | 151 | 152 | class Discriminator(torch.nn.Module): 153 | def __init__(self, in_channels): 154 | ''' 155 | in_channels : channels of input data 156 | ''' 157 | super().__init__() 158 | self.conv_layers = nn.Sequential( 159 | # [bs, c, 32, 32] 160 | nn.Conv2d(in_channels=in_channels, out_channels=256, kernel_size=4, stride=2, padding=1), 161 | nn.LeakyReLU(0.2, inplace=True), 162 | 163 | # [bs, 256, 16, 16] 164 | nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1), 165 | nn.BatchNorm2d(512), 166 | nn.LeakyReLU(0.2, inplace=True), 167 | 168 | # [bs, 512, 8, 8] 169 | nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=4, stride=2, padding=1), 170 | nn.BatchNorm2d(1024), 171 | nn.LeakyReLU(0.2, inplace=True)) 172 | # [bs, 1024, 4, 4] 173 | 174 | self.output = nn.Sequential( 175 | nn.Conv2d(in_channels=1024, out_channels=1, kernel_size=4, stride=1, padding=0), 176 | # [bs, 1, 1, 1] 177 | nn.Sigmoid()) 178 | 179 | def forward(self, x): 180 | x = self.conv_layers(x) 181 | x = self.output(x) 182 | return x 183 | --------------------------------------------------------------------------------
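Note: the DCGAN class above exposes the same train_one_epoch / sample / save interface as the other deep_learning_pm models. The sketch below is a minimal, illustrative usage example and is not the bundled DCGAN_Demo.py (whose contents are not shown here); it assumes DCGAN is re-exported from pydpm.model as the other demos do for their models, the argument names (--in_channels, --z_dim) mirror the attributes the constructor reads, and the dataset, optimizer settings and paths are assumed choices.

# Minimal DCGAN training sketch (illustrative; settings and paths are assumptions)
import argparse
import os

import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10

from pydpm.model import DCGAN  # assumed to be re-exported by pydpm.model, as in the other demos

parser = argparse.ArgumentParser()
parser.add_argument("--in_channels", type=int, default=3, help="channels of the input images")
parser.add_argument("--z_dim", type=int, default=100, help="dimensionality of the latent vector")
parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate")
parser.add_argument("--num_epochs", type=int, default=30, help="number of epochs of training")
parser.add_argument("--batch_size", type=int, default=64, help="batch size of dataloader")
parser.add_argument("--sample_interval", type=int, default=400, help="interval between image samples")
args = parser.parse_args()
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# the generator/discriminator above are built for 32x32 inputs, so CIFAR10 fits directly
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
dataset = CIFAR10(root='../../dataset', train=True, download=True, transform=transform)
dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, drop_last=True)

# train_one_epoch writes intermediate samples to ../../output/images/dcgan_*.png,
# and save() writes DCGAN.pth into the given directory, so create both folders first
os.makedirs('../../output/images', exist_ok=True)
os.makedirs('../../save_models', exist_ok=True)

model = DCGAN(args, device=device)
opt_G = torch.optim.Adam(model.generator.parameters(), lr=args.lr, betas=(0.5, 0.999))
opt_D = torch.optim.Adam(model.discriminator.parameters(), lr=args.lr, betas=(0.5, 0.999))

for epoch in range(args.num_epochs):
    model.train_one_epoch(opt_G, opt_D, dataloader, args.sample_interval, epoch, args.num_epochs)

model.save('../../save_models')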