├── .gitattributes
├── .gitignore
├── ChalearnLAPEvaluation.py
├── ChalearnLAPSample.py
├── ChalearnLAPTest.py
├── CoDaLab_Gesure_track3.pyproj
├── CoDaLab_Gesure_track3.sln
├── README.md
├── README.txt
├── SK_normalization.pkl
├── Step1_DBN.py
├── Step1_DBN_Structure2.py
├── Step1_SK_Neutral_Realtime.py
├── Step1_SK_realtime.py
├── Step1_transition_matrix.py
├── Step2_SK_Prediction.py
├── Step3_SK_Test_prediction.py
├── Step3_measure_performance.py
├── TheanoDL
│   ├── DBN.py
│   ├── DBN_MNIST.py
│   ├── GRBM_DBN.py
│   ├── GRBM_DBN.pyc
│   ├── LogisticRegressionMNIST.py
│   ├── Mean_DBN.py
│   ├── RBM_Pylearn2.py
│   ├── SdA.py
│   ├── Theano_Tutorial.pyproj
│   ├── Tutorial.py
│   ├── Tutorial.v11.suo
│   ├── cA.py
│   ├── convolutional_mlp.py
│   ├── dA.py
│   ├── grbm.py
│   ├── grbm.pyc
│   ├── logistic_cg.py
│   ├── logistic_sgd.py
│   ├── logistic_sgd.pyc
│   ├── mlp.py
│   ├── mlp.pyc
│   ├── rbm.py
│   ├── rbm.pyc
│   ├── rbm_gnumpy.py
│   ├── rbm_mean.py
│   ├── rnnrbm.py
│   ├── test.py
│   └── utils.py
├── Transition_matrix.mat
├── cvpr_2014_diwu.pdf
├── dbn_2014-05-23-20-07-28.npy
├── distance_median.npy
├── template.png
└── utils.py

--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto

# Custom for Visual Studio
*.cs diff=csharp
*.sln merge=union
*.csproj merge=union
*.vbproj merge=union
*.fsproj merge=union
*.dbproj merge=union

# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Windows image file caches
Thumbs.db
ehthumbs.db

# Folder config file
Desktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Windows Installer files
*.cab
*.msi
*.msm
*.msp

# =========================
# Operating System Files
# =========================

# OSX
# =========================

.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon

# Thumbnails
._*

# Files that might appear on external disk
.Spotlight-V100
.Trashes
--------------------------------------------------------------------------------
/ChalearnLAPTest.py:
--------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Name:        Chalearn LAP utils scripts
# Purpose:     Provide scripts to add labels to Chalearn LAP challenge track samples
#
# Author:      Xavier Baro
#              Di Wu: stevenwudi@gmail.com
# Created:     25/04/2014
# Copyright:   (c) Chalearn LAP 2014
# Licence:     GPL
#-------------------------------------------------------------------------------
import os
import zipfile
import shutil
import glob

def main():
    """ Main script.
        Create a labeled copy of the validation samples """
    # Data folder (unlabeled data samples)
    dataPath=r'I:\Kaggle_multimodal\Validation'
    # Labels folder (unzipped validation.zip)
    labelsPath=r'I:\Kaggle_multimodal\validation_labels'
    # Use the method for the desired track
    print('Uncomment the line for your track')
    addLabels_Track3(dataPath, labelsPath)


def addLabels_Track3(dataPath, labelsPath):
    """ Add labels to the samples """
    # Check the given data path
    if not os.path.exists(dataPath) or not os.path.isdir(dataPath):
        raise Exception("Data path does not exist: " + dataPath)
    # Check the given labels path
    if not os.path.exists(labelsPath) or not os.path.isdir(labelsPath):
        raise Exception("Labels path does not exist: " + labelsPath)

    # Get the list of samples
    samplesList = os.listdir(dataPath)
    # For each sample on the GT, search the given prediction
    for sample in samplesList:
        print "writing file " + sample
        # Build paths for the sample
        sampleFile = os.path.join(dataPath, sample)
        # Prepare sample information
        file = os.path.split(sampleFile)[1]
        sampleID = os.path.splitext(file)[0]
        samplePath = dataPath + os.path.sep + sampleID

        # Add the labels
        srtFileName = sampleID + '_labels.csv'
        srcSampleDataPath = os.path.join(labelsPath, srtFileName)
        dstSampleDataPath = os.path.join(sampleFile, srtFileName)
        shutil.copyfile(srcSampleDataPath, dstSampleDataPath)


if __name__ == '__main__':
    main()
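For orientation, `addLabels_Track3` simply copies each `<sampleID>_labels.csv` from the labels folder into the matching sample folder. A minimal usage sketch, assuming one sub-folder per validation sample (the sample name `Sample0471` below is hypothetical):

```python
# Hypothetical layout before running:
#   I:\Kaggle_multimodal\Validation\Sample0471\            <- sample data folder
#   I:\Kaggle_multimodal\validation_labels\Sample0471_labels.csv
# After running, each sample folder also contains its own label file:
#   I:\Kaggle_multimodal\Validation\Sample0471\Sample0471_labels.csv
addLabels_Track3(r'I:\Kaggle_multimodal\Validation',
                 r'I:\Kaggle_multimodal\validation_labels')
```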
--------------------------------------------------------------------------------
/CoDaLab_Gesure_track3.pyproj:
--------------------------------------------------------------------------------
(Visual Studio Python Tools project file; the XML markup did not survive extraction.
Recoverable values: configuration Debug, schema version 2.0, project GUID
a842cc43-58a6-4065-a788-12333aa22a55, startup file Step2_SK_Prediction.py, search
path C:\Users\PC-User\Documents\Visual Studio 2012\Projects\Theano\Tutorial\;ConvNet_3DCNN\,
project name CoDaLab_Gesure_track3, interpreter Python 2.7 via the Standard Python
launcher with the arguments -W ignore::DeprecationWarning, ToolsVersion 10.0, and the
$(VSToolsPath)\Python Tools\Microsoft.PythonTools.targets import.)
--------------------------------------------------------------------------------
/CoDaLab_Gesure_track3.sln:
--------------------------------------------------------------------------------
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "CoDaLab_Gesure_track3", "CoDaLab_Gesure_track3.pyproj", "{A842CC43-58A6-4065-A788-12333AA22A55}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{A842CC43-58A6-4065-A788-12333AA22A55}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{A842CC43-58A6-4065-A788-12333AA22A55}.Release|Any CPU.ActiveCfg = Release|Any CPU
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
EndGlobal
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Purpose
=============
This is the code for the "ChaLearn Looking at People 2014" challenge (Track 3).
******************************************************************************************************
Gist: Deep Belief Networks (Gaussian-Bernoulli RBM as the first layer) + Hidden Markov Model
******************************************************************************************************
by Di Wu: stevenwudi@gmail.com, 2015/05/27


Citation
-------
If you use this toolbox as part of a research project, please cite the corresponding paper:
******************************************************************************************************
```bibtex
@inproceedings{wu2014leveraging,
  title={Leveraging Hierarchical Parametric Networks for Skeletal Joints Based Action Segmentation and Recognition},
  author={Wu, Di and Shao, Ling},
  booktitle={Proc. Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2014}
}
```
******************************************************************************************************


Dependency: Theano
-------
Required dependencies:
Theano, for the deep learning tasks: http://deeplearning.net/software/theano/.
Note that Wudi changed some of the functionality (Deep Belief Networks, Gaussian-Bernoulli Restricted Boltzmann Machines).
The modified files are in the subfolder --> TheanoDL



Test
-------
To reproduce the experimental results for the test submission, there is a Python file:

`Step3_SK_Test_prediction.py`, and there are three paths that need to be changed accordingly:

    line: 60, Data folder (Test data)
    data_path=os.path.join("I:\Kaggle_multimodal\Test\Test\\")

    line: 62, Predictions folder (output)
    outPred=r'.\training\test'

    line: 64, Submission folder (output)
    outSubmision=r'.\training\test_submission'

It takes about ~20 seconds per sample file using only skeleton information. (I use the Theano GPU mode, but I reckon the CPU mode should run at almost the same speed.)

Train
-------
To train the network, you first need to extract the skeleton information:

1) `Step1_SK_Neutral_Realtime.py` --> extract neutral frames (i.e., the 5 frames before and after each gesture)

2) `Step1_SK_realtime.py` --> extract gesture frames

3) `Step1_DBN_Structure2.py` --> start training the networks (`Step1_DBN.py` specifies a smaller network that trains faster, but the larger net always performs better)

Voila, here you go.

Dataset
-------
Following readers' recommendations, I supplement the links to the datasets used in the paper as follows:

1) `ChaLearn Italian Gesture Recognition` --> [http://gesture.chalearn.org/2013-multi-modal-challenge](http://gesture.chalearn.org/2013-multi-modal-challenge)

You can download this dataset from the Kaggle platform: [https://www.kaggle.com/c/multi-modal-gesture-recognition/data](https://www.kaggle.com/c/multi-modal-gesture-recognition/data)

2) `MSR Action3D` --> [http://research.microsoft.com/en-us/um/people/zliu/actionrecorsrc](http://research.microsoft.com/en-us/um/people/zliu/actionrecorsrc)

3) `MSRC12` --> [http://research.microsoft.com/en-us/um/cambridge/projects/msrc12](http://research.microsoft.com/en-us/um/cambridge/projects/msrc12)

(If you use the datasets, please cite the corresponding original papers. Thanks!)

Contact
-------
If you read the code and find it really hard to understand, please send feedback to: stevenwudi@gmail.com
Thank you!
--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
Purpose
=============
This is the code for the "ChaLearn Looking at People 2014" challenge (Track 3).
Gist: Deep Belief Networks (Gaussian-Bernoulli RBM as the first layer) + Hidden Markov Model
by Di Wu: stevenwudi@gmail.com, 2015/05/27


Citation
-------
If you use this toolbox as part of a research project, please consider citing the corresponding paper:
******************************************************************************************************
@inproceedings{wu2014leveraging,
  title={Leveraging Hierarchical Parametric Networks for Skeletal Joints Based Action Segmentation and Recognition},
  author={Wu, Di and Shao, Ling},
  booktitle={Proc. Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2014}
}
******************************************************************************************************


Dependency: Theano
-------
Required dependencies:
(1) Theano, for the deep learning tasks: http://deeplearning.net/software/theano/
    Note that Wudi changed some of the functionality (Deep Belief Networks, Gaussian-Bernoulli Restricted Boltzmann Machines).
    The modified files are in the subfolder --> TheanoDL



Test
-------
To reproduce the experimental results for the test submission, there is a Python file:

Step3_SK_Test_prediction.py, and there are three paths that need to be changed accordingly:

line: 60, Data folder (Test data)
data_path=os.path.join("I:\Kaggle_multimodal\Test\Test\\")
line: 62, Predictions folder (output)
outPred=r'.\training\test'
line: 64, Submission folder (output)
outSubmision=r'.\training\test_submission'

It takes about ~20 seconds per sample file using only skeleton information. (I use the Theano GPU mode, but I reckon the CPU mode should run at almost the same speed.)

Train
-------
To train the network, you first need to extract the skeleton information:
1) Step1_SK_Neutral_Realtime.py --> extract neutral frames (i.e., the 5 frames before and after each gesture)
2) Step1_SK_realtime.py --> extract gesture frames
3) Step1_DBN_Structure2.py --> start training the networks (Step1_DBN.py specifies a smaller network that trains faster, but the larger net always performs better)

Voila, here you go. See the driver sketch below for one plausible run order.


Contact
-------
If you read the code and find it really hard to understand, please send feedback to: stevenwudi@gmail.com
Thank you!
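A minimal driver sketch for the pipeline above, assuming the data paths inside each
script have already been edited for your machine (the run order is inferred from the
file dependencies; Step1_transition_matrix.py must run before any prediction script
because the decoder loads its output):

    import subprocess
    for script in ['Step1_SK_Neutral_Realtime.py',   # neutral frames
                   'Step1_SK_realtime.py',           # gesture frames + HMM state targets
                   'Step1_transition_matrix.py',     # HMM prior/transition counts
                   'Step1_DBN_Structure2.py',        # DBN pre-training + fine-tuning
                   'Step3_SK_Test_prediction.py']:   # test predictions + submission
        subprocess.check_call(['python', script])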
--------------------------------------------------------------------------------
/SK_normalization.pkl:
--------------------------------------------------------------------------------
(Binary pickle file; raw byte dump omitted. It holds a pickled dict with the
skeleton-feature normalization statistics 'Mean1' and 'Std1', each a 528-element
float32 numpy array, written by Step1_DBN.py below and read back by the
prediction scripts.)
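For reference, this is how the pickle is consumed at prediction time (the load code below appears verbatim in Step2_SK_Prediction.py further down; the final comment shows the standardization that utils.normalize presumably applies):

```python
import cPickle
f = open('SK_normalization.pkl', 'rb')
SK_normalization = cPickle.load(f)
f.close()
Mean1 = SK_normalization['Mean1']   # per-dimension training mean (528-D)
Std1 = SK_normalization['Std1']     # per-dimension training std  (528-D)
# test-time features are then standardized, presumably as:
#   Feature_normalized = (Feature - Mean1) / Std1
```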
--------------------------------------------------------------------------------
/Step1_DBN.py:
--------------------------------------------------------------------------------
import cPickle
import gzip
import os
import sys
import time
import numpy
sys.path.append(r'C:\Users\PC-User\Documents\Visual Studio 2012\Projects\Theano\Tutorial')

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from logistic_sgd import LogisticRegression
from mlp import HiddenLayer

from rbm import RBM
from grbm import GBRBM
from utils import zero_mean_unit_variance
from utils import normalize
from GRBM_DBN import GRBM_DBN
from sklearn import preprocessing

def shared_dataset(data_xy, borrow=True):
    """ Function that loads the dataset into shared variables

    The reason we store our dataset in shared variables is to allow
    Theano to copy it into GPU memory (when the code is run on a GPU).
    Since copying data into the GPU is slow, copying a minibatch every
    time it is needed (the default behaviour if the data is not in a
    shared variable) would lead to a large decrease in performance.
    """
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    # When storing data on the GPU it has to be stored as floats, so we
    # store the labels as ``floatX`` as well (``shared_y`` does exactly
    # that).  But during our computations we need them as ints (we use
    # labels as indices, and floats make no sense there), so instead of
    # returning ``shared_y`` we cast it to int.  This little hack lets
    # us get around the issue.
    return shared_x, T.cast(shared_y, 'int32')

def load_CodaLab_skel(ratio_train=0.9, ration_valid=0.1):
    print '... loading data'

    f = file('Feature_train_realtime.pkl', 'rb')
    Feature_train = cPickle.load(f)
    f.close()

    f = file('Feature_all_neutral_realtime.pkl', 'rb')
    Feature_train_neural = cPickle.load(f)
    f.close()

    # Because we have too many neutral frames, we only need part of them
    rand_num = numpy.random.permutation(Feature_train_neural['Feature_all_neutral'].shape[0])

    F_neural = Feature_train_neural['Feature_all_neutral'][rand_num]
    T_neural = Feature_train_neural['Targets_all_new'][rand_num]
    Feature_all = numpy.concatenate((Feature_train['Feature_all'], F_neural))
    Target_all = numpy.concatenate((Feature_train['Targets_all'], T_neural))

    rand_num = numpy.random.permutation(Feature_all.shape[0])
    Feature_all = Feature_all[rand_num]
    Target_all = Target_all[rand_num]
    Target_all_numeric = numpy.argmax(Target_all, axis=1)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a 2-D numpy.ndarray (a matrix) whose rows each correspond to an
    # example; target is a 1-D numpy.ndarray (vector) with the same length as
    # the number of rows in the input, giving the target label for the example
    # with the same row index.
    # we separate the dataset into training / validation / testing splits
    # according to ratio_train and ration_valid
    train_end = int(rand_num.shape[0]*ratio_train)
    valid_end = int(rand_num.shape[0]*(ratio_train+ration_valid))

    # take the training split:
    train_set_feature = Feature_all[0:train_end, :]
    train_set_new_target = Target_all_numeric[0:train_end]

    # Wudi added normalized data for GRBM; note that sklearn's
    # preprocessing.scale() returns only the scaled array, so the per-dimension
    # statistics are computed explicitly here (they are reused at test time)
    Mean1 = train_set_feature.mean(axis=0)
    Std1 = train_set_feature.std(axis=0)
    train_set_feature_normalized = (train_set_feature - Mean1) / Std1

    import cPickle as pickle
    f = open('SK_normalization.pkl', 'wb')
    pickle.dump({"Mean1": Mean1, "Std1": Std1}, f)
    f.close()

    train_set_x, train_set_y = shared_dataset((train_set_feature_normalized, train_set_new_target))

    valid_set_feature = Feature_all[train_end:valid_end, :]
    valid_set_new_target = Target_all_numeric[train_end:valid_end]
    valid_set_feature = normalize(valid_set_feature, Mean1, Std1)
    valid_set_x, valid_set_y = shared_dataset((valid_set_feature, valid_set_new_target))

    # test feature set
    test_set_feature = Feature_all[valid_end:, :]
    test_set_new_target = Target_all_numeric[valid_end:]
    test_set_feature = normalize(test_set_feature, Mean1, Std1)
    test_set_x, test_set_y = shared_dataset((test_set_feature, test_set_new_target))

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval

def test_GRBM_DBN(finetune_lr=0.1, pretraining_epochs=100,
                  pretrain_lr=0.01, k=1, training_epochs=500,
                  batch_size=200):
    """
    Demonstrates how to train and test a Deep Belief Network,
    here applied to the ChaLearn skeleton features.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type k: int
    :param k: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    datasets = load_CodaLab_skel(ratio_train=0.9, ration_valid=0.08)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
                   hidden_layers_sizes=[1000, 1000, 500],
                   n_outs=201, finetune_lr=finetune_lr)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)
    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        if i == 0:
            # for the GRBM, the learning rate needs to be about one or two
            # orders of magnitude smaller than when using binary visible
            # units; some of the failures reported in the literature are
            # probably due to using a learning rate that is too large
            pretrain_lr_new = pretrain_lr*0.1
        else:
            pretrain_lr_new = pretrain_lr
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            start_time_temp = time.clock()
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            lr=pretrain_lr_new))
            end_time_temp = time.clock()
            print 'Pre-training layer %i, epoch %d, cost %f ' % (i, epoch, numpy.mean(c)) + ' ran for %d sec' % ((end_time_temp - start_time_temp))

    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size,
        learning_rate=finetune_lr)

    print '... fine-tuning the model'
    # early-stopping parameters
    patience = 4 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.          # wait this much longer when a new best is found
    improvement_threshold = 0.999   # a relative improvement of this much is
                                    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                    # go through this many minibatches before
                                    # checking the network on the validation
                                    # set; in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        start_time_temp = time.clock()
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                import warnings
                warnings.filterwarnings("ignore")
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                            improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)

                    end_time_temp = time.clock()
                    print(('epoch %i, minibatch %i/%i, validation error %f %%, ' \
                           'test error of best model %f %%, used time %d sec') %
                          (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.,
                           test_score * 100., (end_time_temp - start_time_temp)))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    from time import gmtime, strftime
    filename = 'dbn_' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
    dbn.save(filename)


    if 0:  # dead branch kept for reference: reload and test a saved model (never used)
        ## Now for testing
        dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
                       hidden_layers_sizes=[1000, 1000, 500],
                       n_outs=201)

        dbn.load('dbn_2014-05-22-18-39-37.npy')
        # compiling a Theano function that computes the mistakes that are made
        # by the model on a minibatch
        index = T.lscalar('index')
        validate_model = theano.function(inputs=[index],
            outputs=dbn.logLayer.p_y_given_x,
            givens={
                dbn.x: valid_set_x[index * batch_size:(index + 1) * batch_size]})

        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        temp = [validate_model(i)
                for i in xrange(n_valid_batches)]


if __name__ == '__main__':
    test_GRBM_DBN()
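Step1_DBN_Structure2.py, next, is a near copy of Step1_DBN.py; the substantive differences are the layer sizes and the fine-tuning schedule. Both build a GRBM-DBN mapping the 528-D skeleton feature to 201 output classes (10 HMM states x 20 gestures + 1 neutral state). A side-by-side sketch of the two constructor calls as they appear in the scripts (GRBM_DBN lives in the TheanoDL subfolder, which must be on sys.path):

```python
import numpy
from GRBM_DBN import GRBM_DBN   # from the repo's TheanoDL subfolder

numpy_rng = numpy.random.RandomState(123)
dbn_small = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
                     hidden_layers_sizes=[1000, 1000, 500],
                     n_outs=201, finetune_lr=0.1)    # Step1_DBN.py
dbn_large = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
                     hidden_layers_sizes=[2000, 2000, 1000],
                     n_outs=201, finetune_lr=1)      # Step1_DBN_Structure2.py,
# which additionally anneals the fine-tuning rate (annealing_learning_rate=0.99999)
```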
--------------------------------------------------------------------------------
/Step1_DBN_Structure2.py:
--------------------------------------------------------------------------------
import cPickle
import gzip
import os
import sys
import time
import numpy
sys.path.append(r'C:\Users\PC-User\Documents\Visual Studio 2012\Projects\Theano\Tutorial')

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from logistic_sgd import LogisticRegression
from mlp import HiddenLayer

from rbm import RBM
from grbm import GBRBM
from utils import zero_mean_unit_variance
from utils import normalize
from GRBM_DBN import GRBM_DBN
from sklearn import preprocessing

def shared_dataset(data_xy, borrow=True):
    """ Function that loads the dataset into shared variables

    The reason we store our dataset in shared variables is to allow
    Theano to copy it into GPU memory (when the code is run on a GPU).
    Since copying data into the GPU is slow, copying a minibatch every
    time it is needed (the default behaviour if the data is not in a
    shared variable) would lead to a large decrease in performance.
    """
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    # When storing data on the GPU it has to be stored as floats, so we
    # store the labels as ``floatX`` as well (``shared_y`` does exactly
    # that).  But during our computations we need them as ints (we use
    # labels as indices, and floats make no sense there), so instead of
    # returning ``shared_y`` we cast it to int.  This little hack lets
    # us get around the issue.
    return shared_x, T.cast(shared_y, 'int32')

def load_CodaLab_skel(ratio_train=0.9, ration_valid=0.1):
    print '... loading data'

    f = file('Feature_train_realtime.pkl', 'rb')
    Feature_train = cPickle.load(f)
    f.close()

    f = file('Feature_all_neutral_realtime.pkl', 'rb')
    Feature_train_neural = cPickle.load(f)
    f.close()

    # Because we have too many neutral frames, we only need part of them
    rand_num = numpy.random.permutation(Feature_train_neural['Feature_all_neutral'].shape[0])

    F_neural = Feature_train_neural['Feature_all_neutral'][rand_num]
    T_neural = Feature_train_neural['Targets_all_new'][rand_num]
    Feature_all = numpy.concatenate((Feature_train['Feature_all'], F_neural))
    Target_all = numpy.concatenate((Feature_train['Targets_all'], T_neural))

    rand_num = numpy.random.permutation(Feature_all.shape[0])
    Feature_all = Feature_all[rand_num]
    Target_all = Target_all[rand_num]
    Target_all_numeric = numpy.argmax(Target_all, axis=1)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a 2-D numpy.ndarray (a matrix) whose rows each correspond to an
    # example; target is a 1-D numpy.ndarray (vector) with the same length as
    # the number of rows in the input, giving the target label for the example
    # with the same row index.
    # we separate the dataset into training / validation / testing splits
    # according to ratio_train and ration_valid
    train_end = int(rand_num.shape[0]*ratio_train)
    valid_end = int(rand_num.shape[0]*(ratio_train+ration_valid))

    # take the training split:
    train_set_feature = Feature_all[0:train_end, :]
    train_set_new_target = Target_all_numeric[0:train_end]

    # Wudi added normalized data for GRBM; note that sklearn's
    # preprocessing.scale() returns only the scaled array, so the per-dimension
    # statistics are computed explicitly here (they are reused at test time)
    Mean1 = train_set_feature.mean(axis=0)
    Std1 = train_set_feature.std(axis=0)
    train_set_feature_normalized = (train_set_feature - Mean1) / Std1

    import cPickle as pickle
    f = open('SK_normalization.pkl', 'wb')
    pickle.dump({"Mean1": Mean1, "Std1": Std1}, f)
    f.close()

    train_set_x, train_set_y = shared_dataset((train_set_feature_normalized, train_set_new_target))

    valid_set_feature = Feature_all[train_end:valid_end, :]
    valid_set_new_target = Target_all_numeric[train_end:valid_end]
    valid_set_feature = normalize(valid_set_feature, Mean1, Std1)
    valid_set_x, valid_set_y = shared_dataset((valid_set_feature, valid_set_new_target))

    # test feature set
    test_set_feature = Feature_all[valid_end:, :]
    test_set_new_target = Target_all_numeric[valid_end:]
    test_set_feature = normalize(test_set_feature, Mean1, Std1)
    test_set_x, test_set_y = shared_dataset((test_set_feature, test_set_new_target))

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval

def test_GRBM_DBN(finetune_lr=1, pretraining_epochs=100,
                  pretrain_lr=0.01, k=1, training_epochs=500,
                  batch_size=200, annealing_learning_rate=0.99999):
    """
    Demonstrates how to train and test a Deep Belief Network,
    here applied to the ChaLearn skeleton features.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type k: int
    :param k: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
    :type batch_size: int
    :param batch_size: the size of a minibatch
    :type annealing_learning_rate: float
    :param annealing_learning_rate: annealing factor for the fine-tuning learning rate
    """

    datasets = load_CodaLab_skel(ratio_train=0.9, ration_valid=0.08)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
                   hidden_layers_sizes=[2000, 2000, 1000],
                   n_outs=201, finetune_lr=finetune_lr)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)
    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        if i == 0:
            # for the GRBM, the learning rate needs to be about one or two
            # orders of magnitude smaller than when using binary visible
            # units; some of the failures reported in the literature are
            # probably due to using a learning rate that is too large
            pretrain_lr_new = pretrain_lr*0.1
        else:
            pretrain_lr_new = pretrain_lr
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            start_time_temp = time.clock()
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            lr=pretrain_lr_new))
            end_time_temp = time.clock()
            print 'Pre-training layer %i, epoch %d, cost %f ' % (i, epoch, numpy.mean(c)) + ' ran for %d sec' % ((end_time_temp - start_time_temp))

    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size,
        annealing_learning_rate=annealing_learning_rate)

    print '... fine-tuning the model'
    # early-stopping parameters
    patience = 4 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.          # wait this much longer when a new best is found
    improvement_threshold = 0.999   # a relative improvement of this much is
                                    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                    # go through this many minibatches before
                                    # checking the network on the validation
                                    # set; in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        start_time_temp = time.clock()
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                import warnings
                warnings.filterwarnings("ignore")
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                            improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)

                    end_time_temp = time.clock()
                    print(('epoch %i, minibatch %i/%i, validation error %f %%, ' \
                           'test error of best model %f %%, used time %d sec') %
                          (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.,
                           test_score * 100., (end_time_temp - start_time_temp)))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    from time import gmtime, strftime
    filename = 'dbn_' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
    dbn.save(filename)


    if 0:  # dead branch kept for reference: reload and test a saved model (never used)
        ## Now for testing
        dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
                       hidden_layers_sizes=[1000, 1000, 500],
                       n_outs=201)

        dbn.load('dbn_2014-05-22-18-39-37.npy')
        # compiling a Theano function that computes the mistakes that are made
        # by the model on a minibatch
        index = T.lscalar('index')
        validate_model = theano.function(inputs=[index],
            outputs=dbn.logLayer.p_y_given_x,
            givens={
                dbn.x: valid_set_x[index * batch_size:(index + 1) * batch_size]})

        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        temp = [validate_model(i)
                for i in xrange(n_valid_batches)]


if __name__ == '__main__':
    test_GRBM_DBN()
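The two feature-extraction scripts that follow share one contract: every frame becomes a 528-D feature vector and a 201-way one-hot target. The arithmetic behind those constants, mirroring the allocation code in the scripts (Python 2 integer division; the `njoints**2` term presumably counts joint pairs across frames, following the CVPR 2014 paper):

```python
# 528 = (55 within-frame joint pairs + 121 cross-frame joint pairs) * 3,
# for the 11 used joints; 201 = 10 HMM states * 20 gestures + 1 neutral.
njoints, STATE_NO = 11, 10
feature_dim = (njoints * (njoints - 1) / 2 + njoints ** 2) * 3   # -> 528
n_classes = STATE_NO * 20 + 1                                    # -> 201
# a frame in HMM state s (0..9) of gesture g (1..20) sets index
#   s + STATE_NO * (g - 1)
# while a neutral frame sets the last index, n_classes - 1.
```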
--------------------------------------------------------------------------------
/Step1_SK_Neutral_Realtime.py:
--------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Name:        Starting Kit for ChaLearn LAP 2014 Track3
# Purpose:     Show basic functionality of provided code
#
# Author:      Xavier Baro
# Author:      Di Wu: stevenwudi@gmail.com
# Created:     24/03/2014
# Copyright:   (c) Chalearn LAP 2014
# Licence:     GPL3
#-------------------------------------------------------------------------------
import sys, os, random, numpy, zipfile
from shutil import copyfile
import matplotlib.pyplot as plt

from ChalearnLAPEvaluation import evalGesture, exportGT_Gesture
from ChalearnLAPSample import GestureSample
from utils import IsLeftDominant
from utils import Extract_feature_Realtime
from utils import Extract_feature_UNnormalized


# Data folder (Training data)
print("Extracting the training files")
data = os.path.join("I:\Kaggle_multimodal\Training\\")
# Get the list of training samples
samples = os.listdir(data)
used_joints = ['ElbowLeft', 'WristLeft', 'ShoulderLeft', 'HandLeft',
               'ElbowRight', 'WristRight', 'ShoulderRight', 'HandRight',
               'Head', 'Spine', 'HipCenter']
njoints = len(used_joints)
STATE_NO = 10
count = 0

# pre-allocating the memory
Feature_all = numpy.zeros(shape=(100000, (njoints*(njoints-1)/2 + njoints**2)*3), dtype=numpy.float32)
Targets = numpy.zeros(shape=(100000, STATE_NO*20+1), dtype=numpy.uint8)

# Access each sample
for file_count, file in enumerate(samples):
    #if not file.endswith(".zip"):
    #    continue;
    if file_count < 650:  # the first 650 samples are used for training
        print("\t Processing file " + file)
        # Create the object to access the sample
        smp = GestureSample(os.path.join(data, file))
        # ###############################################
        # USE Ground Truth information to learn the model
        # ###############################################
        # Get the list of gestures for this sample
        gesturesList = smp.getGestures()
        # Iterate over each gesture in this sample;
        # we also take the 5 frames before and after each ground-truth segment:
        seg_length = 5
        for gesture in gesturesList:
            # Get the gesture ID, and start and end frames for the gesture
            gestureID, startFrame, endFrame = gesture
            # This part is to extract action data

            Skeleton_matrix = numpy.zeros(shape=(5, len(used_joints)*3))
            HipCentre_matrix = numpy.zeros(shape=(5, 3))
            frame_num = 0

            ## extract the 5 frames just before the gesture
            if startFrame - seg_length > 0:
                Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, startFrame-seg_length+1, startFrame)
                if not valid_skel:
                    print "No detected Skeleton: ", gestureID
                else:
                    Feature = Extract_feature_Realtime(Skeleton_matrix, njoints)
                    begin_frame = count
                    end_frame = count + seg_length - 1
                    Feature_all[begin_frame:end_frame, :] = Feature
                    Targets[begin_frame:end_frame, -1] = 1
                    count = count + seg_length - 1

            ## extract the 5 frames just after the gesture
            if endFrame + seg_length < smp.getNumFrames():
                Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, endFrame, endFrame+seg_length-1)
                if not valid_skel:
                    print "No detected Skeleton: ", gestureID
                else:
                    Feature = Extract_feature_Realtime(Skeleton_matrix, njoints)
                    begin_frame = count
                    end_frame = count + seg_length - 1
                    Feature_all[begin_frame:end_frame, :] = Feature
                    Targets[begin_frame:end_frame, -1] = 1
                    count = count + seg_length - 1
        # ###############################################
        del smp

# save the skeleton file (keep only the filled rows):
Feature_all_new = Feature_all[0:end_frame, :]
Targets_all_new = Targets[0:end_frame, :]
import cPickle as pickle
Feature_train = {"Feature_all_neutral": Feature_all_new, "Targets_all_new": Targets_all_new}
pickle.dump(Feature_train, open("Feature_all_neutral_realtime.pkl", "wb"))

import scipy.io as sio
sio.savemat('Feature_all_neutral_realtime.mat', Feature_train)
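Step1_SK_realtime.py, below, force-aligns each gesture's frames evenly across its 10 HMM states. A worked example of the boundary arithmetic (note the Python 2 integer division inside numpy.round):

```python
import numpy
fr_no, STATE_NO = 23, 10          # e.g. a 23-frame gesture
for i in range(STATE_NO):
    begin_fr = numpy.round(fr_no * i / STATE_NO) + 1   # 1-based, inclusive
    end_fr = numpy.round(fr_no * (i + 1) / STATE_NO)
    print i, int(begin_fr), int(end_fr)
# state 0 gets frames 1-2, state 1 gets 3-4, ..., state 9 gets 21-23,
# so every frame is assigned to exactly one state.
```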
--------------------------------------------------------------------------------
/Step1_SK_realtime.py:
--------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Name:        Starting Kit for ChaLearn LAP 2014 Track3
# Purpose:     Show basic functionality of provided code
#
# Author:      Xavier Baro
# Author:      Di Wu: stevenwudi@gmail.com
# Created:     24/03/2014
# Copyright:   (c) Chalearn LAP 2014
# Licence:     GPL3
#-------------------------------------------------------------------------------
import sys, os, random, numpy, zipfile
from shutil import copyfile
import matplotlib.pyplot as plt

from ChalearnLAPEvaluation import evalGesture, exportGT_Gesture
from ChalearnLAPSample import GestureSample
from utils import IsLeftDominant
from utils import Extract_feature_Realtime
from utils import Extract_feature_UNnormalized


# Data folder (Training data)
print("Extracting the training files")
data = os.path.join("I:\Kaggle_multimodal\Training\\")
# Get the list of training samples
samples = os.listdir(data)
used_joints = ['ElbowLeft', 'WristLeft', 'ShoulderLeft', 'HandLeft',
               'ElbowRight', 'WristRight', 'ShoulderRight', 'HandRight',
               'Head', 'Spine', 'HipCenter']
njoints = len(used_joints)
STATE_NO = 10
count = 0

# pre-allocating the memory
Feature_all = numpy.zeros(shape=(400000, (njoints*(njoints-1)/2 + njoints**2)*3), dtype=numpy.float32)
Targets = numpy.zeros(shape=(400000, STATE_NO*20+1), dtype=numpy.uint8)
# Access each sample
for file_count, file in enumerate(samples):
    #if not file.endswith(".zip"):
    #    continue;
    if file_count < 650:  # the first 650 samples are used for training
        print("\t Processing file " + file)
        # Create the object to access the sample
        smp = GestureSample(os.path.join(data, file))
        # ###############################################
        # USE Ground Truth information to learn the model
        # ###############################################
        # Get the list of gestures for this sample
        gesturesList = smp.getGestures()

        # Iterate over each gesture in this sample
        for gesture in gesturesList:
            # Get the gesture ID, and start and end frames for the gesture
            gestureID, startFrame, endFrame = gesture
            Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, startFrame, endFrame)
            # check that we actually detected a skeleton:
            if not valid_skel:
                print "No detected Skeleton: ", gestureID
            else:
                ### extract the features according to the CVPR 2014 paper
                Feature = Extract_feature_Realtime(Skeleton_matrix, njoints)
                Target = numpy.zeros(shape=(Feature.shape[0], STATE_NO*20+1))
                fr_no = Feature.shape[0]
                for i in range(STATE_NO):  # HMM states forced alignment
                    begin_fr = numpy.round(fr_no * i / STATE_NO) + 1
                    end_fr = numpy.round(fr_no * (i+1) / STATE_NO)
                    #print "begin: %d, end: %d" % (begin_fr-1, end_fr)
                    seg_length = end_fr - begin_fr + 1
                    targets = numpy.zeros(shape=(STATE_NO*20+1, 1))
                    targets[i + STATE_NO*(gestureID-1)] = 1
                    begin_frame = count
                    end_frame = count + seg_length
                    Feature_all[begin_frame:end_frame, :] = Feature[begin_fr-1:end_fr, :]
                    Targets[begin_frame:end_frame, :] = numpy.tile(targets.T, (seg_length, 1))
                    count = count + seg_length
        # ###############################################
        ## delete the sample
        del smp

# save the skeleton file (keep only the filled rows):
import cPickle as pickle
f = open('Feature_train_realtime.pkl', 'wb')
pickle.dump({"Feature_all": Feature_all[0:end_frame, :], "Targets_all": Targets[0:end_frame, :]}, f)
f.close()


import scipy.io as sio
sio.savemat('Feature_all_train__realtime.mat', {"Feature_all": Feature_all[0:end_frame, :], "Targets_all": Targets[0:end_frame, :]})
--------------------------------------------------------------------------------
/Step1_transition_matrix.py:
--------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Name:        Starting Kit for ChaLearn LAP 2014 Track3
# Purpose:     Show basic functionality of provided code
#
# Author:      Xavier Baro
# Author:      Di Wu: stevenwudi@gmail.com
# Created:     24/03/2014
# Copyright:   (c) Chalearn LAP 2014
# Licence:     GPL3
#-------------------------------------------------------------------------------
import sys, os, random, numpy, zipfile
from shutil import copyfile
import matplotlib.pyplot as plt
import cv2
from ChalearnLAPEvaluation import evalGesture, exportGT_Gesture
from ChalearnLAPSample import GestureSample
from utils import IsLeftDominant
from utils import Extract_feature_normalized
from utils import Extract_feature
import time
import cPickle
""" Main script. Show how to perform all competition steps
    Access the sample information to learn a model. """
# Data folder (Training data)
print("Extracting the training files")
data = os.path.join("I:\Kaggle_multimodal\Training\\")
# Get the list of training samples
samples = os.listdir(data)
used_joints = ['ElbowLeft', 'WristLeft', 'ElbowRight', 'WristRight']
njoints = len(used_joints)
STATE_NO = 10
batch_num = 13

# pre-allocating the memory
Prior = numpy.zeros(shape=(201))
Transition_matrix = numpy.zeros(shape=(201, 201))

for file_count, file in enumerate(samples):
    #if not file.endswith(".zip"):
    #    continue;
    time_tic = time.time()
    if (file_count < 651):
        print("\t Processing file " + file)
        # Create the object to access the sample
        smp = GestureSample(os.path.join(data, file))
        # ###############################################
        # USE Ground Truth information to learn the model
        # ###############################################
        # Get the list of gestures for this sample
        gesturesList = smp.getGestures()

        for gesture in gesturesList:
            gestureID, startFrame, endFrame = gesture

            for frame in range(endFrame - startFrame + 1 - 4):

                state_no_1 = numpy.floor(frame * (STATE_NO * 1.0 / (endFrame - startFrame + 1 - 3)))
                state_no_1 = state_no_1 + STATE_NO * (gestureID - 1)
                state_no_2 = numpy.floor((frame + 1) * (STATE_NO * 1.0 / (endFrame - startFrame + 1 - 3)))
                state_no_2 = state_no_2 + STATE_NO * (gestureID - 1)
                ## we allow the first two frames to enter from the neutral (last) state:
                Prior[state_no_1] += 1
                Transition_matrix[state_no_1, state_no_2] += 1
                if frame < 2:
                    Transition_matrix[-1, state_no_1] += 1
                    Prior[-1] += 1
                if frame > (endFrame - startFrame + 1 - 4 - 2):
                    Transition_matrix[state_no_2, -1] += 1
                    Prior[-1] += 1
        del smp

import scipy.io as sio
sio.savemat('Transition_matrix.mat', {'Transition_matrix': Transition_matrix})
sio.savemat('Prior.mat', {'Prior': Prior})  # fixed: the key/value pair was swapped
sio.savemat('Prior_Transition_matrix.mat', {'Transition_matrix': Transition_matrix, 'Prior': Prior})

img = Transition_matrix * 1.0 * 255 / Transition_matrix.max()
fig, ax = plt.subplots()
cax = ax.imshow(img, interpolation='nearest', cmap=plt.cm.coolwarm)  # fixed: undefined 'temp2' and missing 'cm' import
cbar = fig.colorbar(cax, ticks=[-1, 0, 1])
cbar.ax.set_yticklabels(['< -1', '0', '> 1'])  # vertically oriented colorbar
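Before these raw counts can drive the Viterbi decoding in Step2_SK_Prediction.py (next), they must be turned into probabilities. That normalization presumably happens inside the repo's viterbi utilities (utils.py, not shown); a minimal sketch of the obvious version:

```python
import numpy
import scipy.io as sio

dic = sio.loadmat('Prior_Transition_matrix.mat')
A = dic['Transition_matrix'].astype('float64')
pi = dic['Prior'].ravel().astype('float64')
A = A / numpy.maximum(A.sum(axis=1, keepdims=True), 1.0)  # row-stochastic transitions
pi = pi / pi.sum()                                        # prior distribution
```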
sio.savemat('Transition_matrix.mat', {'Transition_matrix':Transition_matrix}) 75 | sio.savemat('Prior.mat', {'Prior': Prior}) 76 | sio.savemat('Prior_Transition_matrix.mat', {'Transition_matrix':Transition_matrix, 'Prior': Prior}) 77 | 78 | img = Transition_matrix*1.0*255/Transition_matrix.max() 79 | fig, ax = plt.subplots() 80 | cax = ax.imshow(img, interpolation='nearest', cmap=plt.cm.coolwarm) 81 | cbar = fig.colorbar(cax, ticks=[0, 128, 255]) 82 | cbar.ax.set_yticklabels(['0', '128', '255'])# vertically oriented colorbar 83 | 84 | -------------------------------------------------------------------------------- /Step2_SK_Prediction.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------- 2 | # Name: Starting Kit for ChaLearn LAP 2014 Track3 3 | # Purpose: Show basic functionality of provided code 4 | # 5 | # Author: Xavier Baro 6 | # Author: Di Wu: stevenwudi@gmail.com 7 | # Created: 24/03/2014 8 | # Copyright: (c) Chalearn LAP 2014 9 | # Licence: GPL3 10 | #------------------------------------------------------------------------------- 11 | import sys, os,random,numpy,zipfile 12 | from numpy import log 13 | from shutil import copyfile 14 | import matplotlib.pyplot as plt 15 | import cv2 16 | from ChalearnLAPEvaluation import evalGesture,exportGT_Gesture 17 | from ChalearnLAPSample import GestureSample 18 | from utils import IsLeftDominant 19 | from utils import Extract_feature_Realtime 20 | from utils import Extract_feature_UNnormalized 21 | from utils import normalize 22 | from utils import imdisplay 23 | from utils import viterbi_colab_clean 24 | from utils import createSubmisionFile 25 | import time 26 | import cPickle 27 | import numpy 28 | import pickle 29 | import scipy.io as sio 30 | 31 | ### theano import 32 | sys.path.append(r'C:\Users\PC-User\Documents\Visual Studio 2012\Projects\Theano\Tutorial') 33 | import theano 34 | import theano.tensor as T 35 | from theano.tensor.shared_randomstreams import RandomStreams 36 | from logistic_sgd import LogisticRegression 37 | from mlp import HiddenLayer 38 | from rbm import RBM 39 | from grbm import GBRBM 40 | from utils import zero_mean_unit_variance 41 | from utils import normalize 42 | from GRBM_DBN import GRBM_DBN 43 | from sklearn import preprocessing 44 | ############### viterbi path import 45 | from utils import viterbi_path, viterbi_path_log 46 | 47 | ######################### 48 | 49 | """ Main script. Show how to perform all competition steps 50 | Access the sample information to learn a model. 
""" 51 | # Data folder (Training data) 52 | print("Extracting the training files") 53 | data_path=os.path.join("I:\Kaggle_multimodal\Training\\") 54 | # Predictions folder (output) 55 | outPred='./training/pred/' 56 | # Get the list of training samples 57 | samples=os.listdir(data_path) 58 | used_joints = ['ElbowLeft', 'WristLeft', 'ShoulderLeft','HandLeft', 59 | 'ElbowRight', 'WristRight','ShoulderRight','HandRight', 60 | 'Head','Spine','HipCenter'] 61 | njoints = len(used_joints) 62 | STATE_NO = 10 63 | count = 0 64 | 65 | ### load the pre-store normalization constant 66 | f = open('SK_normalization.pkl','rb') 67 | SK_normalization = cPickle.load(f) 68 | Mean1 = SK_normalization ['Mean1'] 69 | Std1 = SK_normalization['Std1'] 70 | 71 | ## Load Prior and transitional Matrix 72 | dic=sio.loadmat('Transition_matrix.mat') 73 | Transition_matrix = dic['Transition_matrix'] 74 | Prior = dic['Prior'] 75 | 76 | for file_count, file in enumerate(samples): 77 | #if not file.endswith(".zip"): 78 | # continue; 79 | time_tic = time.time() 80 | if not file_count<650: 81 | print("\t Processing file " + file) 82 | # Create the object to access the sample 83 | smp=GestureSample(os.path.join(data_path,file)) 84 | # ############################################### 85 | # USE Ground Truth information to learn the model 86 | # ############################################### 87 | # Get the list of actions for this frame 88 | gesturesList=smp.getGestures() 89 | ########################################################### 90 | # we check whether it's left dominant or right dominanant 91 | # if right dominant, we correct them to left dominant 92 | ########################################################## 93 | Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, 1, smp.getNumFrames()) 94 | 95 | Feature = Extract_feature_Realtime(Skeleton_matrix, njoints) 96 | 97 | Feature_normalized = normalize(Feature, Mean1, Std1) 98 | 99 | ### Feed into DBN 100 | shared_x = theano.shared(numpy.asarray(Feature_normalized, 101 | dtype=theano.config.floatX), 102 | borrow=True) 103 | numpy_rng = numpy.random.RandomState(123) 104 | 105 | ### model 1 106 | ########################## 107 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528, 108 | hidden_layers_sizes=[1000, 1000, 500], 109 | n_outs=201) 110 | dbn.load('dbn_2014-05-23-20-07-28.npy') 111 | 112 | validate_model = theano.function(inputs=[], 113 | outputs=dbn.logLayer.p_y_given_x, 114 | givens={ dbn.x: shared_x}) 115 | 116 | observ_likelihood_1 = validate_model() 117 | del dbn 118 | ### model 2 119 | ########################## 120 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528, 121 | hidden_layers_sizes=[1000, 1000, 500], 122 | n_outs=201) 123 | 124 | dbn.load('dbn_2014-05-24-05-53-17.npy') 125 | 126 | validate_model = theano.function(inputs=[], 127 | outputs=dbn.logLayer.p_y_given_x, 128 | givens={ dbn.x: shared_x}) 129 | 130 | observ_likelihood_2 = validate_model() 131 | del dbn 132 | ### model 3 133 | ########################## 134 | 135 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528, 136 | hidden_layers_sizes=[2000, 2000, 1000], 137 | n_outs=201, finetune_lr=0.1) 138 | dbn.load('dbn_2014-05-25-10-11-56.npy') 139 | # Optimization complete with best validation score of 38.194915 %,with test performance 38.113636 % 140 | #....The score for this prediction is 0.792685963841 141 | 142 | validate_model = theano.function(inputs=[], 143 | outputs=dbn.logLayer.p_y_given_x, 144 | givens={ dbn.x: shared_x}) 145 | 146 | observ_likelihood_3 = validate_model() 147 
| del dbn 148 | 149 | ### model 4 150 | ########################## 151 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528, 152 | hidden_layers_sizes=[2000, 2000, 1000], 153 | n_outs=201, finetune_lr=0.1) 154 | dbn.load('dbn_2014-05-25-10-11-56.npy') 155 | # Optimization complete with best validation score of 38.194915 %, with test performance 38.113636 % 156 | #The score for this prediction is 0.777992357011 157 | 158 | validate_model = theano.function(inputs=[], 159 | outputs=dbn.logLayer.p_y_given_x, 160 | givens={ dbn.x: shared_x}) 161 | 162 | observ_likelihood_4 = validate_model() 163 | del dbn 164 | #sio.savemat('observ_likelihood.mat', {'observ_likelihood':observ_likelihood}) 165 | ########################## 166 | # viterbi path decoding 167 | ##################### 168 | 169 | log_observ_likelihood = log(observ_likelihood_1.T) + log(observ_likelihood_2.T) \ 170 | + log(observ_likelihood_3.T) #+ log(observ_likelihood_4.T) 171 | log_observ_likelihood[-1, 0:5] = 0 172 | log_observ_likelihood[-1, -5:] = 0 173 | 174 | print("\t Viterbi path decoding " ) 175 | # do it in log space to avoid numeric underflow 176 | [path, predecessor_state_index, global_score] = viterbi_path_log(log(Prior), log(Transition_matrix), log_observ_likelihood) 177 | #[path, predecessor_state_index, global_score] = viterbi_path(Prior, Transition_matrix, observ_likelihood) 178 | 179 | # Some gestures are not within the vocabulary 180 | [pred_label, begin_frame, end_frame, Individual_score, frame_length] = viterbi_colab_clean(path, global_score, threshold=-100, mini_frame=19) 181 | 182 | 183 | ### In theory no offset should be needed, but the ground truth runs about 3 frames longer, somewhat at random 184 | end_frame = end_frame + 3 185 | ### plot the path and prediction 186 | if True: 187 | im = imdisplay(global_score) 188 | plt.clf() 189 | plt.imshow(im, cmap='gray') 190 | plt.plot(range(global_score.shape[-1]), path, color='c',linewidth=2.0) 191 | plt.xlim((0, global_score.shape[-1])) 192 | # plot ground truth 193 | for gesture in gesturesList: 194 | # Get the gesture ID, and start and end frames for the gesture 195 | gestureID,startFrame,endFrame=gesture 196 | frames_count = numpy.array(range(startFrame, endFrame+1)) 197 | pred_label_temp = ((gestureID-1) *10 +5) * numpy.ones(len(frames_count)) 198 | plt.plot(frames_count, pred_label_temp, color='r', linewidth=5.0) 199 | 200 | # plot clean path 201 | for i in range(len(begin_frame)): 202 | frames_count = numpy.array(range(begin_frame[i], end_frame[i]+1)) 203 | pred_label_temp = ((pred_label[i]-1) *10 +5) * numpy.ones(len(frames_count)) 204 | plt.plot(frames_count, pred_label_temp, color='#ffff00', linewidth=2.0) 205 | 206 | plt.show() 207 | 208 | from pylab import savefig 209 | save_dir=r'.\training\SK_path' 210 | save_path= os.path.join(save_dir,file) 211 | savefig(save_path, bbox_inches='tight') 212 | plt.show() 213 | 214 | print "Elapsed time %d sec" % int(time.time() - time_tic) 215 | save_dir=r'.\training\SK' 216 | save_path= os.path.join(save_dir,file) 217 | out_file = open(save_path, 'wb') 218 | cPickle.dump({'log_observ_likelihood':log_observ_likelihood}, out_file, protocol=cPickle.HIGHEST_PROTOCOL) 219 | out_file.close() 220 | 221 | pred=[] 222 | for i in range(len(begin_frame)): 223 | pred.append([ pred_label[i], begin_frame[i], end_frame[i]] ) 224 | 225 | smp.exportPredictions(pred,outPred) 226 | # ############################################### 227 | ## delete the sample 228 | del smp 229 | 230 | 231 | 232 | TruthDir='./training/gt/' 233 | final_score = 
evalGesture(outPred,TruthDir) 234 | print("The score for this prediction is " + "{:.12f}".format(final_score)) 235 | #The score for this prediction is 0.746762613292 -inf threshold, dbn_2014-05-23-20-07-28 236 | #The score for this prediction is 0.731507614243 -3 threshold, dbn_2014-05-23-20-07-28 237 | #The score for this prediction is 0.748537955342 -inf threshold, dbn_2014-05-24-05-53-17 238 | # Submision folder (output) 239 | outSubmision='./training/submision/' 240 | # Prepare submision file (only for validation and final evaluation data sets) 241 | createSubmisionFile(outPred, outSubmision) 242 | 243 | 244 | 245 | 246 | #Sample ID: Sample0651, score 0.857417 247 | #Sample ID: Sample0652, score 0.913935 248 | #Sample ID: Sample0653, score 0.943355 249 | #Sample ID: Sample0654, score 0.917020 250 | #Sample ID: Sample0655, score 0.924133 251 | #Sample ID: Sample0656, score 0.749035 252 | #Sample ID: Sample0657, score 0.955422 253 | #Sample ID: Sample0658, score 0.908295 254 | #Sample ID: Sample0659, score 0.859846 255 | #Sample ID: Sample0660, score 0.856747 256 | #Sample ID: Sample0661, score 0.914236 257 | #Sample ID: Sample0662, score 0.786864 258 | #Sample ID: Sample0663, score 0.941406 259 | #Sample ID: Sample0664, score 0.828827 260 | #Sample ID: Sample0665, score 0.853589 261 | #Sample ID: Sample0666, score 0.720335 262 | #Sample ID: Sample0667, score 0.853116 263 | #Sample ID: Sample0668, score 0.933476 264 | #Sample ID: Sample0669, score 0.881736 265 | #Sample ID: Sample0670, score 0.768433 266 | #Sample ID: Sample0671, score 0.909118 267 | #Sample ID: Sample0672, score 0.950289 268 | #Sample ID: Sample0673, score 0.744832 269 | #Sample ID: Sample0674, score 0.859022 270 | #Sample ID: Sample0675, score 0.092073 271 | #Sample ID: Sample0676, score 0.954039 272 | #Sample ID: Sample0677, score 0.794421 273 | #Sample ID: Sample0678, score 0.709793 274 | #Sample ID: Sample0679, score 0.809159 275 | #Sample ID: Sample0680, score 0.812236 276 | #Sample ID: Sample0681, score 0.684452 277 | #Sample ID: Sample0682, score 0.828362 278 | #Sample ID: Sample0683, score 0.650288 279 | #Sample ID: Sample0684, score 0.820198 280 | #Sample ID: Sample0685, score 0.948309 281 | #Sample ID: Sample0686, score 0.774727 282 | #Sample ID: Sample0687, score 0.870839 283 | #Sample ID: Sample0688, score 0.770792 284 | #Sample ID: Sample0689, score 0.957459 285 | #Sample ID: Sample0690, score 0.929372 286 | #Sample ID: Sample0691, score 0.912913 287 | #Sample ID: Sample0692, score 0.921437 288 | #Sample ID: Sample0693, score 0.917426 289 | #Sample ID: Sample0694, score 0.819258 290 | #Sample ID: Sample0695, score 0.930915 291 | #Sample ID: Sample0696, score 0.913294 292 | #Sample ID: Sample0697, score 0.904583 293 | #Sample ID: Sample0698, score 0.807169 294 | #Sample ID: Sample0699, score 0.000000 295 | #Sample ID: Sample0700, score 0.814740 296 | #The score for this prediction is 0.823574769630 297 | 298 | 299 | ####################True validation############## 300 | #Sample ID: Sample0651, score 0.904025 301 | #Sample ID: Sample0652, score 0.904918 302 | #Sample ID: Sample0653, score 0.958563 303 | #Sample ID: Sample0654, score 0.908786 304 | #Sample ID: Sample0655, score 0.939971 305 | #Sample ID: Sample0656, score 0.855592 306 | #Sample ID: Sample0657, score 0.949432 307 | #Sample ID: Sample0658, score 0.892104 308 | #Sample ID: Sample0659, score 0.821147 309 | #Sample ID: Sample0660, score 0.731472 310 | #Sample ID: Sample0661, score 0.937361 311 | #Sample ID: Sample0662, score 0.669438 312 | #Sample 
ID: Sample0663, score 0.951005 313 | #Sample ID: Sample0664, score 0.943669 314 | #Sample ID: Sample0665, score 0.733362 315 | #Sample ID: Sample0666, score 0.609271 316 | #Sample ID: Sample0667, score 0.860603 317 | #Sample ID: Sample0668, score 0.858290 318 | #Sample ID: Sample0669, score 0.929701 319 | #Sample ID: Sample0670, score 0.768116 320 | #Sample ID: Sample0671, score 0.814299 321 | #Sample ID: Sample0672, score 0.930511 322 | #Sample ID: Sample0673, score 0.673121 323 | #Sample ID: Sample0674, score 0.812634 324 | #Sample ID: Sample0675, score 0.095109 325 | #Sample ID: Sample0676, score 0.849760 326 | #Sample ID: Sample0677, score 0.855732 327 | #Sample ID: Sample0678, score 0.697313 328 | #Sample ID: Sample0679, score 0.868751 329 | #Sample ID: Sample0680, score 0.784426 330 | #Sample ID: Sample0681, score 0.667418 331 | #Sample ID: Sample0682, score 0.789869 332 | #Sample ID: Sample0683, score 0.712648 333 | #Sample ID: Sample0684, score 0.774973 334 | #Sample ID: Sample0685, score 0.696109 335 | #Sample ID: Sample0686, score 0.718954 336 | #Sample ID: Sample0687, score 0.614459 337 | #Sample ID: Sample0688, score 0.823834 338 | #Sample ID: Sample0689, score 0.891862 339 | #Sample ID: Sample0690, score 0.868217 340 | #Sample ID: Sample0691, score 0.895659 341 | #Sample ID: Sample0692, score 0.763341 342 | #Sample ID: Sample0693, score 0.919345 343 | #Sample ID: Sample0694, score 0.884368 344 | #Sample ID: Sample0695, score 0.786327 345 | #Sample ID: Sample0696, score 0.855285 346 | #Sample ID: Sample0697, score 0.909057 347 | #Sample ID: Sample0698, score 0.714707 348 | #Sample ID: Sample0699, score 0.493874 349 | #Sample ID: Sample0700, score 0.797374 350 | #The score for this prediction is 0.801723171675 -------------------------------------------------------------------------------- /Step3_SK_Test_prediction.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------- 2 | # Name: Starting Kit for ChaLearn LAP 2014 Track3 3 | # Purpose: Show basic functionality of provided code 4 | # 5 | # Author: Xavier Baro 6 | # Author: Di Wu: stevenwudi@gmail.com 7 | # Created: 24/03/2014 8 | # Copyright: (c) Chalearn LAP 2014 9 | # Licence: GPL3 10 | #------------------------------------------------------------------------------- 11 | import sys, os, random, zipfile 12 | from numpy import log 13 | from shutil import copyfile 14 | import matplotlib.pyplot as plt 15 | import time 16 | import cPickle 17 | import numpy 18 | import scipy.io as sio 19 | 20 | #################################### 21 | ### local import 22 | #################################### 23 | from ChalearnLAPEvaluation import evalGesture,exportGT_Gesture 24 | from ChalearnLAPSample import GestureSample 25 | from utils import Extract_feature_Realtime 26 | from utils import Extract_feature_UNnormalized 27 | from utils import normalize 28 | from utils import imdisplay 29 | from utils import createSubmisionFile 30 | ############### viterbi path import 31 | from utils import viterbi_path, viterbi_path_log 32 | from utils import viterbi_colab_clean 33 | 34 | #################################### 35 | ### theano import 36 | #################################### 37 | sys.path.append(r'.\TheanoDL') 38 | try: 39 | import theano 40 | import theano.tensor as T 41 | from theano.tensor.shared_randomstreams import RandomStreams 42 | from logistic_sgd import LogisticRegression 43 | from mlp import HiddenLayer 44 | from rbm import 
RBM 45 | from grbm import GBRBM 46 | from utils import zero_mean_unit_variance 47 | from utils import normalize 48 | from GRBM_DBN import GRBM_DBN 49 | from sklearn import preprocessing 50 | except ImportError: 51 | sys.exit('Please install Theano!') 52 | 53 | 54 | print("Congratulations, you have finished loading all the required modules!") 55 | #################################### 56 | ### The path you need to set!!!##### 57 | #################################### 58 | 59 | # Data folder (Test data) 60 | data_path=os.path.join("I:\Kaggle_multimodal\Test\Test\\") 61 | # Predictions folder (output) 62 | outPred=r'.\training\test' 63 | # Submission folder (output) 64 | outSubmision=r'.\training\test_submission' 65 | 66 | #################################### 67 | ### Some initialization ############ 68 | #################################### 69 | used_joints = ['ElbowLeft', 'WristLeft', 'ShoulderLeft','HandLeft', 70 | 'ElbowRight', 'WristRight','ShoulderRight','HandRight', 71 | 'Head','Spine','HipCenter'] 72 | njoints = len(used_joints) 73 | ### load the pre-stored normalization constants 74 | f = open('SK_normalization.pkl','rb') 75 | SK_normalization = cPickle.load(f) 76 | Mean1 = SK_normalization['Mean1'] 77 | Std1 = SK_normalization['Std1'] 78 | ## Load Prior and Transition Matrix 79 | dic=sio.loadmat('Transition_matrix.mat') 80 | Transition_matrix = dic['Transition_matrix'] 81 | Prior = dic['Prior'] 82 | 83 | 84 | 85 | #################################### 86 | ### Start predicting here ########## 87 | #################################### 88 | samples=os.listdir(data_path) 89 | for file_count, file in enumerate(samples): 90 | #if not file.endswith(".zip"): 91 | # continue; 92 | time_tic = time.time() 93 | if file_count > -1: # always true; kept just for debugging convenience 94 | print("\t Processing file " + file) 95 | # Create the object to access the sample 96 | smp=GestureSample(os.path.join(data_path,file)) 97 | # ############################################### 98 | # The gesture list is only used by the optional plotting block below 99 | # ############################################### 100 | # Get the list of actions for this frame 101 | gesturesList=smp.getGestures() 102 | 103 | Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, 1, smp.getNumFrames()) 104 | 105 | Feature = Extract_feature_Realtime(Skeleton_matrix, njoints) 106 | 107 | Feature_normalized = normalize(Feature, Mean1, Std1) 108 | 109 | ### Feed into the DBN; Theano requires the shared tensor representation 110 | shared_x = theano.shared(numpy.asarray(Feature_normalized, 111 | dtype=theano.config.floatX), 112 | borrow=True) 113 | numpy_rng = numpy.random.RandomState(123) 114 | 115 | ########################## 116 | ### model 1 117 | ########################## 118 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528, 119 | hidden_layers_sizes=[1000, 1000, 500], 120 | n_outs=201) 121 | dbn.load('dbn_2014-05-23-20-07-28.npy') 122 | 123 | validate_model = theano.function(inputs=[], 124 | outputs=dbn.logLayer.p_y_given_x, 125 | givens={ dbn.x: shared_x}) 126 | 127 | observ_likelihood_1 = validate_model() 128 | del dbn 129 | 130 | ########################## 131 | ### model 2 132 | ########################## 133 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528, 134 | hidden_layers_sizes=[1000, 1000, 500], 135 | n_outs=201) 136 | 137 | dbn.load('dbn_2014-05-24-05-53-17.npy') 138 | 139 | validate_model = theano.function(inputs=[], 140 | outputs=dbn.logLayer.p_y_given_x, 141 | givens={ dbn.x: shared_x}) 142 | 143 | observ_likelihood_2 = validate_model() 
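# A possible refactoring, sketched as a comment so the flow above stays
# untouched: the build / load / compile / score pattern repeated for each
# model could live in one (hypothetical) helper, e.g.
#
#   def score_with_dbn(weight_file, layer_sizes, shared_x, numpy_rng):
#       """Load one pre-trained GRBM-DBN and return p(y|x) for shared_x."""
#       dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
#                      hidden_layers_sizes=layer_sizes, n_outs=201)
#       dbn.load(weight_file)
#       f = theano.function([], dbn.logLayer.p_y_given_x,
#                           givens={dbn.x: shared_x})
#       return f()
#
#   observ_likelihood_1 = score_with_dbn('dbn_2014-05-23-20-07-28.npy',
#                                        [1000, 1000, 500], shared_x, numpy_rng)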
144 | del dbn 145 | 146 | ########################## 147 | ### model 3 148 | ########################## 149 | 150 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528, 151 | hidden_layers_sizes=[2000, 2000, 1000], 152 | n_outs=201, finetune_lr=0.1) 153 | dbn.load('dbn_2014-05-25-10-11-56.npy') 154 | # Optimization complete with best validation score of 38.194915 %, with test performance 38.113636 % 155 | #....The score for this prediction is 0.792685963841 156 | 157 | validate_model = theano.function(inputs=[], 158 | outputs=dbn.logLayer.p_y_given_x, 159 | givens={ dbn.x: shared_x}) 160 | 161 | observ_likelihood_3 = validate_model() 162 | del dbn 163 | 164 | ########################## 165 | ### model 4 166 | ########################## 167 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528, 168 | hidden_layers_sizes=[2000, 2000, 1000], 169 | n_outs=201, finetune_lr=0.1) 170 | dbn.load('dbn_2014-05-25-11-09-00.npy') 171 | 172 | validate_model = theano.function(inputs=[], 173 | outputs=dbn.logLayer.p_y_given_x, 174 | givens={ dbn.x: shared_x}) 175 | 176 | observ_likelihood_4 = validate_model() 177 | del dbn 178 | 179 | #sio.savemat('observ_likelihood.mat', {'observ_likelihood':observ_likelihood}) 180 | ########################## 181 | # viterbi path decoding 182 | ##################### 183 | 184 | log_observ_likelihood = log(observ_likelihood_1.T) + log(observ_likelihood_2.T) \ 185 | + log(observ_likelihood_3.T) + log(observ_likelihood_4.T) 186 | # summing the log-likelihoods ensembles the four model posteriors 187 | 188 | 189 | 190 | 191 | log_observ_likelihood[-1, 0:5] = 0 # we force the first and last 5 frames into the ergodic state 192 | log_observ_likelihood[-1, -5:] = 0 193 | 194 | print("\t Viterbi path decoding " ) 195 | # do it in log space to avoid numeric underflow 196 | [path, predecessor_state_index, global_score] = viterbi_path_log(log(Prior), log(Transition_matrix), log_observ_likelihood) 197 | #[path, predecessor_state_index, global_score] = viterbi_path(Prior, Transition_matrix, observ_likelihood) 198 | 199 | # Some gestures are not within the vocabulary 200 | [pred_label, begin_frame, end_frame, Individual_score, frame_length] = viterbi_colab_clean(path, global_score, threshold=-100, mini_frame=19) 201 | 202 | #begin_frame = begin_frame-1 203 | end_frame = end_frame + 3 204 | ### plot the path and prediction 205 | if False: 206 | im = imdisplay(global_score) 207 | plt.imshow(im, cmap='gray') 208 | plt.plot(range(global_score.shape[-1]), path, color='c',linewidth=2.0) 209 | plt.xlim((0, global_score.shape[-1])) 210 | # plot ground truth 211 | for gesture in gesturesList: 212 | # Get the gesture ID, and start and end frames for the gesture 213 | gestureID,startFrame,endFrame=gesture 214 | frames_count = numpy.array(range(startFrame, endFrame+1)) 215 | pred_label_temp = ((gestureID-1) *10 +5) * numpy.ones(len(frames_count)) 216 | plt.plot(frames_count, pred_label_temp, color='r', linewidth=5.0) 217 | 218 | # plot clean path 219 | for i in range(len(begin_frame)): 220 | frames_count = numpy.array(range(begin_frame[i], end_frame[i]+1)) 221 | pred_label_temp = ((pred_label[i]-1) *10 +5) * numpy.ones(len(frames_count)) 222 | plt.plot(frames_count, pred_label_temp, color='#ffff00', linewidth=2.0) 223 | 224 | plt.show() 225 | else: 226 | print "Elapsed time %d sec" % int(time.time() - time_tic) 227 | 228 | pred=[] 229 | for i in range(len(begin_frame)): 230 | pred.append([ pred_label[i], begin_frame[i], end_frame[i]] ) 231 | 232 | 
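# Each entry of pred is a [gestureID, startFrame, endFrame] triplet;
# exportPredictions() below writes them out one gesture per row as a
# per-sample prediction CSV (named <sampleID>_prediction.csv in this
# starting kit, to the best of our reading of ChalearnLAPSample), which
# is the layout evalGesture() and the submission step expect.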
smp.exportPredictions(pred,outPred) 233 | 234 | # ############################################### 235 | ## delete the sample 236 | del smp 237 | 238 | 239 | # Prepare submision file (only for validation and final evaluation data sets) 240 | createSubmisionFile(outPred, outSubmision) 241 | -------------------------------------------------------------------------------- /Step3_measure_performance.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------- 2 | # Name: Starting Kit for ChaLearn LAP 2014 Track3 3 | # Purpose: Show basic functionality of provided code 4 | # 5 | # Author: Xavier Baro 6 | # Author: Di Wu: stevenwudi@gmail.com 7 | # Created: 24/03/2014 8 | # Copyright: (c) Chalearn LAP 2014 9 | # Licence: GPL3 10 | #------------------------------------------------------------------------------- 11 | import sys, os, os.path,random,numpy,zipfile 12 | from shutil import copyfile 13 | import matplotlib.pyplot as plt 14 | 15 | from ChalearnLAPEvaluation import evalGesture,exportGT_Gesture 16 | from ChalearnLAPSample import GestureSample 17 | from ChalearnLAPEvaluation import gesture_overlap_csv 18 | from utils import Extract_feature 19 | 20 | def main(): 21 | 22 | prediction_dir = r'I:\Kaggle_multimodal\StartingKit_track3\CoDaLab_Gesure_track3\matlab\prediction_650_conv' 23 | #prediction_dir = r'I:\Kaggle_multimodal\StartingKit_track3\CoDaLab_Gesure_track3\matlab\prediction_650' 24 | #truth_dir = r'I:\Kaggle_multimodal\validation_labels' 25 | truth_dir = r'I:\Kaggle_multimodal\validation' 26 | final_score = evalGesture(prediction_dir,truth_dir) 27 | print "final_score "+str(final_score) 28 | 29 | # 3DCNN: final_score0.375025337775 30 | 31 | 32 | if __name__ == '__main__': 33 | main() 34 | -------------------------------------------------------------------------------- /TheanoDL/GRBM_DBN.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/GRBM_DBN.pyc -------------------------------------------------------------------------------- /TheanoDL/Theano_Tutorial.pyproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Debug 5 | 2.0 6 | 9f652ea6-cee9-4214-9774-25d00ce2972c 7 | 8 | 9 | GRBM_DBN.py 10 | code\ 11 | . 12 | . 
13 | Theano_Tutorial 14 | Tutorial 15 | Standard Python launcher 16 | -W ignore::DeprecationWarning 17 | False 18 | False 19 | {947cee96-2ef3-4b77-b850-f04b2d86f6b6} 20 | 2.7 21 | 22 | 23 | true 24 | false 25 | 26 | 27 | true 28 | false 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 10.0 85 | $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion) 86 | $(VSToolsPath)\Python Tools\Microsoft.PythonTools.targets 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /TheanoDL/Tutorial.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as T 2 | from theano import function 3 | from theano import pp 4 | 5 | x = T.dscalar('x') 6 | y = T.dscalar('y') 7 | z = x + y 8 | f= function( [x, y], z) 9 | 10 | print f(2, 3) -------------------------------------------------------------------------------- /TheanoDL/Tutorial.v11.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/Tutorial.v11.suo -------------------------------------------------------------------------------- /TheanoDL/cA.py: -------------------------------------------------------------------------------- 1 | """This tutorial introduces Contractive auto-encoders (cA) using Theano. 2 | 3 | They are based on auto-encoders as the ones used in Bengio et 4 | al. 2007. An autoencoder takes an input x and first maps it to a 5 | hidden representation y = f_{\theta}(x) = s(Wx+b), parameterized by 6 | \theta={W,b}. The resulting latent representation y is then mapped 7 | back to a "reconstructed" vector z \in [0,1]^d in input space z = 8 | g_{\theta'}(y) = s(W'y + b'). The weight matrix W' can optionally be 9 | constrained such that W' = W^T, in which case the autoencoder is said 10 | to have tied weights. The network is trained such that to minimize 11 | the reconstruction error (the error between x and z). Adding the 12 | squared Frobenius norm of the Jacobian of the hidden mapping h with 13 | respect to the visible units yields the contractive auto-encoder: 14 | 15 | - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)] + \| \frac{\partial h(x)}{\partial x} \|^2 16 | 17 | References : 18 | - S. Rifai, P. Vincent, X. Muller, X. Glorot, Y. Bengio: Contractive 19 | Auto-Encoders: Explicit Invariance During Feature Extraction, ICML-11 20 | 21 | - S. Rifai, X. Muller, X. Glorot, G. Mesnil, Y. Bengio, and Pascal 22 | Vincent. Learning invariant features through local space 23 | contraction. Technical Report 1360, Universite de Montreal 24 | 25 | - Y. Bengio, P. Lamblin, D. Popovici, H. 
Larochelle: Greedy Layer-Wise 26 | Training of Deep Networks, Advances in Neural Information Processing 27 | Systems 19, 2007 28 | 29 | """ 30 | import cPickle 31 | import gzip 32 | import os 33 | import sys 34 | import time 35 | 36 | import numpy 37 | 38 | import theano 39 | import theano.tensor as T 40 | 41 | 42 | from logistic_sgd import load_data 43 | from utils import tile_raster_images 44 | 45 | import PIL.Image 46 | 47 | 48 | class cA(object): 49 | """ Contractive Auto-Encoder class (cA) 50 | 51 | The contractive autoencoder tries to reconstruct the input with an 52 | additional constraint on the latent space. With the objective of 53 | obtaining a robust representation of the input space, we 54 | regularize the L2 norm(Froebenius) of the jacobian of the hidden 55 | representation with respect to the input. Please refer to Rifai et 56 | al.,2011 for more details. 57 | 58 | If x is the input then equation (1) computes the projection of the 59 | input into the latent space h. Equation (2) computes the jacobian 60 | of h with respect to x. Equation (3) computes the reconstruction 61 | of the input, while equation (4) computes the reconstruction 62 | error and the added regularization term from Eq.(2). 63 | 64 | .. math:: 65 | 66 | h_i = s(W_i x + b_i) (1) 67 | 68 | J_i = h_i (1 - h_i) * W_i (2) 69 | 70 | x' = s(W' h + b') (3) 71 | 72 | L = -sum_{k=1}^d [x_k \log x'_k + (1-x_k) \log( 1-x'_k)] 73 | + lambda * sum_{i=1}^d sum_{j=1}^n J_{ij}^2 (4) 74 | 75 | """ 76 | 77 | def __init__(self, numpy_rng, input=None, n_visible=784, n_hidden=100, 78 | n_batchsize=1, W=None, bhid=None, bvis=None): 79 | """Initialize the cA class by specifying the number of visible units (the 80 | dimension d of the input ), the number of hidden units ( the dimension 81 | d' of the latent or hidden space ) and the contraction level. The 82 | constructor also receives symbolic variables for the input, weights and 83 | bias. 
84 | 85 | :type numpy_rng: numpy.random.RandomState 86 | :param numpy_rng: number random generator used to generate weights 87 | 88 | :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams 89 | :param theano_rng: Theano random generator; if None is given 90 | one is generated based on a seed drawn from `rng` 91 | 92 | :type input: theano.tensor.TensorType 93 | :param input: a symbolic description of the input or None for 94 | standalone cA 95 | 96 | :type n_visible: int 97 | :param n_visible: number of visible units 98 | 99 | :type n_hidden: int 100 | :param n_hidden: number of hidden units 101 | 102 | :type n_batchsize int 103 | :param n_batchsize: number of examples per batch 104 | 105 | :type W: theano.tensor.TensorType 106 | :param W: Theano variable pointing to a set of weights that should be 107 | shared belong the dA and another architecture; if dA should 108 | be standalone set this to None 109 | 110 | :type bhid: theano.tensor.TensorType 111 | :param bhid: Theano variable pointing to a set of biases values (for 112 | hidden units) that should be shared belong dA and another 113 | architecture; if dA should be standalone set this to None 114 | 115 | :type bvis: theano.tensor.TensorType 116 | :param bvis: Theano variable pointing to a set of biases values (for 117 | visible units) that should be shared belong dA and another 118 | architecture; if dA should be standalone set this to None 119 | 120 | """ 121 | self.n_visible = n_visible 122 | self.n_hidden = n_hidden 123 | self.n_batchsize = n_batchsize 124 | # note : W' was written as `W_prime` and b' as `b_prime` 125 | if not W: 126 | # W is initialized with `initial_W` which is uniformely sampled 127 | # from -4*sqrt(6./(n_visible+n_hidden)) and 128 | # 4*sqrt(6./(n_hidden+n_visible))the output of uniform if 129 | # converted using asarray to dtype 130 | # theano.config.floatX so that the code is runable on GPU 131 | initial_W = numpy.asarray(numpy_rng.uniform( 132 | low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)), 133 | high=4 * numpy.sqrt(6. 
/ (n_hidden + n_visible)), 134 | size=(n_visible, n_hidden)), 135 | dtype=theano.config.floatX) 136 | W = theano.shared(value=initial_W, name='W', borrow=True) 137 | 138 | if not bvis: 139 | bvis = theano.shared(value=numpy.zeros(n_visible, 140 | dtype=theano.config.floatX), 141 | borrow=True) 142 | 143 | if not bhid: 144 | bhid = theano.shared(value=numpy.zeros(n_hidden, 145 | dtype=theano.config.floatX), 146 | name='b', 147 | borrow=True) 148 | 149 | self.W = W 150 | # b corresponds to the bias of the hidden 151 | self.b = bhid 152 | # b_prime corresponds to the bias of the visible 153 | self.b_prime = bvis 154 | # tied weights, therefore W_prime is W transpose 155 | self.W_prime = self.W.T 156 | 157 | # if no input is given, generate a variable representing the input 158 | if input == None: 159 | # we use a matrix because we expect a minibatch of several 160 | # examples, each example being a row 161 | self.x = T.dmatrix(name='input') 162 | else: 163 | self.x = input 164 | 165 | self.params = [self.W, self.b, self.b_prime] 166 | 167 | def get_hidden_values(self, input): 168 | """ Computes the values of the hidden layer """ 169 | return T.nnet.sigmoid(T.dot(input, self.W) + self.b) 170 | 171 | def get_jacobian(self, hidden, W): 172 | """Computes the jacobian of the hidden layer with respect to 173 | the input, reshapes are necessary for broadcasting the 174 | element-wise product on the right axis 175 | 176 | """ 177 | return T.reshape(hidden * (1 - hidden), 178 | (self.n_batchsize, 1, self.n_hidden)) * T.reshape( 179 | W, (1, self.n_visible, self.n_hidden)) 180 | 181 | def get_reconstructed_input(self, hidden): 182 | """Computes the reconstructed input given the values of the 183 | hidden layer 184 | 185 | """ 186 | return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime) 187 | 188 | def get_cost_updates(self, contraction_level, learning_rate): 189 | """ This function computes the cost and the updates for one trainng 190 | step of the cA """ 191 | 192 | y = self.get_hidden_values(self.x) 193 | z = self.get_reconstructed_input(y) 194 | J = self.get_jacobian(y, self.W) 195 | # note : we sum over the size of a datapoint; if we are using 196 | # minibatches, L will be a vector, with one entry per 197 | # example in minibatch 198 | self.L_rec = - T.sum(self.x * T.log(z) + 199 | (1 - self.x) * T.log(1 - z), 200 | axis=1) 201 | 202 | # Compute the jacobian and average over the number of samples/minibatch 203 | self.L_jacob = T.sum(J ** 2) / self.n_batchsize 204 | 205 | # note : L is now a vector, where each element is the 206 | # cross-entropy cost of the reconstruction of the 207 | # corresponding example of the minibatch. 
We need to 208 | # compute the average of all these to get the cost of 209 | # the minibatch 210 | cost = T.mean(self.L_rec) + contraction_level * T.mean(self.L_jacob) 211 | 212 | # compute the gradients of the cost of the `cA` with respect 213 | # to its parameters 214 | gparams = T.grad(cost, self.params) 215 | # generate the list of updates 216 | updates = [] 217 | for param, gparam in zip(self.params, gparams): 218 | updates.append((param, param - learning_rate * gparam)) 219 | 220 | return (cost, updates) 221 | 222 | 223 | def test_cA(learning_rate=0.01, training_epochs=20, 224 | dataset='mnist.pkl.gz', 225 | batch_size=10, output_folder='cA_plots', contraction_level=.1): 226 | """ 227 | This demo is tested on MNIST 228 | 229 | :type learning_rate: float 230 | :param learning_rate: learning rate used for training the contracting 231 | AutoEncoder 232 | 233 | :type training_epochs: int 234 | :param training_epochs: number of epochs used for training 235 | 236 | :type dataset: string 237 | :param dataset: path to the picked dataset 238 | 239 | """ 240 | datasets = load_data(dataset) 241 | train_set_x, train_set_y = datasets[0] 242 | 243 | # compute number of minibatches for training, validation and testing 244 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size 245 | 246 | # allocate symbolic variables for the data 247 | index = T.lscalar() # index to a [mini]batch 248 | x = T.matrix('x') # the data is presented as rasterized images 249 | 250 | if not os.path.isdir(output_folder): 251 | os.makedirs(output_folder) 252 | os.chdir(output_folder) 253 | #################################### 254 | # BUILDING THE MODEL # 255 | #################################### 256 | 257 | rng = numpy.random.RandomState(123) 258 | 259 | ca = cA(numpy_rng=rng, input=x, 260 | n_visible=28 * 28, n_hidden=500, n_batchsize=batch_size) 261 | 262 | cost, updates = ca.get_cost_updates(contraction_level=contraction_level, 263 | learning_rate=learning_rate) 264 | 265 | train_ca = theano.function([index], [T.mean(ca.L_rec), ca.L_jacob], 266 | updates=updates, 267 | givens={x: train_set_x[index * batch_size: 268 | (index + 1) * batch_size]}) 269 | 270 | start_time = time.clock() 271 | 272 | ############ 273 | # TRAINING # 274 | ############ 275 | 276 | # go through training epochs 277 | for epoch in xrange(training_epochs): 278 | # go through trainng set 279 | c = [] 280 | for batch_index in xrange(n_train_batches): 281 | c.append(train_ca(batch_index)) 282 | 283 | c_array = numpy.vstack(c) 284 | print 'Training epoch %d, reconstruction cost ' % epoch, numpy.mean( 285 | c_array[0]), ' jacobian norm ', numpy.mean(numpy.sqrt(c_array[1])) 286 | 287 | end_time = time.clock() 288 | 289 | training_time = (end_time - start_time) 290 | 291 | print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + 292 | ' ran for %.2fm' % ((training_time) / 60.)) 293 | image = PIL.Image.fromarray(tile_raster_images( 294 | X=ca.W.get_value(borrow=True).T, 295 | img_shape=(28, 28), tile_shape=(10, 10), 296 | tile_spacing=(1, 1))) 297 | 298 | image.save('cae_filters.png') 299 | 300 | os.chdir('../') 301 | 302 | 303 | if __name__ == '__main__': 304 | test_cA() 305 | -------------------------------------------------------------------------------- /TheanoDL/convolutional_mlp.py: -------------------------------------------------------------------------------- 1 | """This tutorial introduces the LeNet5 neural network architecture 2 | using Theano. 
LeNet5 is a convolutional neural network, good for 3 | classifying images. This tutorial shows how to build the architecture, 4 | and comes with all the hyper-parameters you need to reproduce the 5 | paper's MNIST results. 6 | 7 | 8 | This implementation simplifies the model in the following ways: 9 | 10 | - LeNetConvPool doesn't implement location-specific gain and bias parameters 11 | - LeNetConvPool doesn't implement pooling by average, it implements pooling 12 | by max. 13 | - Digit classification is implemented with a logistic regression rather than 14 | an RBF network 15 | - LeNet5 was not fully-connected convolutions at second layer 16 | 17 | References: 18 | - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner: 19 | Gradient-Based Learning Applied to Document 20 | Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998. 21 | http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf 22 | 23 | """ 24 | import cPickle 25 | import gzip 26 | import os 27 | import sys 28 | import time 29 | 30 | import numpy 31 | 32 | import theano 33 | import theano.tensor as T 34 | from theano.tensor.signal import downsample 35 | from theano.tensor.nnet import conv 36 | 37 | from logistic_sgd import LogisticRegression, load_data 38 | from mlp import HiddenLayer 39 | 40 | 41 | class LeNetConvPoolLayer(object): 42 | """Pool Layer of a convolutional network """ 43 | 44 | def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)): 45 | """ 46 | Allocate a LeNetConvPoolLayer with shared variable internal parameters. 47 | 48 | :type rng: numpy.random.RandomState 49 | :param rng: a random number generator used to initialize weights 50 | 51 | :type input: theano.tensor.dtensor4 52 | :param input: symbolic image tensor, of shape image_shape 53 | 54 | :type filter_shape: tuple or list of length 4 55 | :param filter_shape: (number of filters, num input feature maps, 56 | filter height,filter width) 57 | 58 | :type image_shape: tuple or list of length 4 59 | :param image_shape: (batch size, num input feature maps, 60 | image height, image width) 61 | 62 | :type poolsize: tuple or list of length 2 63 | :param poolsize: the downsampling (pooling) factor (#rows,#cols) 64 | """ 65 | 66 | assert image_shape[1] == filter_shape[1] 67 | self.input = input 68 | 69 | # there are "num input feature maps * filter height * filter width" 70 | # inputs to each hidden unit 71 | fan_in = numpy.prod(filter_shape[1:]) 72 | # each unit in the lower layer receives a gradient from: 73 | # "num output feature maps * filter height * filter width" / 74 | # pooling size 75 | fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / 76 | numpy.prod(poolsize)) 77 | # initialize weights with random weights 78 | W_bound = numpy.sqrt(6. / (fan_in + fan_out)) 79 | self.W = theano.shared(numpy.asarray( 80 | rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), 81 | dtype=theano.config.floatX), 82 | borrow=True) 83 | 84 | # the bias is a 1D tensor -- one bias per output feature map 85 | b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) 86 | self.b = theano.shared(value=b_values, borrow=True) 87 | 88 | # convolve input feature maps with filters 89 | conv_out = conv.conv2d(input=input, filters=self.W, 90 | filter_shape=filter_shape, image_shape=image_shape) 91 | 92 | # downsample each feature map individually, using maxpooling 93 | pooled_out = downsample.max_pool_2d(input=conv_out, 94 | ds=poolsize, ignore_border=True) 95 | 96 | # add the bias term. 
Since the bias is a vector (1D array), we first 97 | # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will 98 | # thus be broadcasted across mini-batches and feature map 99 | # width & height 100 | self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')) 101 | 102 | # store parameters of this layer 103 | self.params = [self.W, self.b] 104 | 105 | 106 | def evaluate_lenet5(learning_rate=0.1, n_epochs=200, 107 | dataset='mnist.pkl.gz', 108 | nkerns=[20, 50], batch_size=500): 109 | """ Demonstrates lenet on MNIST dataset 110 | 111 | :type learning_rate: float 112 | :param learning_rate: learning rate used (factor for the stochastic 113 | gradient) 114 | 115 | :type n_epochs: int 116 | :param n_epochs: maximal number of epochs to run the optimizer 117 | 118 | :type dataset: string 119 | :param dataset: path to the dataset used for training /testing (MNIST here) 120 | 121 | :type nkerns: list of ints 122 | :param nkerns: number of kernels on each layer 123 | """ 124 | 125 | rng = numpy.random.RandomState(23455) 126 | 127 | datasets = load_data(dataset) 128 | 129 | train_set_x, train_set_y = datasets[0] 130 | valid_set_x, valid_set_y = datasets[1] 131 | test_set_x, test_set_y = datasets[2] 132 | 133 | # compute number of minibatches for training, validation and testing 134 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] 135 | n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] 136 | n_test_batches = test_set_x.get_value(borrow=True).shape[0] 137 | n_train_batches /= batch_size 138 | n_valid_batches /= batch_size 139 | n_test_batches /= batch_size 140 | 141 | # allocate symbolic variables for the data 142 | index = T.lscalar() # index to a [mini]batch 143 | x = T.matrix('x') # the data is presented as rasterized images 144 | y = T.ivector('y') # the labels are presented as 1D vector of 145 | # [int] labels 146 | 147 | ishape = (28, 28) # this is the size of MNIST images 148 | 149 | ###################### 150 | # BUILD ACTUAL MODEL # 151 | ###################### 152 | print '... building the model' 153 | 154 | # Reshape matrix of rasterized images of shape (batch_size,28*28) 155 | # to a 4D tensor, compatible with our LeNetConvPoolLayer 156 | layer0_input = x.reshape((batch_size, 1, 28, 28)) 157 | 158 | # Construct the first convolutional pooling layer: 159 | # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) 160 | # maxpooling reduces this further to (24/2,24/2) = (12,12) 161 | # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) 162 | layer0 = LeNetConvPoolLayer(rng, input=layer0_input, 163 | image_shape=(batch_size, 1, 28, 28), 164 | filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) 165 | 166 | # Construct the second convolutional pooling layer 167 | # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) 168 | # maxpooling reduces this further to (8/2,8/2) = (4,4) 169 | # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) 170 | layer1 = LeNetConvPoolLayer(rng, input=layer0.output, 171 | image_shape=(batch_size, nkerns[0], 12, 12), 172 | filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) 173 | 174 | # the HiddenLayer being fully-connected, it operates on 2D matrices of 175 | # shape (batch_size,num_pixels) (i.e matrix of rasterized images). 
176 | # This will generate a matrix of shape (20,32*4*4) = (20,512) 177 | layer2_input = layer1.output.flatten(2) 178 | 179 | # construct a fully-connected sigmoidal layer 180 | layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, 181 | n_out=500, activation=T.tanh) 182 | 183 | # classify the values of the fully-connected sigmoidal layer 184 | layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) 185 | 186 | # the cost we minimize during training is the NLL of the model 187 | cost = layer3.negative_log_likelihood(y) 188 | 189 | # create a function to compute the mistakes that are made by the model 190 | test_model = theano.function([index], layer3.errors(y), 191 | givens={ 192 | x: test_set_x[index * batch_size: (index + 1) * batch_size], 193 | y: test_set_y[index * batch_size: (index + 1) * batch_size]}) 194 | 195 | validate_model = theano.function([index], layer3.errors(y), 196 | givens={ 197 | x: valid_set_x[index * batch_size: (index + 1) * batch_size], 198 | y: valid_set_y[index * batch_size: (index + 1) * batch_size]}) 199 | 200 | # create a list of all model parameters to be fit by gradient descent 201 | params = layer3.params + layer2.params + layer1.params + layer0.params 202 | 203 | # create a list of gradients for all model parameters 204 | grads = T.grad(cost, params) 205 | 206 | # train_model is a function that updates the model parameters by 207 | # SGD Since this model has many parameters, it would be tedious to 208 | # manually create an update rule for each model parameter. We thus 209 | # create the updates list by automatically looping over all 210 | # (params[i],grads[i]) pairs. 211 | updates = [] 212 | for param_i, grad_i in zip(params, grads): 213 | updates.append((param_i, param_i - learning_rate * grad_i)) 214 | 215 | train_model = theano.function([index], cost, updates=updates, 216 | givens={ 217 | x: train_set_x[index * batch_size: (index + 1) * batch_size], 218 | y: train_set_y[index * batch_size: (index + 1) * batch_size]}) 219 | 220 | ############### 221 | # TRAIN MODEL # 222 | ############### 223 | print '... training' 224 | # early-stopping parameters 225 | patience = 10000 # look as this many examples regardless 226 | patience_increase = 2 # wait this much longer when a new best is 227 | # found 228 | improvement_threshold = 0.995 # a relative improvement of this much is 229 | # considered significant 230 | validation_frequency = min(n_train_batches, patience / 2) 231 | # go through this many 232 | # minibatche before checking the network 233 | # on the validation set; in this case we 234 | # check every epoch 235 | 236 | best_params = None 237 | best_validation_loss = numpy.inf 238 | best_iter = 0 239 | test_score = 0. 
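# Illustrative arithmetic for the early-stopping bookkeeping below
# (assumed iteration numbers, not from a real run): with patience = 10000
# and patience_increase = 2, a sufficiently large validation improvement
# at iter 3000 keeps patience at max(10000, 3000*2) = 10000, while one at
# iter 8000 raises it to 16000; the while-loop stops once patience <= iter.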
240 | start_time = time.clock() 241 | 242 | epoch = 0 243 | done_looping = False 244 | 245 | while (epoch < n_epochs) and (not done_looping): 246 | epoch = epoch + 1 247 | for minibatch_index in xrange(n_train_batches): 248 | 249 | iter = (epoch - 1) * n_train_batches + minibatch_index 250 | 251 | if iter % 100 == 0: 252 | print 'training @ iter = ', iter 253 | cost_ij = train_model(minibatch_index) 254 | 255 | if (iter + 1) % validation_frequency == 0: 256 | 257 | # compute zero-one loss on validation set 258 | validation_losses = [validate_model(i) for i 259 | in xrange(n_valid_batches)] 260 | this_validation_loss = numpy.mean(validation_losses) 261 | print('epoch %i, minibatch %i/%i, validation error %f %%' % \ 262 | (epoch, minibatch_index + 1, n_train_batches, \ 263 | this_validation_loss * 100.)) 264 | 265 | # if we got the best validation score until now 266 | if this_validation_loss < best_validation_loss: 267 | 268 | #improve patience if loss improvement is good enough 269 | if this_validation_loss < best_validation_loss * \ 270 | improvement_threshold: 271 | patience = max(patience, iter * patience_increase) 272 | 273 | # save best validation score and iteration number 274 | best_validation_loss = this_validation_loss 275 | best_iter = iter 276 | 277 | # test it on the test set 278 | test_losses = [test_model(i) for i in xrange(n_test_batches)] 279 | test_score = numpy.mean(test_losses) 280 | print((' epoch %i, minibatch %i/%i, test error of best ' 281 | 'model %f %%') % 282 | (epoch, minibatch_index + 1, n_train_batches, 283 | test_score * 100.)) 284 | 285 | if patience <= iter: 286 | done_looping = True 287 | break 288 | 289 | end_time = time.clock() 290 | print('Optimization complete.') 291 | print('Best validation score of %f %% obtained at iteration %i,'\ 292 | 'with test performance %f %%' % 293 | (best_validation_loss * 100., best_iter + 1, test_score * 100.)) 294 | print >> sys.stderr, ('The code for file ' + 295 | os.path.split(__file__)[1] + 296 | ' ran for %.2fm' % ((end_time - start_time) / 60.)) 297 | 298 | if __name__ == '__main__': 299 | evaluate_lenet5() 300 | 301 | 302 | def experiment(state, channel): 303 | evaluate_lenet5(state.learning_rate, dataset=state.dataset) 304 | -------------------------------------------------------------------------------- /TheanoDL/dA.py: -------------------------------------------------------------------------------- 1 | """ 2 | This tutorial introduces denoising auto-encoders (dA) using Theano. 3 | 4 | Denoising autoencoders are the building blocks for SdA. 5 | They are based on auto-encoders as the ones used in Bengio et al. 2007. 6 | An autoencoder takes an input x and first maps it to a hidden representation 7 | y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting 8 | latent representation y is then mapped back to a "reconstructed" vector 9 | z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b'). The weight 10 | matrix W' can optionally be constrained such that W' = W^T, in which case 11 | the autoencoder is said to have tied weights. The network is trained such 12 | that to minimize the reconstruction error (the error between x and z). 13 | 14 | For the denosing autoencoder, during training, first x is corrupted into 15 | \tilde{x}, where \tilde{x} is a partially destroyed version of x by means 16 | of a stochastic mapping. Afterwards y is computed as before (using 17 | \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). 
The reconstruction 18 | error is now measured between z and the uncorrupted input x, which is 19 | computed as the cross-entropy : 20 | - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)] 21 | 22 | 23 | References : 24 | - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and 25 | Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103, 26 | 2008 27 | - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise 28 | Training of Deep Networks, Advances in Neural Information Processing 29 | Systems 19, 2007 30 | 31 | """ 32 | 33 | import cPickle 34 | import gzip 35 | import os 36 | import sys 37 | import time 38 | 39 | import numpy 40 | 41 | import theano 42 | import theano.tensor as T 43 | from theano.tensor.shared_randomstreams import RandomStreams 44 | 45 | from logistic_sgd import load_data 46 | from utils import tile_raster_images 47 | 48 | import PIL.Image 49 | 50 | 51 | class dA(object): 52 | """Denoising Auto-Encoder class (dA) 53 | 54 | A denoising autoencoders tries to reconstruct the input from a corrupted 55 | version of it by projecting it first in a latent space and reprojecting 56 | it afterwards back in the input space. Please refer to Vincent et al.,2008 57 | for more details. If x is the input then equation (1) computes a partially 58 | destroyed version of x by means of a stochastic mapping q_D. Equation (2) 59 | computes the projection of the input into the latent space. Equation (3) 60 | computes the reconstruction of the input, while equation (4) computes the 61 | reconstruction error. 62 | 63 | .. math:: 64 | 65 | \tilde{x} ~ q_D(\tilde{x}|x) (1) 66 | 67 | y = s(W \tilde{x} + b) (2) 68 | 69 | x = s(W' y + b') (3) 70 | 71 | L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)] (4) 72 | 73 | """ 74 | 75 | def __init__(self, numpy_rng, theano_rng=None, input=None, 76 | n_visible=784, n_hidden=500, 77 | W=None, bhid=None, bvis=None): 78 | """ 79 | Initialize the dA class by specifying the number of visible units (the 80 | dimension d of the input ), the number of hidden units ( the dimension 81 | d' of the latent or hidden space ) and the corruption level. The 82 | constructor also receives symbolic variables for the input, weights and 83 | bias. Such a symbolic variables are useful when, for example the input 84 | is the result of some computations, or when weights are shared between 85 | the dA and an MLP layer. When dealing with SdAs this always happens, 86 | the dA on layer 2 gets as input the output of the dA on layer 1, 87 | and the weights of the dA are used in the second stage of training 88 | to construct an MLP. 
89 | 90 | :type numpy_rng: numpy.random.RandomState 91 | :param numpy_rng: number random generator used to generate weights 92 | 93 | :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams 94 | :param theano_rng: Theano random generator; if None is given one is 95 | generated based on a seed drawn from `rng` 96 | 97 | :type input: theano.tensor.TensorType 98 | :param input: a symbolic description of the input or None for 99 | standalone dA 100 | 101 | :type n_visible: int 102 | :param n_visible: number of visible units 103 | 104 | :type n_hidden: int 105 | :param n_hidden: number of hidden units 106 | 107 | :type W: theano.tensor.TensorType 108 | :param W: Theano variable pointing to a set of weights that should be 109 | shared belong the dA and another architecture; if dA should 110 | be standalone set this to None 111 | 112 | :type bhid: theano.tensor.TensorType 113 | :param bhid: Theano variable pointing to a set of biases values (for 114 | hidden units) that should be shared belong dA and another 115 | architecture; if dA should be standalone set this to None 116 | 117 | :type bvis: theano.tensor.TensorType 118 | :param bvis: Theano variable pointing to a set of biases values (for 119 | visible units) that should be shared belong dA and another 120 | architecture; if dA should be standalone set this to None 121 | 122 | 123 | """ 124 | self.n_visible = n_visible 125 | self.n_hidden = n_hidden 126 | 127 | # create a Theano random generator that gives symbolic random values 128 | if not theano_rng: 129 | theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) 130 | 131 | # note : W' was written as `W_prime` and b' as `b_prime` 132 | if not W: 133 | # W is initialized with `initial_W` which is uniformely sampled 134 | # from -4*sqrt(6./(n_visible+n_hidden)) and 135 | # 4*sqrt(6./(n_hidden+n_visible))the output of uniform if 136 | # converted using asarray to dtype 137 | # theano.config.floatX so that the code is runable on GPU 138 | initial_W = numpy.asarray(numpy_rng.uniform( 139 | low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)), 140 | high=4 * numpy.sqrt(6. 
/ (n_hidden + n_visible)),
141 | size=(n_visible, n_hidden)), dtype=theano.config.floatX)
142 | W = theano.shared(value=initial_W, name='W', borrow=True)
143 |
144 | if not bvis:
145 | bvis = theano.shared(value=numpy.zeros(n_visible,
146 | dtype=theano.config.floatX),
147 | borrow=True)
148 |
149 | if not bhid:
150 | bhid = theano.shared(value=numpy.zeros(n_hidden,
151 | dtype=theano.config.floatX),
152 | name='b',
153 | borrow=True)
154 |
155 | self.W = W
156 | # b corresponds to the bias of the hidden
157 | self.b = bhid
158 | # b_prime corresponds to the bias of the visible
159 | self.b_prime = bvis
160 | # tied weights, therefore W_prime is W transpose
161 | self.W_prime = self.W.T
162 | self.theano_rng = theano_rng
163 | # if no input is given, generate a variable representing the input
164 | if input is None:
165 | # we use a matrix because we expect a minibatch of several
166 | # examples, each example being a row
167 | self.x = T.dmatrix(name='input')
168 | else:
169 | self.x = input
170 |
171 | self.params = [self.W, self.b, self.b_prime]
172 |
173 | def get_corrupted_input(self, input, corruption_level):
174 | """This function keeps ``1-corruption_level`` entries of the inputs the
175 | same and zeroes out a randomly selected subset of size ``corruption_level``.
176 | Note : the first argument of theano.rng.binomial is the shape (size) of the
177 | random numbers that it should produce;
178 | the second argument is the number of trials;
179 | the third argument is the probability of success of any trial.
180 |
181 | This will produce an array of 0s and 1s, where 1 appears with a
182 | probability of 1 - ``corruption_level`` and 0 with a probability of
183 | ``corruption_level``.
184 |
185 | The binomial function returns the int64 data type by
186 | default. int64 multiplied by the input
187 | type (floatX) always returns float64. To keep all data
188 | in floatX when floatX is float32, we set the dtype of
189 | the binomial to floatX. As in our case the value of
190 | the binomial is always 0 or 1, this doesn't change the
191 | result. This is needed to allow the gpu to work
192 | correctly as it only supports float32 for now.
193 |
194 | """
195 | return self.theano_rng.binomial(size=input.shape, n=1,
196 | p=1 - corruption_level,
197 | dtype=theano.config.floatX) * input
198 |
199 | def get_hidden_values(self, input):
200 | """ Computes the values of the hidden layer """
201 | return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
202 |
203 | def get_reconstructed_input(self, hidden):
204 | """Computes the reconstructed input given the values of the
205 | hidden layer
206 |
207 | """
208 | return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
209 |
210 | def get_cost_updates(self, corruption_level, learning_rate):
211 | """ This function computes the cost and the updates for one training
212 | step of the dA """
213 |
214 | tilde_x = self.get_corrupted_input(self.x, corruption_level)
215 | y = self.get_hidden_values(tilde_x)
216 | z = self.get_reconstructed_input(y)
217 | # note : we sum over the size of a datapoint; if we are using
218 | # minibatches, L will be a vector, with one entry per
219 | # example in minibatch
220 | L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
221 | # note : L is now a vector, where each element is the
222 | # cross-entropy cost of the reconstruction of the
223 | # corresponding example of the minibatch.
We need to
224 | # compute the average of all these to get the cost of
225 | # the minibatch
226 | cost = T.mean(L)
227 |
228 | # compute the gradients of the cost of the `dA` with respect
229 | # to its parameters
230 | gparams = T.grad(cost, self.params)
231 | # generate the list of updates
232 | updates = []
233 | for param, gparam in zip(self.params, gparams):
234 | updates.append((param, param - learning_rate * gparam))
235 |
236 | return (cost, updates)
237 |
238 |
239 | def test_dA(learning_rate=0.1, training_epochs=15,
240 | dataset='mnist.pkl.gz',
241 | batch_size=20, output_folder='dA_plots'):
242 |
243 | """
244 | This demo is tested on MNIST
245 |
246 | :type learning_rate: float
247 | :param learning_rate: learning rate used for training the Denoising
248 | AutoEncoder
249 |
250 | :type training_epochs: int
251 | :param training_epochs: number of epochs used for training
252 |
253 | :type dataset: string
254 | :param dataset: path to the pickled dataset
255 |
256 | """
257 | datasets = load_data(dataset)
258 | train_set_x, train_set_y = datasets[0]
259 |
260 | # compute number of minibatches for training, validation and testing
261 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
262 |
263 | # allocate symbolic variables for the data
264 | index = T.lscalar() # index to a [mini]batch
265 | x = T.matrix('x') # the data is presented as rasterized images
266 |
267 | if not os.path.isdir(output_folder):
268 | os.makedirs(output_folder)
269 | os.chdir(output_folder)
270 | ####################################
271 | # BUILDING THE MODEL NO CORRUPTION #
272 | ####################################
273 |
274 | rng = numpy.random.RandomState(123)
275 | theano_rng = RandomStreams(rng.randint(2 ** 30))
276 |
277 | da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
278 | n_visible=28 * 28, n_hidden=500)
279 |
280 | cost, updates = da.get_cost_updates(corruption_level=0.,
281 | learning_rate=learning_rate)
282 |
283 | train_da = theano.function([index], cost, updates=updates,
284 | givens={x: train_set_x[index * batch_size:
285 | (index + 1) * batch_size]})
286 |
287 | start_time = time.clock()
288 |
289 | ############
290 | # TRAINING #
291 | ############
292 |
293 | # go through training epochs
294 | for epoch in xrange(training_epochs):
295 | # go through the training set
296 | c = []
297 | for batch_index in xrange(n_train_batches):
298 | c.append(train_da(batch_index))
299 |
300 | print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
301 |
302 | end_time = time.clock()
303 |
304 | training_time = (end_time - start_time)
305 |
306 | print >> sys.stderr, ('The no corruption code for file ' +
307 | os.path.split(__file__)[1] +
308 | ' ran for %.2fm' % ((training_time) / 60.))
309 | image = PIL.Image.fromarray(
310 | tile_raster_images(X=da.W.get_value(borrow=True).T,
311 | img_shape=(28, 28), tile_shape=(10, 10),
312 | tile_spacing=(1, 1)))
313 | image.save('filters_corruption_0.png')
314 |
315 | #####################################
316 | # BUILDING THE MODEL CORRUPTION 30% #
317 | #####################################
318 |
319 | rng = numpy.random.RandomState(123)
320 | theano_rng = RandomStreams(rng.randint(2 ** 30))
321 |
322 | da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
323 | n_visible=28 * 28, n_hidden=500)
324 |
325 | cost, updates = da.get_cost_updates(corruption_level=0.3,
326 | learning_rate=learning_rate)
327 |
328 | train_da = theano.function([index], cost, updates=updates,
329 | givens={x: train_set_x[index * batch_size:
330 | (index + 1)
* batch_size]})
331 |
332 | start_time = time.clock()
333 |
334 | ############
335 | # TRAINING #
336 | ############
337 |
338 | # go through training epochs
339 | for epoch in xrange(training_epochs):
340 | # go through the training set
341 | c = []
342 | for batch_index in xrange(n_train_batches):
343 | c.append(train_da(batch_index))
344 |
345 | print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
346 |
347 | end_time = time.clock()
348 |
349 | training_time = (end_time - start_time)
350 |
351 | print >> sys.stderr, ('The 30% corruption code for file ' +
352 | os.path.split(__file__)[1] +
353 | ' ran for %.2fm' % (training_time / 60.))
354 |
355 | image = PIL.Image.fromarray(tile_raster_images(
356 | X=da.W.get_value(borrow=True).T,
357 | img_shape=(28, 28), tile_shape=(10, 10),
358 | tile_spacing=(1, 1)))
359 | image.save('filters_corruption_30.png')
360 |
361 | os.chdir('../')
362 |
363 |
364 | if __name__ == '__main__':
365 | test_dA()
366 |
-------------------------------------------------------------------------------- /TheanoDL/grbm.py:
--------------------------------------------------------------------------------
1 | from rbm import RBM
2 |
3 | import theano
4 | import theano.tensor as T
5 |
6 | # --------------------------------------------------------------------------
7 | class GBRBM(RBM):
8 |
9 | # --------------------------------------------------------------------------
10 | # initialize class
11 | def __init__(self, input, n_in=784, n_hidden=500, \
12 | W=None, hbias=None, vbias=None, numpy_rng=None, transpose=False, activation=T.nnet.sigmoid,
13 | theano_rng=None, name='grbm', W_r=None, dropout=0, dropconnect=0):
14 |
15 | # initialize parent class (RBM)
16 | RBM.__init__(self, input=input, n_visible=n_in, n_hidden=n_hidden, \
17 | W=W, hbias=hbias, vbias=vbias, numpy_rng=numpy_rng,
18 | theano_rng=theano_rng)
19 |
20 | # --------------------------------------------------------------------------
21 | def type(self):
22 | return 'gauss-bernoulli'
23 |
24 | # --------------------------------------------------------------------------
25 | # overwrite free energy function (here only the vbias term is different)
26 | def free_energy(self, v_sample):
27 | wx_b = T.dot(v_sample, self.W) + self.hbias
28 | vbias_term = 0.5 * T.dot((v_sample - self.vbias), (v_sample - self.vbias).T)
29 | hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
30 | return -hidden_term - T.diagonal(vbias_term)
31 |
32 | # --------------------------------------------------------------------------
33 | # overwrite sampling function (here we sample from a normal distribution)
34 | def sample_v_given_h(self, h0_sample):
35 |
36 | pre_sigmoid_v1, v1_mean = self.propdown(h0_sample)
37 |
38 | '''
39 | Since the input data is normalized to unit variance and zero mean, we do not have to sample
40 | from a normal distribution and can pass the pre-sigmoid activation instead. If this is not the case,
41 | we have to sample the distribution.
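
(Added editorial note, not in the original file: for a Gaussian-Bernoulli RBM
the visible conditional is p(v | h) = N(v; W h + b_v, sigma^2 I); with inputs
normalized to zero mean and unit variance, sigma = 1, so the conditional mean
W h + b_v -- the "pre-sigmoid" value returned by propdown -- can stand in for
a sample, which is exactly the shortcut taken below.)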
42 | '''
43 | # in fact, we don't need to sample from the normal distribution here; we just use the pre-sigmoid activation instead
44 | # v1_sample = self.theano_rng.normal(size=v1_mean.shape, avg=v1_mean, std=1.0, dtype=theano.config.floatX) + pre_sigmoid_v1
45 | v1_sample = pre_sigmoid_v1
46 | return [pre_sigmoid_v1, v1_mean, v1_sample]
47 |
48 | def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
49 | """
50 | Squared reconstruction error (summed per example, averaged over the
51 | minibatch) as the reconstruction cost
52 | """
53 |
54 | rms_cost = T.mean(T.sum((self.input - pre_sigmoid_nv) ** 2, axis=1))
55 | return rms_cost
56 |
-------------------------------------------------------------------------------- /TheanoDL/grbm.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/grbm.pyc
-------------------------------------------------------------------------------- /TheanoDL/logistic_cg.py:
--------------------------------------------------------------------------------
1 | """
2 | This tutorial introduces logistic regression using Theano and conjugate
3 | gradient descent.
4 |
5 | Logistic regression is a probabilistic, linear classifier. It is parametrized
6 | by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
7 | done by projecting data points onto a set of hyperplanes, the distance to
8 | which is used to determine a class membership probability.
9 |
10 | Mathematically, this can be written as:
11 |
12 | .. math::
13 | P(Y=i|x, W,b) &= softmax_i(W x + b) \\
14 | &= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}
15 |
16 |
17 | The model's prediction is then made by taking the argmax of
18 | the vector whose i'th element is P(Y=i|x).
19 |
20 | .. math::
21 |
22 | y_{pred} = argmax_i P(Y=i|x,W,b)
23 |
24 |
25 | This tutorial presents a stochastic gradient descent optimization method
26 | suitable for large datasets, and a conjugate gradient optimization method
27 | that is suitable for smaller datasets.
28 |
29 |
30 | References:
31 |
32 | - textbooks: "Pattern Recognition and Machine Learning" -
33 | Christopher M. Bishop, section 4.3.2
34 |
35 |
36 | """
37 | __docformat__ = 'restructuredtext en'
38 |
39 |
40 | import cPickle
41 | import gzip
42 | import os
43 | import sys
44 | import time
45 |
46 | import numpy
47 |
48 | import theano
49 | import theano.tensor as T
50 |
51 | from logistic_sgd import load_data
52 |
53 |
54 | class LogisticRegression(object):
55 | """Multi-class Logistic Regression Class
56 |
57 | The logistic regression is fully described by a weight matrix :math:`W`
58 | and bias vector :math:`b`. Classification is done by projecting data
59 | points onto a set of hyperplanes, the distance to which is used to
60 | determine a class membership probability.
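
(As an added illustration, not part of the original docstring, the same
prediction rule in plain NumPy::

    import numpy

    def predict(x, W, b):
        # row-wise softmax, computed stably, followed by argmax
        a = numpy.dot(x, W) + b
        e = numpy.exp(a - a.max(axis=1, keepdims=True))
        p_y_given_x = e / e.sum(axis=1, keepdims=True)
        return p_y_given_x.argmax(axis=1)

The Theano graph built below computes exactly this, symbolically.)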
61 | """ 62 | 63 | def __init__(self, input, n_in, n_out): 64 | """ Initialize the parameters of the logistic regression 65 | 66 | :type input: theano.tensor.TensorType 67 | :param input: symbolic variable that describes the input of the 68 | architecture ( one minibatch) 69 | 70 | :type n_in: int 71 | :param n_in: number of input units, the dimension of the space in 72 | which the datapoint lies 73 | 74 | :type n_out: int 75 | :param n_out: number of output units, the dimension of the space in 76 | which the target lies 77 | 78 | """ 79 | 80 | # initialize theta = (W,b) with 0s; W gets the shape (n_in, n_out), 81 | # while b is a vector of n_out elements, making theta a vector of 82 | # n_in*n_out + n_out elements 83 | self.theta = theano.shared(value=numpy.zeros(n_in * n_out + n_out, 84 | dtype=theano.config.floatX), 85 | name='theta', 86 | borrow=True) 87 | # W is represented by the fisr n_in*n_out elements of theta 88 | self.W = self.theta[0:n_in * n_out].reshape((n_in, n_out)) 89 | # b is the rest (last n_out elements) 90 | self.b = self.theta[n_in * n_out:n_in * n_out + n_out] 91 | 92 | # compute vector of class-membership probabilities in symbolic form 93 | self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b) 94 | 95 | # compute prediction as class whose probability is maximal in 96 | # symbolic form 97 | self.y_pred = T.argmax(self.p_y_given_x, axis=1) 98 | 99 | def negative_log_likelihood(self, y): 100 | """Return the negative log-likelihood of the prediction of this model 101 | under a given target distribution. 102 | 103 | .. math:: 104 | 105 | \frac{1}{|\mathcal{D}|}\mathcal{L} (\theta=\{W,b\}, \mathcal{D}) = 106 | \frac{1}{|\mathcal{D}|}\sum_{i=0}^{|\mathcal{D}|} \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\ 107 | \ell (\theta=\{W,b\}, \mathcal{D}) 108 | 109 | :type y: theano.tensor.TensorType 110 | :param y: corresponds to a vector that gives for each example the 111 | correct label 112 | """ 113 | return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]) 114 | 115 | def errors(self, y): 116 | """Return a float representing the number of errors in the minibatch 117 | over the total number of examples of the minibatch 118 | 119 | :type y: theano.tensor.TensorType 120 | :param y: corresponds to a vector that gives for each example 121 | the correct label 122 | """ 123 | 124 | # check if y has same dimension of y_pred 125 | if y.ndim != self.y_pred.ndim: 126 | raise TypeError('y should have the same shape as self.y_pred', 127 | ('y', target.type, 'y_pred', self.y_pred.type)) 128 | # check if y is of the correct datatype 129 | if y.dtype.startswith('int'): 130 | # the T.neq operator returns a vector of 0s and 1s, where 1 131 | # represents a mistake in prediction 132 | return T.mean(T.neq(self.y_pred, y)) 133 | else: 134 | raise NotImplementedError() 135 | 136 | 137 | def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): 138 | """Demonstrate conjugate gradient optimization of a log-linear model 139 | 140 | This is demonstrated on MNIST. 
141 |
142 | :type n_epochs: int
143 | :param n_epochs: number of epochs to run the optimizer
144 |
145 | :type mnist_pkl_gz: string
146 | :param mnist_pkl_gz: the path of the mnist training file from
147 | http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
148 |
149 | """
150 | #############
151 | # LOAD DATA #
152 | #############
153 | datasets = load_data(mnist_pkl_gz)
154 |
155 | train_set_x, train_set_y = datasets[0]
156 | valid_set_x, valid_set_y = datasets[1]
157 | test_set_x, test_set_y = datasets[2]
158 |
159 | batch_size = 600 # size of the minibatch
160 |
161 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
162 | n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
163 | n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
164 |
165 | ishape = (28, 28) # this is the size of MNIST images
166 | n_in = 28 * 28 # number of input units
167 | n_out = 10 # number of output units
168 |
169 | ######################
170 | # BUILD ACTUAL MODEL #
171 | ######################
172 | print '... building the model'
173 |
174 | # allocate symbolic variables for the data
175 | minibatch_offset = T.lscalar() # offset to the start of a [mini]batch
176 | x = T.matrix() # the data is presented as rasterized images
177 | y = T.ivector() # the labels are presented as 1D vector of
178 | # [int] labels
179 |
180 | # construct the logistic regression class
181 | classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
182 |
183 | # the cost we minimize during training is the negative log likelihood of
184 | # the model in symbolic format
185 | cost = classifier.negative_log_likelihood(y).mean()
186 |
187 | # compile a theano function that computes the mistakes that are made by
188 | # the model on a minibatch
189 | test_model = theano.function([minibatch_offset], classifier.errors(y),
190 | givens={
191 | x: test_set_x[minibatch_offset:minibatch_offset + batch_size],
192 | y: test_set_y[minibatch_offset:minibatch_offset + batch_size]},
193 | name="test")
194 |
195 | validate_model = theano.function([minibatch_offset], classifier.errors(y),
196 | givens={
197 | x: valid_set_x[minibatch_offset:
198 | minibatch_offset + batch_size],
199 | y: valid_set_y[minibatch_offset:
200 | minibatch_offset + batch_size]},
201 | name="validate")
202 |
203 | # compile a theano function that returns the cost of a minibatch
204 | batch_cost = theano.function([minibatch_offset], cost,
205 | givens={
206 | x: train_set_x[minibatch_offset:
207 | minibatch_offset + batch_size],
208 | y: train_set_y[minibatch_offset:
209 | minibatch_offset + batch_size]},
210 | name="batch_cost")
211 |
212 | # compile a theano function that returns the gradient of the minibatch
213 | # with respect to theta
214 | batch_grad = theano.function([minibatch_offset],
215 | T.grad(cost, classifier.theta),
216 | givens={
217 | x: train_set_x[minibatch_offset:
218 | minibatch_offset + batch_size],
219 | y: train_set_y[minibatch_offset:
220 | minibatch_offset + batch_size]},
221 | name="batch_grad")
222 |
223 | # creates a function that computes the average cost on the training set
224 | def train_fn(theta_value):
225 | classifier.theta.set_value(theta_value, borrow=True)
226 | train_losses = [batch_cost(i * batch_size)
227 | for i in xrange(n_train_batches)]
228 | return numpy.mean(train_losses)
229 |
230 | # creates a function that computes the average gradient of cost with
231 | # respect to theta
232 | def train_fn_grad(theta_value):
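# (added note) load the candidate theta into the model's shared
# parameters, then average the per-minibatch gradients so that
# fmin_cg sees the gradient over the full training set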
233 | classifier.theta.set_value(theta_value, borrow=True)
234 | grad = batch_grad(0)
235 | for i in xrange(1, n_train_batches):
236 | grad += batch_grad(i * batch_size)
237 | return grad / n_train_batches
238 |
239 | validation_scores = [numpy.inf, 0]
240 |
241 | # creates the validation function
242 | def callback(theta_value):
243 | classifier.theta.set_value(theta_value, borrow=True)
244 | # compute the validation loss
245 | validation_losses = [validate_model(i * batch_size)
246 | for i in xrange(n_valid_batches)]
247 | this_validation_loss = numpy.mean(validation_losses)
248 | print('validation error %f %%' % (this_validation_loss * 100.,))
249 |
250 | # check if it is better than the best validation score obtained so far
251 | if this_validation_loss < validation_scores[0]:
252 | # if so, replace the old one, and compute the score on the
253 | # testing dataset
254 | validation_scores[0] = this_validation_loss
255 | test_losses = [test_model(i * batch_size)
256 | for i in xrange(n_test_batches)]
257 | validation_scores[1] = numpy.mean(test_losses)
258 |
259 | ###############
260 | # TRAIN MODEL #
261 | ###############
262 |
263 | # using scipy conjugate gradient optimizer
264 | import scipy.optimize
265 | print ("Optimizing using scipy.optimize.fmin_cg...")
266 | start_time = time.clock()
267 | best_w_b = scipy.optimize.fmin_cg(
268 | f=train_fn,
269 | x0=numpy.zeros((n_in + 1) * n_out, dtype=x.dtype),
270 | fprime=train_fn_grad,
271 | callback=callback,
272 | disp=0,
273 | maxiter=n_epochs)
274 | end_time = time.clock()
275 | print(('Optimization complete with best validation score of %f %%, with '
276 | 'test performance %f %%') %
277 | (validation_scores[0] * 100., validation_scores[1] * 100.))
278 |
279 | print >> sys.stderr, ('The code for file ' +
280 | os.path.split(__file__)[1] +
281 | ' ran for %.1fs' % ((end_time - start_time)))
282 |
283 |
284 | if __name__ == '__main__':
285 | cg_optimization_mnist()
286 |
-------------------------------------------------------------------------------- /TheanoDL/logistic_sgd.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/logistic_sgd.pyc
-------------------------------------------------------------------------------- /TheanoDL/mlp.py:
--------------------------------------------------------------------------------
1 | """
2 | This tutorial introduces the multilayer perceptron using Theano.
3 |
4 | A multilayer perceptron is a logistic regressor where
5 | instead of feeding the input to the logistic regression you insert an
6 | intermediate layer, called the hidden layer, that has a nonlinear
7 | activation function (usually tanh or sigmoid). One can use many such
8 | hidden layers, making the architecture deep. The tutorial will also tackle
9 | the problem of MNIST digit classification.
10 |
11 | .. math::
12 |
13 | f(x) = G( b^{(2)} + W^{(2)}( s( b^{(1)} + W^{(1)} x))),
14 |
15 | References:
16 |
17 | - textbooks: "Pattern Recognition and Machine Learning" -
18 | Christopher M.
Bishop, section 5
19 |
20 | """
21 | __docformat__ = 'restructuredtext en'
22 |
23 |
24 | import cPickle
25 | import gzip
26 | import os
27 | import sys
28 | import time
29 |
30 | import numpy
31 |
32 | import theano
33 | import theano.tensor as T
34 |
35 |
36 | from logistic_sgd import LogisticRegression, load_data
37 |
38 |
39 | class HiddenLayer(object):
40 | def __init__(self, rng, input, n_in, n_out, W=None, b=None,
41 | activation=T.tanh):
42 | """
43 | Typical hidden layer of an MLP: units are fully-connected and have a
44 | sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
45 | and the bias vector b is of shape (n_out,).
46 |
47 | NOTE : The nonlinearity used here is tanh
48 |
49 | Hidden unit activation is given by: tanh(dot(input,W) + b)
50 |
51 | :type rng: numpy.random.RandomState
52 | :param rng: a random number generator used to initialize weights
53 |
54 | :type input: theano.tensor.dmatrix
55 | :param input: a symbolic tensor of shape (n_examples, n_in)
56 |
57 | :type n_in: int
58 | :param n_in: dimensionality of input
59 |
60 | :type n_out: int
61 | :param n_out: number of hidden units
62 |
63 | :type activation: theano.Op or function
64 | :param activation: Non linearity to be applied in the hidden
65 | layer
66 | """
67 | self.input = input
68 |
69 | # `W` is initialized with `W_values`, which is uniformly sampled
70 | # from -sqrt(6./(n_in+n_hidden)) to sqrt(6./(n_in+n_hidden))
71 | # for the tanh activation function;
72 | # the output of uniform is converted using asarray to dtype
73 | # theano.config.floatX so that the code is runnable on GPU
74 | # Note : optimal initialization of weights is dependent on the
75 | # activation function used (among other things).
76 | # For example, results presented in [Xavier10] suggest that you
77 | # should use 4 times larger initial weights for sigmoid
78 | # compared to tanh
79 | # We have no info for other functions, so we use the same as
80 | # for tanh.
81 | if W is None:
82 | W_values = numpy.asarray(rng.uniform(
83 | low=-numpy.sqrt(6. / (n_in + n_out)),
84 | high=numpy.sqrt(6. / (n_in + n_out)),
85 | size=(n_in, n_out)), dtype=theano.config.floatX)
86 | if activation == theano.tensor.nnet.sigmoid:
87 | W_values *= 4
88 |
89 | W = theano.shared(value=W_values, name='W', borrow=True)
90 |
91 | if b is None:
92 | b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
93 | b = theano.shared(value=b_values, name='b', borrow=True)
94 |
95 | self.W = W
96 | self.b = b
97 |
98 | lin_output = T.dot(input, self.W) + self.b
99 | self.output = (lin_output if activation is None
100 | else activation(lin_output))
101 | # parameters of the model
102 | self.params = [self.W, self.b]
103 |
104 |
105 | class MLP(object):
106 | """Multi-Layer Perceptron Class
107 |
108 | A multilayer perceptron is a feedforward artificial neural network model
109 | that has one or more layers of hidden units and nonlinear activations.
110 | Intermediate layers usually have tanh or the sigmoid function as their
111 | activation (defined here by a ``HiddenLayer`` class), while the
112 | top layer is a softmax layer (defined here by a ``LogisticRegression``
113 | class).
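
(An added minimal NumPy sketch of this two-stage forward pass; illustration
only, not part of the original class::

    import numpy

    def mlp_forward(x, W1, b1, W2, b2):
        h = numpy.tanh(numpy.dot(x, W1) + b1)            # HiddenLayer
        a = numpy.dot(h, W2) + b2                        # softmax layer input
        e = numpy.exp(a - a.max(axis=1, keepdims=True))  # stable softmax
        return e / e.sum(axis=1, keepdims=True)          # class probabilities

This matches f(x) = G(b2 + W2 s(b1 + W1 x)) from the module docstring, with
s = tanh and G = softmax.)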
114 | """ 115 | 116 | def __init__(self, rng, input, n_in, n_hidden, n_out): 117 | """Initialize the parameters for the multilayer perceptron 118 | 119 | :type rng: numpy.random.RandomState 120 | :param rng: a random number generator used to initialize weights 121 | 122 | :type input: theano.tensor.TensorType 123 | :param input: symbolic variable that describes the input of the 124 | architecture (one minibatch) 125 | 126 | :type n_in: int 127 | :param n_in: number of input units, the dimension of the space in 128 | which the datapoints lie 129 | 130 | :type n_hidden: int 131 | :param n_hidden: number of hidden units 132 | 133 | :type n_out: int 134 | :param n_out: number of output units, the dimension of the space in 135 | which the labels lie 136 | 137 | """ 138 | 139 | # Since we are dealing with a one hidden layer MLP, this will translate 140 | # into a HiddenLayer with a tanh activation function connected to the 141 | # LogisticRegression layer; the activation function can be replaced by 142 | # sigmoid or any other nonlinear function 143 | self.hiddenLayer = HiddenLayer(rng=rng, input=input, 144 | n_in=n_in, n_out=n_hidden, 145 | activation=T.tanh) 146 | 147 | # The logistic regression layer gets as input the hidden units 148 | # of the hidden layer 149 | self.logRegressionLayer = LogisticRegression( 150 | input=self.hiddenLayer.output, 151 | n_in=n_hidden, 152 | n_out=n_out) 153 | 154 | # L1 norm ; one regularization option is to enforce L1 norm to 155 | # be small 156 | self.L1 = abs(self.hiddenLayer.W).sum() \ 157 | + abs(self.logRegressionLayer.W).sum() 158 | 159 | # square of L2 norm ; one regularization option is to enforce 160 | # square of L2 norm to be small 161 | self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \ 162 | + (self.logRegressionLayer.W ** 2).sum() 163 | 164 | # negative log likelihood of the MLP is given by the negative 165 | # log likelihood of the output of the model, computed in the 166 | # logistic regression layer 167 | self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood 168 | # same holds for the function computing the number of errors 169 | self.errors = self.logRegressionLayer.errors 170 | 171 | # the parameters of the model are the parameters of the two layer it is 172 | # made out of 173 | self.params = self.hiddenLayer.params + self.logRegressionLayer.params 174 | 175 | 176 | def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, 177 | dataset='mnist.pkl.gz', batch_size=20, n_hidden=500): 178 | """ 179 | Demonstrate stochastic gradient descent optimization for a multilayer 180 | perceptron 181 | 182 | This is demonstrated on MNIST. 
183 |
184 | :type learning_rate: float
185 | :param learning_rate: learning rate used (factor for the stochastic
186 | gradient)
187 |
188 | :type L1_reg: float
189 | :param L1_reg: L1-norm's weight when added to the cost (see
190 | regularization)
191 |
192 | :type L2_reg: float
193 | :param L2_reg: L2-norm's weight when added to the cost (see
194 | regularization)
195 |
196 | :type n_epochs: int
197 | :param n_epochs: maximal number of epochs to run the optimizer
198 |
199 | :type dataset: string
200 | :param dataset: the path of the MNIST dataset file from
201 | http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
202 |
203 |
204 | """
205 | datasets = load_data(dataset)
206 |
207 | train_set_x, train_set_y = datasets[0]
208 | valid_set_x, valid_set_y = datasets[1]
209 | test_set_x, test_set_y = datasets[2]
210 |
211 | # compute number of minibatches for training, validation and testing
212 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
213 | n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
214 | n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
215 |
216 | ######################
217 | # BUILD ACTUAL MODEL #
218 | ######################
219 | print '... building the model'
220 |
221 | # allocate symbolic variables for the data
222 | index = T.lscalar() # index to a [mini]batch
223 | x = T.matrix('x') # the data is presented as rasterized images
224 | y = T.ivector('y') # the labels are presented as 1D vector of
225 | # [int] labels
226 |
227 | rng = numpy.random.RandomState(1234)
228 |
229 | # construct the MLP class
230 | classifier = MLP(rng=rng, input=x, n_in=28 * 28,
231 | n_hidden=n_hidden, n_out=10)
232 |
233 | # the cost we minimize during training is the negative log likelihood of
234 | # the model plus the regularization terms (L1 and L2); cost is expressed
235 | # here symbolically
236 | cost = classifier.negative_log_likelihood(y) \
237 | + L1_reg * classifier.L1 \
238 | + L2_reg * classifier.L2_sqr
239 |
240 | # compiling a Theano function that computes the mistakes that are made
241 | # by the model on a minibatch
242 | test_model = theano.function(inputs=[index],
243 | outputs=classifier.errors(y),
244 | givens={
245 | x: test_set_x[index * batch_size:(index + 1) * batch_size],
246 | y: test_set_y[index * batch_size:(index + 1) * batch_size]})
247 |
248 | validate_model = theano.function(inputs=[index],
249 | outputs=classifier.errors(y),
250 | givens={
251 | x: valid_set_x[index * batch_size:(index + 1) * batch_size],
252 | y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
253 |
254 | # compute the gradient of cost with respect to theta (stored in params)
255 | # the resulting gradients will be stored in a list gparams
256 | gparams = []
257 | for param in classifier.params:
258 | gparam = T.grad(cost, param)
259 | gparams.append(gparam)
260 |
261 | # specify how to update the parameters of the model as a list of
262 | # (variable, update expression) pairs
263 | updates = []
264 | # given two lists A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4] of the
265 | # same length, zip generates a list C of the same size, where each
266 | # element is a pair formed from the two lists :
267 | # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
268 | for param, gparam in zip(classifier.params, gparams):
269 | updates.append((param, param - learning_rate * gparam))
270 |
271 | # compiling a Theano function `train_model` that returns the cost and
272 | # at the same time updates the parameters
of the model based on the rules
273 | # defined in `updates`
274 | train_model = theano.function(inputs=[index], outputs=cost,
275 | updates=updates,
276 | givens={
277 | x: train_set_x[index * batch_size:(index + 1) * batch_size],
278 | y: train_set_y[index * batch_size:(index + 1) * batch_size]})
279 |
280 | ###############
281 | # TRAIN MODEL #
282 | ###############
283 | print '... training'
284 |
285 | # early-stopping parameters
286 | patience = 10000 # look at this many examples regardless
287 | patience_increase = 2 # wait this much longer when a new best is
288 | # found
289 | improvement_threshold = 0.995 # a relative improvement of this much is
290 | # considered significant
291 | validation_frequency = min(n_train_batches, patience / 2)
292 | # go through this many
293 | # minibatches before checking the network
294 | # on the validation set; in this case we
295 | # check every epoch
296 |
297 | best_params = None
298 | best_validation_loss = numpy.inf
299 | best_iter = 0
300 | test_score = 0.
301 | start_time = time.clock()
302 |
303 | epoch = 0
304 | done_looping = False
305 |
306 | while (epoch < n_epochs) and (not done_looping):
307 | epoch = epoch + 1
308 | for minibatch_index in xrange(n_train_batches):
309 |
310 | minibatch_avg_cost = train_model(minibatch_index)
311 | # iteration number
312 | iter = (epoch - 1) * n_train_batches + minibatch_index
313 |
314 | if (iter + 1) % validation_frequency == 0:
315 | # compute zero-one loss on validation set
316 | validation_losses = [validate_model(i) for i
317 | in xrange(n_valid_batches)]
318 | this_validation_loss = numpy.mean(validation_losses)
319 |
320 | print('epoch %i, minibatch %i/%i, validation error %f %%' %
321 | (epoch, minibatch_index + 1, n_train_batches,
322 | this_validation_loss * 100.))
323 |
324 | # if we got the best validation score until now
325 | if this_validation_loss < best_validation_loss:
326 | # improve patience if loss improvement is good enough
327 | if this_validation_loss < best_validation_loss * \
328 | improvement_threshold:
329 | patience = max(patience, iter * patience_increase)
330 |
331 | best_validation_loss = this_validation_loss
332 | best_iter = iter
333 |
334 | # test it on the test set
335 | test_losses = [test_model(i) for i
336 | in xrange(n_test_batches)]
337 | test_score = numpy.mean(test_losses)
338 |
339 | print((' epoch %i, minibatch %i/%i, test error of '
340 | 'best model %f %%') %
341 | (epoch, minibatch_index + 1, n_train_batches,
342 | test_score * 100.))
343 |
344 | if patience <= iter:
345 | done_looping = True
346 | break
347 |
348 | end_time = time.clock()
349 | print(('Optimization complete.
Best validation score of %f %% '
350 | 'obtained at iteration %i, with test performance %f %%') %
351 | (best_validation_loss * 100., best_iter + 1, test_score * 100.))
352 | print >> sys.stderr, ('The code for file ' +
353 | os.path.split(__file__)[1] +
354 | ' ran for %.2fm' % ((end_time - start_time) / 60.))
355 |
356 |
357 | if __name__ == '__main__':
358 | test_mlp()
359 |
-------------------------------------------------------------------------------- /TheanoDL/mlp.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/mlp.pyc
-------------------------------------------------------------------------------- /TheanoDL/rbm.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/rbm.pyc
-------------------------------------------------------------------------------- /TheanoDL/rbm_gnumpy.py:
--------------------------------------------------------------------------------
1 | # this is a modified version of the example script that comes with cudamat
2 |
3 | def test_gnumpy(dat, num_epochs):
4 | import gnumpy as gpu
5 | import numpy
6 | import time
7 | # load data; dat is 2-dimensional: 60000 X 784
8 | #dat = gpu.garray(load('mnist_cudaTest').T/255.)
9 | # training parameters
10 | epsilon = 0.1
11 | momentum = 0.9
12 | batch_size = 128
13 | num_batches = dat.shape[0]/batch_size
14 | # model parameters
15 | num_vis = dat.shape[1]
16 | num_hid = 4096
17 | # initialize weights
18 | w_vh = 0.1 * gpu.randn(num_vis, num_hid)
19 | w_v = gpu.zeros(num_vis)
20 | w_h = -4. * gpu.ones(num_hid)
21 | # initialize weight updates
22 | wu_vh = gpu.zeros((num_vis, num_hid))
23 | wu_v = gpu.zeros(num_vis)
24 | wu_h = gpu.zeros(num_hid)
25 | for epoch in range(num_epochs):
26 | err = []
27 | tic = time.clock()
28 | for batch in range(num_batches):
29 | # positive phase
30 | v1 = dat[batch*batch_size : (batch + 1)*batch_size]
31 | h1 = (gpu.dot(v1, w_vh) + w_h).logistic()
32 | # sample hiddens
33 | hSampled = h1.rand() < h1
34 | # negative phase
35 | v2 = (gpu.dot(hSampled, w_vh.T) + w_v).logistic()
36 | h2 = (gpu.dot(v2, w_vh) + w_h).logistic()
37 | # update weights
38 | wu_vh = wu_vh * momentum + gpu.dot(v1.T, h1) - gpu.dot(v2.T, h2)
39 | wu_v = wu_v * momentum + v1.sum(0) - v2.sum(0)
40 | wu_h = wu_h * momentum + h1.sum(0) - h2.sum(0)
41 |
42 | w_vh += wu_vh * (epsilon/batch_size)
43 | w_v += wu_v * (epsilon/batch_size)
44 | w_h += wu_h * (epsilon/batch_size)
45 | # calculate reconstruction error
46 | err.append((v2-v1).euclid_norm()**2/(num_vis*batch_size))
47 | toc = time.clock()
48 | print "Mean squared error: %.4f, takes time: %d" % (numpy.mean(err), toc-tic)
49 | return w_vh, w_v, w_h
50 |
51 |
52 | def test_cpu_numpy(dat, num_epochs):
53 | import numpy
54 | import time
55 | logistic = lambda x: 1.0 / (1.0 + numpy.exp(-1.0 * x))
56 | epsilon = 0.1
57 | momentum = 0.9
58 | batch_size = 128
59 | num_batches = dat.shape[0]/batch_size
60 | # model parameters
61 | num_vis = dat.shape[1]
62 | num_hid = 4096
63 | # initialize weights
64 | w_vh = 0.1 * numpy.random.randn(num_vis, num_hid)
65 | w_v = numpy.zeros(num_vis)
66 | w_h = -4.
* numpy.ones(num_hid)
67 | # initialize weight updates
68 | wu_vh = numpy.zeros((num_vis, num_hid))
69 | wu_v = numpy.zeros(num_vis)
70 | wu_h = numpy.zeros(num_hid)
71 | for epoch in range(num_epochs):
72 | err = []
73 | tic = time.clock()
74 | for batch in range(num_batches):
75 | # positive phase
76 | v1 = dat[batch*batch_size : (batch + 1)*batch_size]
77 | h1 = logistic(numpy.dot(v1, w_vh) + w_h)
78 | # sample hiddens
79 | hSampled = numpy.random.rand(h1.shape[0], h1.shape[1]) < h1
80 | # negative phase
81 | v2 = logistic(numpy.dot(hSampled, w_vh.T) + w_v)
82 | h2 = logistic(numpy.dot(v2, w_vh) + w_h)
83 | # update weights
84 | wu_vh = wu_vh * momentum + numpy.dot(v1.T, h1) - numpy.dot(v2.T, h2)
85 | wu_v = wu_v * momentum + v1.sum(0) - v2.sum(0)
86 | wu_h = wu_h * momentum + h1.sum(0) - h2.sum(0)
87 |
88 | w_vh += wu_vh * (epsilon/batch_size)
89 | w_v += wu_v * (epsilon/batch_size)
90 | w_h += wu_h * (epsilon/batch_size)
91 | # calculate reconstruction error
92 | err.append(sum(sum(v2-v1)**2)/(num_vis*batch_size))
93 | toc = time.clock()
94 | print "Mean squared error: %.4f, takes time: %d" % (numpy.mean(err), toc-tic)
95 | return w_vh, w_v, w_h
96 |
97 |
98 | def sigmoid(z):
99 | import numpy # numpy is only imported inside functions in this file
100 | return 1.0 / (1.0 + numpy.exp(-1.0 * z)) # fixed: was `np.exp**(-1.0 * z)`, a bug
-------------------------------------------------------------------------------- /TheanoDL/rbm_mean.py:
--------------------------------------------------------------------------------
1 | """This tutorial introduces restricted boltzmann machines (RBM) using Theano.
2 |
3 | Boltzmann Machines (BMs) are a particular form of energy-based model which
4 | contain hidden variables. Restricted Boltzmann Machines further restrict BMs
5 | to those without visible-visible and hidden-hidden connections.
6 | """
7 | import cPickle
8 | import gzip
9 | import time
10 | import PIL.Image
11 |
12 | import numpy
13 |
14 | import theano
15 | import theano.tensor as T
16 | import os
17 |
18 | from theano.tensor.shared_randomstreams import RandomStreams
19 |
20 | from utils import tile_raster_images
21 | from logistic_sgd import load_data
22 |
23 |
24 | class RBM_Mean(object):
25 | """Restricted Boltzmann Machine (RBM) """
26 | def __init__(self, input=None, n_visible=784, n_hidden=500, \
27 | W=None, hbias=None, vbias=None, numpy_rng=None,
28 | theano_rng=None, MU=None):
29 | """
30 | The same as the RBM, except that we subtract the mean from the inputs.
31 | See:
32 | Data Normalization in the Learning of Restricted Boltzmann Machines
33 | """
34 |
35 | self.n_visible = n_visible
36 | self.n_hidden = n_hidden
37 | self.MU = MU
38 | # initialize input layer for standalone RBM or layer0 of DBN
39 | self.input = input - self.MU
40 | if numpy_rng is None:
41 | # create a number generator
42 | numpy_rng = numpy.random.RandomState(1234)
43 |
44 | if theano_rng is None:
45 | theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
46 |
47 | if W is None:
48 | # W is initialized with `initial_W`, which is uniformly
49 | # sampled from -4*sqrt(6./(n_visible+n_hidden)) to
50 | # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
51 | # converted using asarray to dtype theano.config.floatX so
52 | # that the code is runnable on GPU
53 | initial_W = numpy.asarray(numpy_rng.uniform(
54 | low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
55 | high=4 * numpy.sqrt(6.
/ (n_hidden + n_visible)),
56 | size=(n_visible, n_hidden)),
57 | dtype=theano.config.floatX)
58 | # theano shared variables for weights and biases
59 | W = theano.shared(value=initial_W, name='W', borrow=True)
60 |
61 | if hbias is None:
62 | # create shared variable for hidden units bias
63 | hbias = theano.shared(value=numpy.zeros(n_hidden,
64 | dtype=theano.config.floatX),
65 | name='hbias', borrow=True)
66 |
67 | if vbias is None:
68 | # create shared variable for visible units bias
69 | vbias = theano.shared(value=numpy.zeros(n_visible,
70 | dtype=theano.config.floatX),
71 | name='vbias', borrow=True)
72 |
73 |
74 | if input is None:
75 | self.input = T.matrix('input')
76 |
77 | self.W = W
78 | self.hbias = hbias
79 | self.vbias = vbias
80 | self.theano_rng = theano_rng
81 | # **** WARNING: It is not a good idea to put things in this list
82 | # other than shared variables created in this function.
83 | self.params = [self.W, self.hbias, self.vbias]
84 |
85 | def free_energy(self, v_sample):
86 | ''' Function to compute the free energy '''
87 | wx_b = T.dot(v_sample, self.W) + self.hbias
88 | vbias_term = T.dot(v_sample, self.vbias)
89 | hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
90 | return -hidden_term - vbias_term
91 |
92 | def propup(self, vis):
93 | '''This function propagates the visible units activation upwards to
94 | the hidden units
95 |
96 | Note that we also return the pre-sigmoid activation of the
97 | layer. As it will turn out later, due to how Theano deals with
98 | optimizations, this symbolic variable will be needed to write
99 | down a more stable computational graph (see details in the
100 | reconstruction cost function)
101 |
102 | '''
103 | # (the mean was already subtracted from `vis` by the caller)
104 | pre_sigmoid_activation = T.dot(vis, self.W) + self.hbias
105 | return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)]
106 |
107 | def sample_h_given_v(self, v0_sample):
108 | ''' This function infers state of hidden units given visible units '''
109 | # compute the activation of the hidden units given a sample of
110 | # the visibles
111 | pre_sigmoid_h1, h1_mean = self.propup(v0_sample)
112 | # get a sample of the hiddens given their activation
113 | # Note that theano_rng.binomial returns a symbolic sample of dtype
114 | # int64 by default. If we want to keep our computations in floatX
115 | # for the GPU we need to specify to return the dtype floatX
116 | h1_sample = self.theano_rng.binomial(size=h1_mean.shape,
117 | n=1, p=h1_mean,
118 | dtype=theano.config.floatX)
119 | return [pre_sigmoid_h1, h1_mean, h1_sample]
120 |
121 | def propdown(self, hid):
122 | '''This function propagates the hidden units activation downwards to
123 | the visible units
124 |
125 | Note that we also return the pre-sigmoid activation of the
126 | layer.
As it will turn out later, due to how Theano deals with
127 | optimizations, this symbolic variable will be needed to write
128 | down a more stable computational graph (see details in the
129 | reconstruction cost function)
130 |
131 | '''
132 | pre_sigmoid_activation = T.dot(hid, self.W.T) + self.vbias
133 | return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)]
134 |
135 | def sample_v_given_h(self, h0_sample):
136 | ''' This function infers state of visible units given hidden units '''
137 | # compute the activation of the visible given the hidden sample
138 | pre_sigmoid_v1, v1_mean = self.propdown(h0_sample)
139 | # get a sample of the visible given their activation
140 | # Note that theano_rng.binomial returns a symbolic sample of dtype
141 | # int64 by default. If we want to keep our computations in floatX
142 | # for the GPU we need to specify to return the dtype floatX
143 | v1_sample = self.theano_rng.binomial(size=v1_mean.shape,
144 | n=1, p=v1_mean,
145 | dtype=theano.config.floatX)
146 | v1_sample = v1_sample - self.MU
147 | return [pre_sigmoid_v1, v1_mean, v1_sample]
148 |
149 | def gibbs_hvh(self, h0_sample):
150 | ''' This function implements one step of Gibbs sampling,
151 | starting from the hidden state'''
152 | pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h0_sample)
153 | pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v1_sample)
154 | return [pre_sigmoid_v1, v1_mean, v1_sample,
155 | pre_sigmoid_h1, h1_mean, h1_sample]
156 |
157 | def gibbs_vhv(self, v0_sample):
158 | ''' This function implements one step of Gibbs sampling,
159 | starting from the visible state'''
160 | pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v0_sample)
161 | pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h1_sample)
162 | return [pre_sigmoid_h1, h1_mean, h1_sample,
163 | pre_sigmoid_v1, v1_mean, v1_sample]
164 |
165 | def get_cost_updates(self, lr=0.1, persistent=None, k=1):
166 | """This function implements one step of CD-k or PCD-k
167 |
168 | :param lr: learning rate used to train the RBM
169 |
170 | :param persistent: None for CD. For PCD, shared variable
171 | containing old state of Gibbs chain. This must be a shared
172 | variable of size (batch size, number of hidden units).
173 |
174 | :param k: number of Gibbs steps to do in CD-k/PCD-k
175 |
176 | Returns a proxy for the cost and the updates dictionary. The
177 | dictionary contains the update rules for weights and biases but
178 | also an update of the shared variable used to store the persistent
179 | chain, if one is used.
180 |
181 | """
182 |
183 | # compute positive phase
184 | pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(self.input)
185 |
186 | # decide how to initialize persistent chain:
187 | # for CD, we use the newly generated hidden sample
188 | # for PCD, we initialize from the old state of the chain
189 | if persistent is None:
190 | chain_start = ph_sample
191 | else:
192 | chain_start = persistent
193 |
194 | # perform actual negative phase
195 | # in order to implement CD-k/PCD-k we need to scan over the
196 | # function that implements one Gibbs step k times.
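# (added note) CD-k restarts the chain from the data's hidden sample at
# every update, whereas PCD-k keeps a single persistent chain alive across
# updates; `chain_start` above is the only place where the two differ.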
197 | # Read the Theano tutorial on scan for more information :
198 | # http://deeplearning.net/software/theano/library/scan.html
199 | # the scan will return the entire Gibbs chain
200 | [pre_sigmoid_nvs, nv_means, nv_samples,
201 | pre_sigmoid_nhs, nh_means, nh_samples], updates = \
202 | theano.scan(self.gibbs_hvh,
203 | # the None are place holders, saying that
204 | # chain_start is the initial state corresponding to the
205 | # 6th output
206 | outputs_info=[None, None, None, None, None, chain_start],
207 | n_steps=k)
208 |
209 | # determine gradients on RBM parameters
210 | # note that we only need the sample at the end of the chain
211 | chain_end = nv_samples[-1]
212 |
213 | cost = T.mean(self.free_energy(self.input)) - T.mean(
214 | self.free_energy(chain_end))
215 | # We must not compute the gradient through the gibbs sampling
216 | gparams = T.grad(cost, self.params, consider_constant=[chain_end])
217 |
218 | # constructs the update dictionary
219 | for gparam, param in zip(gparams, self.params):
220 | # make sure that the learning rate is of the right dtype
221 | updates[param] = param - gparam * T.cast(lr,
222 | dtype=theano.config.floatX)
223 | if param.name == 'W':
224 | W_temp = T.cast(param, dtype=theano.config.floatX)
225 | if param.name == 'b':
226 | updates[param] = param - T.dot(self.MU.T, W_temp)
227 |
228 |
229 |
230 | if persistent:
231 | # Note that this works only if persistent is a shared variable
232 | updates[persistent] = nh_samples[-1]
233 | # pseudo-likelihood is a better proxy for PCD
234 | monitoring_cost = self.get_pseudo_likelihood_cost(updates)
235 | else:
236 | # reconstruction cross-entropy is a better proxy for CD
237 | monitoring_cost = self.get_reconstruction_cost(updates,
238 | pre_sigmoid_nvs[-1], nv_samples[-1])
239 |
240 | return monitoring_cost, updates
241 |
242 | def get_pseudo_likelihood_cost(self, updates):
243 | """Stochastic approximation to the pseudo-likelihood"""
244 |
245 | # index of bit i in expression p(x_i | x_{\i})
246 | bit_i_idx = theano.shared(value=0, name='bit_i_idx')
247 |
248 | # binarize the input image by rounding to nearest integer
249 | xi = T.round(self.input)
250 |
251 | # calculate free energy for the given bit configuration
252 | fe_xi = self.free_energy(xi)
253 |
254 | # flip bit x_i of matrix xi and preserve all other bits x_{\i}
255 | # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns
256 | # the result to xi_flip, instead of working in place on xi.
257 | xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])
258 |
259 | # calculate free energy with bit flipped
260 | fe_xi_flip = self.free_energy(xi_flip)
261 |
262 | # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i})))
263 | cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip -
264 | fe_xi)))
265 |
266 | # increment bit_i_idx % number as part of updates
267 | updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible
268 |
269 | return cost
270 |
271 | def get_reconstruction_cost(self, updates, pre_sigmoid_nv, nv_samples):
272 | """Approximation to the reconstruction error
273 |
274 | Note that this function requires the pre-sigmoid activation as
275 | input. To understand why this is so you need to understand a
276 | bit about how Theano works. Whenever you compile a Theano
277 | function, the computational graph that you pass as input gets
278 | optimized for speed and stability. This is done by changing
279 | several parts of the subgraphs with others.
One such
280 | optimization expresses terms of the form log(sigmoid(x)) in
281 | terms of softplus. We need this optimization for the
282 | cross-entropy since sigmoid of numbers larger than 30. (or
283 | even less than that) turns to 1., and numbers smaller than
284 | -30. turn to 0, which in turn will force Theano to compute
285 | log(0) and therefore we will get either -inf or NaN as the
286 | cost. If the value is expressed in terms of softplus we do not
287 | get this undesirable behaviour. This optimization usually
288 | works fine, but here we have a special case. The sigmoid is
289 | applied inside the scan op, while the log is
290 | outside. Therefore Theano will only see log(scan(..)) instead
291 | of log(sigmoid(..)) and will not apply the wanted
292 | optimization. We cannot go and replace the sigmoid in scan
293 | with something else either, because this only needs to be done
294 | on the last step. Therefore the easiest and most efficient way
295 | is to also get the pre-sigmoid activation as an output of
296 | scan, and apply both the log and sigmoid outside scan such
297 | that Theano can catch and optimize the expression.
298 |
299 | """
300 |
301 | cross_entropy = T.mean(
302 | T.sum(self.input * T.log(T.nnet.sigmoid(pre_sigmoid_nv)) +
303 | (1 - self.input) * T.log(1 - T.nnet.sigmoid(pre_sigmoid_nv)),
304 | axis=1))
305 | errsum = T.sum(T.sum((self.input - nv_samples) ** 2))
306 |
307 | return errsum # note: the squared error is returned, not cross_entropy
308 |
309 |
-------------------------------------------------------------------------------- /TheanoDL/rnnrbm.py:
--------------------------------------------------------------------------------
1 | # Author: Nicolas Boulanger-Lewandowski
2 | # University of Montreal (2012)
3 | # RNN-RBM deep learning tutorial
4 | # More information at http://deeplearning.net/tutorial/rnnrbm.html
5 |
6 | import glob
7 | import os
8 | import sys
9 |
10 | import numpy
11 | try:
12 | import pylab
13 | except ImportError:
14 | print "pylab isn't available; if you use its functionality, it will crash"
15 | print "It can be installed with 'pip install -q Pillow'"
16 |
17 | from midi.utils import midiread, midiwrite
18 | import theano
19 | import theano.tensor as T
20 | from theano.tensor.shared_randomstreams import RandomStreams
21 |
22 | # Don't use a Python long, as this doesn't work on 32-bit computers.
23 | numpy.random.seed(0xbeef)
24 | rng = RandomStreams(seed=numpy.random.randint(1 << 30))
25 | theano.config.warn.subtensor_merge_bug = False
26 |
27 |
28 | def build_rbm(v, W, bv, bh, k):
29 | '''Construct a k-step Gibbs chain starting at v for an RBM.
30 |
31 | v : Theano vector or matrix
32 | If a matrix, multiple chains will be run in parallel (batch).
33 | W : Theano matrix
34 | Weight matrix of the RBM.
35 | bv : Theano vector
36 | Visible bias vector of the RBM.
37 | bh : Theano vector
38 | Hidden bias vector of the RBM.
39 | k : scalar or Theano scalar
40 | Length of the Gibbs chain.
41 |
42 | Return a (v_sample, cost, monitor, updates) tuple:
43 |
44 | v_sample : Theano vector or matrix with the same shape as `v`
45 | Corresponds to the generated sample(s).
46 | cost : Theano scalar
47 | Expression whose gradient with respect to W, bv, bh is the CD-k approximation
48 | to the log-likelihood of `v` (training example) under the RBM.
49 | The cost is averaged in the batch case.
50 | monitor: Theano scalar
51 | Pseudo log-likelihood (also averaged in the batch case).
52 | updates: dictionary of Theano variable -> Theano variable
53 | The `updates` object returned by scan.'''
54 |
55 | def gibbs_step(v):
56 | mean_h = T.nnet.sigmoid(T.dot(v, W) + bh)
57 | h = rng.binomial(size=mean_h.shape, n=1, p=mean_h,
58 | dtype=theano.config.floatX)
59 | mean_v = T.nnet.sigmoid(T.dot(h, W.T) + bv)
60 | v = rng.binomial(size=mean_v.shape, n=1, p=mean_v,
61 | dtype=theano.config.floatX)
62 | return mean_v, v
63 |
64 | chain, updates = theano.scan(lambda v: gibbs_step(v)[1], outputs_info=[v],
65 | n_steps=k)
66 | v_sample = chain[-1]
67 |
68 | mean_v = gibbs_step(v_sample)[0]
69 | monitor = T.xlogx.xlogy0(v, mean_v) + T.xlogx.xlogy0(1 - v, 1 - mean_v)
70 | monitor = monitor.sum() / v.shape[0]
71 |
72 | def free_energy(v):
73 | return -(v * bv).sum() - T.log(1 + T.exp(T.dot(v, W) + bh)).sum()
74 | cost = (free_energy(v) - free_energy(v_sample)) / v.shape[0]
75 |
76 | return v_sample, cost, monitor, updates
77 |
78 |
79 | def shared_normal(num_rows, num_cols, scale=1):
80 | '''Initialize a matrix shared variable with normally distributed
81 | elements.'''
82 | return theano.shared(numpy.random.normal(
83 | scale=scale, size=(num_rows, num_cols)).astype(theano.config.floatX))
84 |
85 |
86 | def shared_zeros(*shape):
87 | '''Initialize a vector shared variable with zero elements.'''
88 | return theano.shared(numpy.zeros(shape, dtype=theano.config.floatX))
89 |
90 |
91 | def build_rnnrbm(n_visible, n_hidden, n_hidden_recurrent):
92 | '''Construct a symbolic RNN-RBM and initialize parameters.
93 |
94 | n_visible : integer
95 | Number of visible units.
96 | n_hidden : integer
97 | Number of hidden units of the conditional RBMs.
98 | n_hidden_recurrent : integer
99 | Number of hidden units of the RNN.
100 |
101 | Return a (v, v_sample, cost, monitor, params, updates_train, v_t,
102 | updates_generate) tuple:
103 |
104 | v : Theano matrix
105 | Symbolic variable holding an input sequence (used during training)
106 | v_sample : Theano matrix
107 | Symbolic variable holding the negative particles for CD log-likelihood
108 | gradient estimation (used during training)
109 | cost : Theano scalar
110 | Expression whose gradient (considering v_sample constant) corresponds to the
111 | LL gradient of the RNN-RBM (used during training)
112 | monitor : Theano scalar
113 | Frame-level pseudo-likelihood (useful for monitoring during training)
114 | params : tuple of Theano shared variables
115 | The parameters of the model to be optimized during training.
116 | updates_train : dictionary of Theano variable -> Theano variable
117 | Update object that should be passed to theano.function when compiling the
118 | training function.
119 | v_t : Theano matrix
120 | Symbolic variable holding a generated sequence (used during sampling)
121 | updates_generate : dictionary of Theano variable -> Theano variable
122 | Update object that should be passed to theano.function when compiling the
123 | generation function.'''
124 |
125 | W = shared_normal(n_visible, n_hidden, 0.01)
126 | bv = shared_zeros(n_visible)
127 | bh = shared_zeros(n_hidden)
128 | Wuh = shared_normal(n_hidden_recurrent, n_hidden, 0.0001)
129 | Wuv = shared_normal(n_hidden_recurrent, n_visible, 0.0001)
130 | Wvu = shared_normal(n_visible, n_hidden_recurrent, 0.0001)
131 | Wuu = shared_normal(n_hidden_recurrent, n_hidden_recurrent, 0.0001)
132 | bu = shared_zeros(n_hidden_recurrent)
133 |
134 | params = W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu # learned parameters as shared
135 | # variables
136 |
137 | v = T.matrix() # a training sequence
138 | u0 = T.zeros((n_hidden_recurrent,)) # initial value for the RNN hidden
139 | # units
140 |
141 | # If `v_t` is given, deterministic recurrence to compute the variable
142 | # biases bv_t, bh_t at each time step. If `v_t` is None, same recurrence
143 | # but with a separate Gibbs chain at each time step to sample (generate)
144 | # from the RNN-RBM. The resulting sample v_t is returned in order to be
145 | # passed down to the sequence history.
146 | def recurrence(v_t, u_tm1):
147 | bv_t = bv + T.dot(u_tm1, Wuv)
148 | bh_t = bh + T.dot(u_tm1, Wuh)
149 | generate = v_t is None
150 | if generate:
151 | v_t, _, _, updates = build_rbm(T.zeros((n_visible,)), W, bv_t,
152 | bh_t, k=25)
153 | u_t = T.tanh(bu + T.dot(v_t, Wvu) + T.dot(u_tm1, Wuu))
154 | return ([v_t, u_t], updates) if generate else [u_t, bv_t, bh_t]
155 |
156 | # For training, the deterministic recurrence is used to compute all the
157 | # {bv_t, bh_t, 1 <= t <= T} given v. Conditional RBMs can then be trained
158 | # in batches using those parameters.
159 | (u_t, bv_t, bh_t), updates_train = theano.scan(
160 | lambda v_t, u_tm1, *_: recurrence(v_t, u_tm1),
161 | sequences=v, outputs_info=[u0, None, None], non_sequences=params)
162 | v_sample, cost, monitor, updates_rbm = build_rbm(v, W, bv_t[:], bh_t[:],
163 | k=15)
164 | updates_train.update(updates_rbm)
165 |
166 | # symbolic loop for sequence generation
167 | (v_t, u_t), updates_generate = theano.scan(
168 | lambda u_tm1, *_: recurrence(None, u_tm1),
169 | outputs_info=[None, u0], non_sequences=params, n_steps=200)
170 |
171 | return (v, v_sample, cost, monitor, params, updates_train, v_t,
172 | updates_generate)
173 |
174 |
175 | class RnnRbm:
176 | '''Simple class to train an RNN-RBM from MIDI files and to generate sample
177 | sequences.'''
178 |
179 | def __init__(self, n_hidden=150, n_hidden_recurrent=100, lr=0.001,
180 | r=(21, 109), dt=0.3):
181 | '''Constructs and compiles Theano functions for training and sequence
182 | generation.
183 |
184 | n_hidden : integer
185 | Number of hidden units of the conditional RBMs.
186 | n_hidden_recurrent : integer
187 | Number of hidden units of the RNN.
188 | lr : float
189 | Learning rate
190 | r : (integer, integer) tuple
191 | Specifies the pitch range of the piano-roll in MIDI note numbers, including
192 | r[0] but not r[1], such that r[1]-r[0] is the number of visible units of the
193 | RBM at a given time step. The default (21, 109) corresponds to the full range
194 | of piano (88 notes).
195 |         dt : float
196 |             Sampling period when converting the MIDI files into piano-rolls, or
197 |             equivalently the time difference between consecutive time steps.'''
198 | 
199 |         self.r = r
200 |         self.dt = dt
201 |         (v, v_sample, cost, monitor, params, updates_train, v_t,
202 |             updates_generate) = build_rnnrbm(r[1] - r[0], n_hidden,
203 |                                              n_hidden_recurrent)
204 | 
205 |         gradient = T.grad(cost, params, consider_constant=[v_sample])
206 |         updates_train.update(((p, p - lr * g) for p, g in zip(params,
207 |                                                               gradient)))
208 |         self.train_function = theano.function([v], monitor,
209 |                                                updates=updates_train)
210 |         self.generate_function = theano.function([], v_t,
211 |                                                   updates=updates_generate)
212 | 
213 |     def train(self, files, batch_size=100, num_epochs=200):
214 |         '''Train the RNN-RBM via stochastic gradient descent (SGD) using MIDI
215 |         files converted to piano-rolls.
216 | 
217 |         files : list of strings
218 |             List of MIDI files that will be loaded as piano-rolls for training.
219 |         batch_size : integer
220 |             Training sequences will be split into subsequences of at most this size
221 |             before applying the SGD updates.
222 |         num_epochs : integer
223 |             Number of epochs (passes over the training set) performed. The user
224 |             can safely interrupt training with Ctrl+C at any time.'''
225 | 
226 |         assert len(files) > 0, 'Training set is empty!' \
227 |                                ' (did you download the data files?)'
228 |         dataset = [midiread(f, self.r,
229 |                             self.dt).piano_roll.astype(theano.config.floatX)
230 |                    for f in files]
231 | 
232 |         try:
233 |             for epoch in xrange(num_epochs):
234 |                 numpy.random.shuffle(dataset)
235 |                 costs = []
236 | 
237 |                 for s, sequence in enumerate(dataset):
238 |                     for i in xrange(0, len(sequence), batch_size):
239 |                         cost = self.train_function(sequence[i:i + batch_size])
240 |                         costs.append(cost)
241 | 
242 |                 print 'Epoch %i/%i' % (epoch + 1, num_epochs),
243 |                 print numpy.mean(costs)
244 |                 sys.stdout.flush()
245 | 
246 |         except KeyboardInterrupt:
247 |             print 'Interrupted by user.'
248 | 
249 |     def generate(self, filename, show=True):
250 |         '''Generate a sample sequence, plot the resulting piano-roll and save
251 |         it as a MIDI file.
252 | 
253 |         filename : string
254 |             A MIDI file will be created at this location.
255 |         show : boolean
256 |             If True, a piano-roll of the generated sequence will be shown.'''
257 | 
258 |         piano_roll = self.generate_function()
259 |         midiwrite(filename, piano_roll, self.r, self.dt)
260 |         if show:
261 |             extent = (0, self.dt * len(piano_roll)) + self.r
262 |             pylab.figure()
263 |             pylab.imshow(piano_roll.T, origin='lower', aspect='auto',
264 |                          interpolation='nearest', cmap=pylab.cm.gray_r,
265 |                          extent=extent)
266 |             pylab.xlabel('time (s)')
267 |             pylab.ylabel('MIDI note number')
268 |             pylab.title('generated piano-roll')
269 | 
270 | 
271 | def test_rnnrbm(batch_size=100, num_epochs=200):
272 |     model = RnnRbm()
273 |     re = os.path.join(os.path.split(os.path.dirname(__file__))[0],
274 |                       'data', 'Nottingham', 'train', '*.mid')
275 |     model.train(glob.glob(re),
276 |                 batch_size=batch_size, num_epochs=num_epochs)
277 |     return model
278 | 
279 | if __name__ == '__main__':
280 |     model = test_rnnrbm()
281 |     model.generate('sample1.mid')
282 |     model.generate('sample2.mid')
283 |     pylab.show()
284 | 
--------------------------------------------------------------------------------
/TheanoDL/test.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | import numpy
4 | import theano
5 | 
6 | import convolutional_mlp
7 | import dA
8 | import DBN
9 | import logistic_cg
10 | import logistic_sgd
11 | import mlp
12 | import rbm
13 | import rnnrbm
14 | import SdA
15 | 
16 | 
17 | def test_logistic_sgd():
18 |     logistic_sgd.sgd_optimization_mnist(n_epochs=10)
19 | 
20 | 
21 | def test_logistic_cg():
22 |     try:
23 |         import scipy
24 |         logistic_cg.cg_optimization_mnist(n_epochs=10)
25 |     except ImportError:
26 |         from nose.plugins.skip import SkipTest
27 |         raise SkipTest(
28 |             'SciPy not available. Needed for the logistic_cg example.')
29 | 
30 | 
31 | def test_mlp():
32 |     mlp.test_mlp(n_epochs=1)
33 | 
34 | 
35 | def test_convolutional_mlp():
36 |     convolutional_mlp.evaluate_lenet5(n_epochs=1, nkerns=[5, 5])
37 | 
38 | 
39 | def test_dA():
40 |     dA.test_dA(training_epochs=1, output_folder='tmp_dA_plots')
41 | 
42 | 
43 | def test_SdA():
44 |     SdA.test_SdA(pretraining_epochs=1, training_epochs=1, batch_size=300)
45 | 
46 | 
47 | def test_dbn():
48 |     DBN.test_DBN(pretraining_epochs=1, training_epochs=1, batch_size=300)
49 | 
50 | 
51 | def test_rbm():
52 |     rbm.test_rbm(training_epochs=1, batch_size=300, n_chains=1, n_samples=1,
53 |                  n_hidden=20, output_folder='tmp_rbm_plots')
54 | 
55 | 
56 | def test_rnnrbm():
57 |     rnnrbm.test_rnnrbm(num_epochs=1)
58 | 
59 | 
60 | def speed():
61 |     """
62 |     This function modifies the Theano configuration and doesn't restore it!
63 |     """
64 | 
65 |     algo = ['logistic_sgd', 'logistic_cg', 'mlp', 'convolutional_mlp',
66 |             'dA', 'SdA', 'DBN', 'rbm', 'rnnrbm']
67 |     to_exec = [True] * len(algo)
68 |     # to_exec = [False] * len(algo)
69 |     # to_exec[-1] = True
70 |     do_float64 = True
71 |     do_float32 = True
72 |     do_gpu = True
73 | 
74 |     algo_executed = [s for idx, s in enumerate(algo) if to_exec[idx]]
75 |     # Expected timings are from the buildbot, which has an i7-920 @
76 |     # 2.67GHz with hyperthreading enabled for the CPU, 12G of RAM, and a
77 |     # GeForce GTX 285 for the GPU. OS=Fedora 14, gcc=4.5.1, Python/BLAS from
78 |     # EPD 7.1-2 (Python 2.7.2, MKL unknown). BLAS with only 1 thread.
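   | # The three arrays below give the per-algorithm reference times (in
   | # seconds), in the same order as `algo`; they are filtered by `to_exec`
   | # further down. A usage sketch (illustrative, not in the original file):
   | # to time only the RNN-RBM benchmark, flip the mask before do_tests() runs:
   | #   to_exec = [False] * len(algo)
   | #   to_exec[algo.index('rnnrbm')] = True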
79 | 
80 |     expected_times_64 = numpy.asarray([10.0, 22.5, 76.1, 73.7, 116.4,
81 |                                        346.9, 381.9, 558.1, 186.3])
82 |     expected_times_32 = numpy.asarray([11.6, 29.6, 42.5, 66.5, 71,
83 |                                        191.2, 226.8, 432.8, 176.2])
84 | 
85 |     # Numbers with just one decimal are new values that are faster with
86 |     # Theano version 0.5rc2. The other numbers are older; they have not been
87 |     # updated, as we were faster in the past!
88 |     # TODO: find out why and fix this!
89 | 
90 |     # Here are the values for the buildbot on February 3rd, 2012.
91 |     #               sgd,        cg          mlp         conv        da
92 |     #               sda         dbn         rbm
93 |     # gpu times    [3.72957802, 9.94316864, 29.1772666, 9.13857198, 25.91144657,
94 |     #               18.30802011, 53.38651466, 285.41386175]
95 |     # expected     [3.076634879, 7.555234910, 18.99226785, 9.58915591, 24.130070450,
96 |     #               24.77524018, 92.66246653, 322.340329170]
97 |     #               sgd,        cg          mlp         conv        da
98 |     #               sda         dbn         rbm
99 |     # expected/get [0.82492841, 0.75984178, 0.65092691, 1.04930573, 0.93125138
100 |     #               1.35324519  1.7356905   1.12937868]
101 |     expected_times_gpu = numpy.asarray([3.07663488, 7.55523491, 18.99226785,
102 |                                         9.6, 24.13007045,
103 |                                         20.4, 56, 302.6, 315.4])
104 |     expected_times_64 = [s for idx, s in enumerate(expected_times_64)
105 |                          if to_exec[idx]]
106 |     expected_times_32 = [s for idx, s in enumerate(expected_times_32)
107 |                          if to_exec[idx]]
108 |     expected_times_gpu = [s for idx, s in enumerate(expected_times_gpu)
109 |                           if to_exec[idx]]
110 | 
111 |     def time_test(m, l, idx, f, **kwargs):
112 |         if not to_exec[idx]:
113 |             return
114 |         print algo[idx]
115 |         ts = m.call_time
116 |         try:
117 |             f(**kwargs)
118 |         except Exception, e:
119 |             print >> sys.stderr, 'test', algo[idx], 'FAILED', e
120 |             l.append(numpy.nan)
121 |             return
122 |         te = m.call_time
123 |         l.append(te - ts)
124 | 
125 |     def do_tests():
126 |         m = theano.compile.mode.get_default_mode()
127 |         l = []
128 |         time_test(m, l, 0, logistic_sgd.sgd_optimization_mnist, n_epochs=30)
129 |         time_test(m, l, 1, logistic_cg.cg_optimization_mnist, n_epochs=30)
130 |         time_test(m, l, 2, mlp.test_mlp, n_epochs=5)
131 |         time_test(m, l, 3, convolutional_mlp.evaluate_lenet5, n_epochs=5,
132 |                   nkerns=[5, 5])
133 |         time_test(m, l, 4, dA.test_dA, training_epochs=2,
134 |                   output_folder='tmp_dA_plots')
135 |         time_test(m, l, 5, SdA.test_SdA, pretraining_epochs=1,
136 |                   training_epochs=2, batch_size=300)
137 |         time_test(m, l, 6, DBN.test_DBN, pretraining_epochs=1,
138 |                   training_epochs=2, batch_size=300)
139 |         time_test(m, l, 7, rbm.test_rbm, training_epochs=1, batch_size=300,
140 |                   n_chains=1, n_samples=1, output_folder='tmp_rbm_plots')
141 |         time_test(m, l, 8, rnnrbm.test_rnnrbm, num_epochs=1)
142 |         return numpy.asarray(l)
143 | 
144 |     # test in float64 in FAST_RUN mode on the cpu
145 |     import theano
146 |     if do_float64:
147 |         theano.config.floatX = 'float64'
148 |         theano.config.mode = 'FAST_RUN'
149 |         float64_times = do_tests()
150 |         print >> sys.stderr, algo_executed
151 |         print >> sys.stderr, 'float64 times', float64_times
152 |         print >> sys.stderr, 'float64 expected', expected_times_64
153 |         print >> sys.stderr, 'float64 % expected/get', (
154 |             expected_times_64 / float64_times)
155 | 
156 |     # test in float32 in FAST_RUN mode on the cpu
157 |     theano.config.floatX = 'float32'
158 |     if do_float32:
159 |         float32_times = do_tests()
160 |         print >> sys.stderr, algo_executed
161 |         print >> sys.stderr, 'float32 times', float32_times
162 |         print >> sys.stderr, 'float32 expected', expected_times_32
163 |         print >> sys.stderr, 'float32 % expected/get', (
164 |             expected_times_32 / float32_times)
165 | 
166 |         if do_float64:
167 |             print >> sys.stderr, 'float64/float32', (
168 |                 float64_times / float32_times)
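   | # How to read the ratios printed above: 'expected/get' > 1.0 means this
   | # run was faster than the reference buildbot, and 'float64/float32' > 1.0
   | # means the float32 run finished faster than the float64 run.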
169 |             print >> sys.stderr
170 |             print >> sys.stderr, 'Duplicate the timing to have everything in one place'
171 |             print >> sys.stderr, algo_executed
172 |             print >> sys.stderr, 'float64 times', float64_times
173 |             print >> sys.stderr, 'float64 expected', expected_times_64
174 |             print >> sys.stderr, 'float64 % expected/get', (
175 |                 expected_times_64 / float64_times)
176 |             print >> sys.stderr, 'float32 times', float32_times
177 |             print >> sys.stderr, 'float32 expected', expected_times_32
178 |             print >> sys.stderr, 'float32 % expected/get', (
179 |                 expected_times_32 / float32_times)
180 | 
181 |             print >> sys.stderr, 'float64/float32', (
182 |                 float64_times / float32_times)
183 |             print >> sys.stderr, 'expected float64/float32', (
184 |                 expected_times_64 / float32_times)
185 | 
186 |     # test in float32 in FAST_RUN mode on the gpu
187 |     import theano.sandbox.cuda
188 |     if do_gpu:
189 |         theano.sandbox.cuda.use('gpu')
190 |         gpu_times = do_tests()
191 |         print >> sys.stderr, algo_executed
192 |         print >> sys.stderr, 'gpu times', gpu_times
193 |         print >> sys.stderr, 'gpu expected', expected_times_gpu
194 |         print >> sys.stderr, 'gpu % expected/get', (
195 |             expected_times_gpu / gpu_times)
196 | 
197 |         if do_float64:
198 |             print >> sys.stderr, 'float64/gpu', float64_times / gpu_times
199 | 
200 |     if (do_float64 + do_float32 + do_gpu) > 1:
201 |         print >> sys.stderr
202 |         print >> sys.stderr, 'Duplicate the timing to have everything in one place'
203 |         print >> sys.stderr, algo_executed
204 |         if do_float64:
205 |             print >> sys.stderr, 'float64 times', float64_times
206 |             print >> sys.stderr, 'float64 expected', expected_times_64
207 |             print >> sys.stderr, 'float64 % expected/get', (
208 |                 expected_times_64 / float64_times)
209 |         if do_float32:
210 |             print >> sys.stderr, 'float32 times', float32_times
211 |             print >> sys.stderr, 'float32 expected', expected_times_32
212 |             print >> sys.stderr, 'float32 % expected/get', (
213 |                 expected_times_32 / float32_times)
214 |         if do_gpu:
215 |             print >> sys.stderr, 'gpu times', gpu_times
216 |             print >> sys.stderr, 'gpu expected', expected_times_gpu
217 |             print >> sys.stderr, 'gpu % expected/get', (
218 |                 expected_times_gpu / gpu_times)
219 | 
220 |         if do_float64 and do_float32:
221 |             print >> sys.stderr, 'float64/float32', (
222 |                 float64_times / float32_times)
223 |             print >> sys.stderr, 'expected float64/float32', (
224 |                 expected_times_64 / float32_times)
225 |         if do_float64 and do_gpu:
226 |             print >> sys.stderr, 'float64/gpu', float64_times / gpu_times
227 |             print >> sys.stderr, 'expected float64/gpu', (
228 |                 expected_times_64 / gpu_times)
229 |         if do_float32 and do_gpu:
230 |             print >> sys.stderr, 'float32/gpu', float32_times / gpu_times
231 |             print >> sys.stderr, 'expected float32/gpu', (
232 |                 expected_times_32 / gpu_times)
233 | 
234 |     def compare(x, y):
235 |         ratio = x / y
236 |         # If there is more than 5% difference between the expected
237 |         # time and the real time, we consider this an error.
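   | # A small worked example (illustrative values): with
   | #   x = numpy.array([10.0, 10.0]) and y = numpy.array([9.0, 10.2]),
   | # ratio is about [1.11, 0.98]; only the first entry leaves the
   | # [0.95, 1.05] band, so compare(x, y) returns 1.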
238 |         return sum((ratio < 0.95) + (ratio > 1.05))
239 | 
240 |     if do_float64:
241 |         err = compare(expected_times_64, float64_times)
242 |         print >> sys.stderr, 'speed_failure_float64=' + str(err)
243 |     if do_float32:
244 |         err = compare(expected_times_32, float32_times)
245 |         print >> sys.stderr, 'speed_failure_float32=' + str(err)
246 |     if do_gpu:
247 |         err = compare(expected_times_gpu, gpu_times)
248 |         print >> sys.stderr, 'speed_failure_gpu=' + str(err)
249 | 
250 |         assert not numpy.isnan(gpu_times).any()
--------------------------------------------------------------------------------
/TheanoDL/utils.py:
--------------------------------------------------------------------------------
1 | """ This file contains different utility functions that are not connected
2 | in any way to the networks presented in the tutorials, but rather help in
3 | processing the outputs into a more understandable way.
4 | 
5 | For example ``tile_raster_images`` helps in generating an easy-to-grasp
6 | image from a set of samples or weights.
7 | """
8 | 
9 | 
10 | import numpy
11 | 
12 | 
13 | def scale_to_unit_interval(ndar, eps=1e-8):
14 |     """ Scales all values in the ndarray ndar to be between 0 and 1 """
15 |     ndar = ndar.copy()
16 |     ndar -= ndar.min()
17 |     ndar *= 1.0 / (ndar.max() + eps)
18 |     return ndar
19 | 
20 | 
21 | def zero_mean_unit_variance(Data):
22 |     """ Normalizes each column of Data to zero mean and unit variance """
23 |     Mean = numpy.mean(Data, axis=0)
24 |     Data -= Mean
25 | 
26 |     Std = numpy.std(Data, axis=0)
27 |     index = numpy.abs(Std) < 10 ** -5  # avoid dividing by a near-zero std
28 |     Std[index] = 1
29 |     Data /= Std
30 |     return [Data, Mean, Std]
31 | 
32 | 
33 | def normalize(Data, Mean, Std):
34 |     Data -= Mean
35 |     Data /= Std
36 |     return Data
37 | 
38 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
39 |                        scale_rows_to_unit_interval=True,
40 |                        output_pixel_vals=True):
41 |     """
42 |     Transform an array with one flattened image per row, into an array in
43 |     which images are reshaped and laid out like tiles on a floor.
44 | 
45 |     This function is useful for visualizing datasets whose rows are images,
46 |     and also columns of matrices for transforming those rows
47 |     (such as the first layer of a neural net).
48 | 
49 |     :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
50 |              be 2-D ndarrays or None;
51 |     :param X: a 2-D array in which every row is a flattened image.
52 | 
53 |     :type img_shape: tuple; (height, width)
54 |     :param img_shape: the original shape of each image
55 | 
56 |     :type tile_shape: tuple; (rows, cols)
57 |     :param tile_shape: the number of images to tile (rows, cols)
58 | 
59 |     :param output_pixel_vals: if output should be pixel values (i.e. int8
60 |                               values) or floats
61 | 
62 |     :param scale_rows_to_unit_interval: if the values need to be scaled before
63 |                                         being plotted to [0,1] or not
64 | 
65 | 
66 |     :returns: array suitable for viewing as an image.
67 |               (See: ``PIL.Image.fromarray``.)
68 |     :rtype: a 2-d array with same dtype as X.
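   |     Usage sketch (values are illustrative, not from this repository):
   |     tiling 100 flattened 28x28 images into a 10x10 grid with 1-pixel
   |     spacing gives a 289x289 uint8 array:
   | 
   |         samples = numpy.random.rand(100, 784)
   |         img = tile_raster_images(samples, img_shape=(28, 28),
   |                                  tile_shape=(10, 10), tile_spacing=(1, 1))
   |         # view it with PIL.Image.fromarray(img).show()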
69 | 
70 |     """
71 | 
72 |     assert len(img_shape) == 2
73 |     assert len(tile_shape) == 2
74 |     assert len(tile_spacing) == 2
75 | 
76 |     # The expression below can be re-written in a more C style as
77 |     # follows:
78 |     #
79 |     # out_shape = [0,0]
80 |     # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] -
81 |     #                tile_spacing[0]
82 |     # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] -
83 |     #                tile_spacing[1]
84 |     out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
85 |                  in zip(img_shape, tile_shape, tile_spacing)]
86 | 
87 |     if isinstance(X, tuple):
88 |         assert len(X) == 4
89 |         # Create an output numpy ndarray to store the image
90 |         if output_pixel_vals:
91 |             out_array = numpy.zeros((out_shape[0], out_shape[1], 4),
92 |                                     dtype='uint8')
93 |         else:
94 |             out_array = numpy.zeros((out_shape[0], out_shape[1], 4),
95 |                                     dtype=X.dtype)
96 | 
97 |         # colors default to 0, alpha defaults to 1 (opaque)
98 |         if output_pixel_vals:
99 |             channel_defaults = [0, 0, 0, 255]
100 |         else:
101 |             channel_defaults = [0., 0., 0., 1.]
102 | 
103 |         for i in xrange(4):
104 |             if X[i] is None:
105 |                 # if channel is None, fill it with zeros of the correct
106 |                 # dtype
107 |                 dt = out_array.dtype
108 |                 if output_pixel_vals:
109 |                     dt = 'uint8'
110 |                 out_array[:, :, i] = numpy.zeros(out_shape,
111 |                                                  dtype=dt) + channel_defaults[i]
112 |             else:
113 |                 # use a recursive call to compute the channel and store it
114 |                 # in the output
115 |                 out_array[:, :, i] = tile_raster_images(
116 |                     X[i], img_shape, tile_shape, tile_spacing,
117 |                     scale_rows_to_unit_interval, output_pixel_vals)
118 |         return out_array
119 | 
120 |     else:
121 |         # if we are dealing with only one channel
122 |         H, W = img_shape
123 |         Hs, Ws = tile_spacing
124 | 
125 |         # generate a matrix to store the output
126 |         dt = X.dtype
127 |         if output_pixel_vals:
128 |             dt = 'uint8'
129 |         out_array = numpy.zeros(out_shape, dtype=dt)
130 | 
131 |         for tile_row in xrange(tile_shape[0]):
132 |             for tile_col in xrange(tile_shape[1]):
133 |                 if tile_row * tile_shape[1] + tile_col < X.shape[0]:
134 |                     this_x = X[tile_row * tile_shape[1] + tile_col]
135 |                     if scale_rows_to_unit_interval:
136 |                         # if we should scale values to be between 0 and 1,
137 |                         # do this by calling the `scale_to_unit_interval`
138 |                         # function
139 |                         this_img = scale_to_unit_interval(
140 |                             this_x.reshape(img_shape))
141 |                     else:
142 |                         this_img = this_x.reshape(img_shape)
143 |                     # add the slice to the corresponding position in the
144 |                     # output array
145 |                     c = 1
146 |                     if output_pixel_vals:
147 |                         c = 255
148 |                     out_array[
149 |                         tile_row * (H + Hs): tile_row * (H + Hs) + H,
150 |                         tile_col * (W + Ws): tile_col * (W + Ws) + W
151 |                     ] = this_img * c
152 |         return out_array
153 | 
--------------------------------------------------------------------------------
/Transition_matrix.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/Transition_matrix.mat
--------------------------------------------------------------------------------
/cvpr_2014_diwu.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/cvpr_2014_diwu.pdf
--------------------------------------------------------------------------------
/dbn_2014-05-23-20-07-28.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/dbn_2014-05-23-20-07-28.npy -------------------------------------------------------------------------------- /distance_median.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/distance_median.npy -------------------------------------------------------------------------------- /template.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/template.png --------------------------------------------------------------------------------