├── .gitattributes
├── .gitignore
├── ChalearnLAPEvaluation.py
├── ChalearnLAPSample.py
├── ChalearnLAPTest.py
├── CoDaLab_Gesure_track3.pyproj
├── CoDaLab_Gesure_track3.sln
├── README.md
├── README.txt
├── SK_normalization.pkl
├── Step1_DBN.py
├── Step1_DBN_Structure2.py
├── Step1_SK_Neutral_Realtime.py
├── Step1_SK_realtime.py
├── Step1_transition_matrix.py
├── Step2_SK_Prediction.py
├── Step3_SK_Test_prediction.py
├── Step3_measure_performance.py
├── TheanoDL
├── DBN.py
├── DBN_MNIST.py
├── GRBM_DBN.py
├── GRBM_DBN.pyc
├── LogisticRegressionMNIST.py
├── Mean_DBN.py
├── RBM_Pylearn2.py
├── SdA.py
├── Theano_Tutorial.pyproj
├── Tutorial.py
├── Tutorial.v11.suo
├── cA.py
├── convolutional_mlp.py
├── dA.py
├── grbm.py
├── grbm.pyc
├── logistic_cg.py
├── logistic_sgd.py
├── logistic_sgd.pyc
├── mlp.py
├── mlp.pyc
├── rbm.py
├── rbm.pyc
├── rbm_gnumpy.py
├── rbm_mean.py
├── rnnrbm.py
├── test.py
└── utils.py
├── Transition_matrix.mat
├── cvpr_2014_diwu.pdf
├── dbn_2014-05-23-20-07-28.npy
├── distance_median.npy
├── template.png
└── utils.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 | *.sln merge=union
7 | *.csproj merge=union
8 | *.vbproj merge=union
9 | *.fsproj merge=union
10 | *.dbproj merge=union
11 |
12 | # Standard to msysgit
13 | *.doc diff=astextplain
14 | *.DOC diff=astextplain
15 | *.docx diff=astextplain
16 | *.DOCX diff=astextplain
17 | *.dot diff=astextplain
18 | *.DOT diff=astextplain
19 | *.pdf diff=astextplain
20 | *.PDF diff=astextplain
21 | *.rtf diff=astextplain
22 | *.RTF diff=astextplain
23 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Windows image file caches
2 | Thumbs.db
3 | ehthumbs.db
4 |
5 | # Folder config file
6 | Desktop.ini
7 |
8 | # Recycle Bin used on file shares
9 | $RECYCLE.BIN/
10 |
11 | # Windows Installer files
12 | *.cab
13 | *.msi
14 | *.msm
15 | *.msp
16 |
17 | # =========================
18 | # Operating System Files
19 | # =========================
20 |
21 | # OSX
22 | # =========================
23 |
24 | .DS_Store
25 | .AppleDouble
26 | .LSOverride
27 |
28 | # Icon must ends with two \r.
29 | Icon
30 |
31 | # Thumbnails
32 | ._*
33 |
34 | # Files that might appear on external disk
35 | .Spotlight-V100
36 | .Trashes
37 |
--------------------------------------------------------------------------------
/ChalearnLAPTest.py:
--------------------------------------------------------------------------------
1 | #-------------------------------------------------------------------------------
2 | # Name: Chalearn LAP utils scripts
3 | # Purpose: Provide scripts to add labels to Chalearn LAP challenge tracks samples
4 | #
5 | # Author: Xavier Baro
6 | # Di Wu: stevenwudi@gmail.com
7 | # Created: 25/04/2014
8 | # Copyright: (c) Chalearn LAP 2014
9 | # Licence: GPL
10 | #-------------------------------------------------------------------------------
11 | import os
12 | import zipfile
13 | import shutil
14 | import glob
15 |
16 | def main():
17 | """ Main script. Created a labeled copy of validation samples """
18 | # Data folder (Unlabeled data samples)
19 | dataPath=r'I:\Kaggle_multimodal\Validation'
20 | # Labels file (Unziped validation.zip)
21 | labelsPath=r'I:\Kaggle_multimodal\validation_labels'
22 | # Use the method for desired track
23 | print('Uncoment the line for your track')
24 | addLabels_Track3(dataPath, labelsPath)
25 |
26 |
27 | def addLabels_Track3(dataPath, labelsPath):
28 | """ Add labels to the samples"""
29 | # Check the given data path
30 | if not os.path.exists(dataPath) or not os.path.isdir(dataPath):
31 | raise Exception("Data path does not exist: " + dataPath)
32 | # Check the given labels path
33 | if not os.path.exists(labelsPath) or not os.path.isdir(labelsPath):
34 | raise Exception("Labels path does not exist: " + labelsPath)
35 |
36 | # Get the list of samples
37 | samplesList = os.listdir(dataPath)
38 | # For each sample on the GT, search the given prediction
39 | for sample in samplesList:
40 | print "writing file" + sample
41 | # Build paths for sample
42 | sampleFile = os.path.join(dataPath, sample)
43 | # Prepare sample information
44 | file = os.path.split(sampleFile)[1]
45 | sampleID = os.path.splitext(file)[0]
46 | samplePath = dataPath + os.path.sep + sampleID
47 |
48 | # Add the labels
49 | srtFileName=sampleID + '_labels.csv'
50 | srcSampleDataPath = os.path.join(labelsPath, srtFileName)
51 | dstSampleDataPath = os.path.join(sampleFile, srtFileName)
52 | shutil.copyfile(srcSampleDataPath, dstSampleDataPath)
53 |
54 |
55 | if __name__ == '__main__':
56 | main()
57 |
--------------------------------------------------------------------------------
/CoDaLab_Gesure_track3.pyproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Debug
5 | 2.0
6 | a842cc43-58a6-4065-a788-12333aa22a55
7 | .
8 | Step2_SK_Prediction.py
9 | C:\Users\PC-User\Documents\Visual Studio 2012\Projects\Theano\Tutorial\;ConvNet_3DCNN\
10 | .
11 | .
12 | CoDaLab_Gesure_track3
13 | CoDaLab_Gesure_track3
14 | False
15 | {947cee96-2ef3-4b77-b850-f04b2d86f6b6}
16 | 2.7
17 | Standard Python launcher
18 | -W ignore::DeprecationWarning
19 | False
20 |
21 |
22 | true
23 | false
24 |
25 |
26 | true
27 | false
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 | 10.0
111 | $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)
112 | $(VSToolsPath)\Python Tools\Microsoft.PythonTools.targets
113 |
114 |
115 |
116 |
--------------------------------------------------------------------------------
/CoDaLab_Gesure_track3.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 2012
4 | Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "CoDaLab_Gesure_track3", "CoDaLab_Gesure_track3.pyproj", "{A842CC43-58A6-4065-A788-12333AA22A55}"
5 | EndProject
6 | Global
7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
8 | Debug|Any CPU = Debug|Any CPU
9 | Release|Any CPU = Release|Any CPU
10 | EndGlobalSection
11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
12 | {A842CC43-58A6-4065-A788-12333AA22A55}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
13 | {A842CC43-58A6-4065-A788-12333AA22A55}.Release|Any CPU.ActiveCfg = Release|Any CPU
14 | EndGlobalSection
15 | GlobalSection(SolutionProperties) = preSolution
16 | HideSolutionNode = FALSE
17 | EndGlobalSection
18 | EndGlobal
19 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Purpose
2 | =============
3 | This is the code the challenge"Chalearn Looking at People 2014“.
4 | ******************************************************************************************************
5 | Gist: Delief Networks (Gaussian Bernoulli RBM as first layer) + Hidden Markov Networks
6 | ******************************************************************************************************
7 | by Di WU: stevenwudi@gmail.com, 2015/05/27
8 |
9 |
10 | Citation
11 | -------
12 | If you use this toolbox as part of a research project, please cite the corresponding paper
13 | ******************************************************************************************************
14 | ```yaml
15 | @inproceedings{wu2014leveraging,
16 | title={Leveraging Hierarchical Parametric Networks for Skeletal Joints Based Action Segmentation and Recognition},
17 | author={Wu, Di and Shao, Ling},
18 | booktitle={Proc. Conference on Computer Vision and Pattern Recognition (CVPR)},
19 | year={2014}
20 | }
21 | ```
22 | ******************************************************************************************************
23 |
24 |
25 | Dependency: Theano
26 | -------
27 | Some dependent libraries requirements:
28 | Theano: for deep learning tasks http://deeplearning.net/software/theano/.
29 | Note that Wudi change some of the functionalities(Deep Belief Networks, Gaussian Bernoulli Restricted Boltzmann Machines).
30 | They are in the subfolder of -->TheanoDL
31 |
32 |
33 |
34 | Test
35 | -------
36 | To reproduce the experimental result for test submission, there is a Python file:
37 |
38 | `Step3_SK_Test_prediction.py` and there are three paths needs to be changed accordingly:
39 |
40 | line: 60, Data folder (Test data)
41 | data_path=os.path.join("I:\Kaggle_multimodal\Test\Test\\")
42 |
43 | line: 62, Predictions folder (output)
44 | outPred=r'.\training\test'
45 |
46 | line: 64, Submision folder (output)
47 | outSubmision=r'.\training\test_submission'
48 |
49 | It takes about ~20 second for each example file using only skeleton information. (I use Theano GPU model, but I reckon CPU model should almost of the same speed)
50 |
51 | Train
52 | -------
53 | To train the network, you first need to extract the skeleton information
54 |
55 | 1)`Step1_SK_Neutral_Realtime.py`--> extract neutral frames (aka., 5 frames before and after the gesture)
56 |
57 | 2)`Step1_SK_Realtime.py`--> extract gesture frames
58 |
59 | 3)`Step1_DBN_Strucutre2.py`-->Start training the networks (`Step1_DBN.py` specifies a smaller networks, train faster, but the larger the net is always better)
60 |
61 | Voila, here you go.
62 |
63 | Dataset
64 | -------
65 | According to some reader recommendation, I supplement the links of the datasets used in the paper as follows:
66 |
67 | 1) `ChaLearn Italian Gesture Recognition` --> [http://gesture.chalearn.org/2013-multi-modal-challenge](http://gesture.chalearn.org/2013-multi-modal-challenge)
68 |
69 | You should download from this dataset from Kaggle platform. [https://www.kaggle.com/c/multi-modal-gesture-recognition/data](https://www.kaggle.com/c/multi-modal-gesture-recognition/data)
70 |
71 | 2) `MSR Action3D` --> [http://research.microsoft.com/en-us/um/people/zliu/actionrecorsrc](http://research.microsoft.com/en-us/um/people/zliu/actionrecorsrc)
72 |
73 | 3) `MSRC12 ` --> [http://research.microsoft.com/en-us/um/cambridge/projects/msrc12](http://research.microsoft.com/en-us/um/cambridge/projects/msrc12)
74 |
75 | (If you use the datasets, please cite the corresponding original paper. Thanks)
76 |
77 | Contact
78 | -------
79 | If you read the code and find it really hard to understand, please send feedback to: stevenwudi@gmail.com
80 | Thank you!
81 |
--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
1 | Purpose
2 | =============
3 | This is the code the challenge"Chalearn Looking at People 2014“
4 | Gist: Delief Networks (Gaussian Bernoulli RBM as first layer) + Hidden Markov Networks
5 | by Di WU: stevenwudi@gmail.com, 2015/05/27
6 |
7 |
8 | Citation
9 | -------
10 | If you use this toolbox as part of a research project, please consider citing the corresponding paper
11 | ******************************************************************************************************
12 | @inproceedings{wu2014leveraging,
13 | title={Leveraging Hierarchical Parametric Networks for Skeletal Joints Based Action Segmentation and Recognition},
14 | author={Wu, Di and Shao, Ling},
15 | booktitle={Proc. Conference on Computer Vision and Pattern Recognition (CVPR)},
16 | year={2014}
17 | }
18 | ******************************************************************************************************
19 |
20 |
21 | Dependency: Theano
22 | -------
23 | Some dependent libraries requirements:
24 | (1) Theano: for deep learning tasks http://deeplearning.net/software/theano/
25 | Note that Wudi change some of the functionalities(Deep Belief Networks, Gaussian Bernoulli Restricted Boltzmann Machines)
26 | They are in the subfolder of -->TheanoDL
27 |
28 |
29 |
30 | Test
31 | -------
32 | To reproduce the experimental result for test submission, there is a Python file:
33 |
34 | Step3_SK_Test_prediction.py and there are three paths needs to be changed accordingly:
35 |
36 | line: 60, Data folder (Test data)
37 | data_path=os.path.join("I:\Kaggle_multimodal\Test\Test\\")
38 | line: 62, Predictions folder (output)
39 | outPred=r'.\training\test'
40 | line: 64, Submision folder (output)
41 | outSubmision=r'.\training\test_submission'
42 |
43 | It takes about ~20 second for each example file using only skeleton information. (I use Theano GPU model, but I reckon CPU model should almost of the same speed)
44 |
45 | Train
46 | -------
47 | To train the network, you first need to extract the skeleton information
48 | 1)Step1_SK_Neutral_Realtime.py--> extract neutral frames (aka., 5 frames before and after the gesture)
49 | 2) Step1_SK_Realtime.py--> extract gesture frames
50 |
51 | 3)Step1_DBN_Strucutre2.py-->Start training the networks (Step1_DBN.py specifies a smaller networks, train faster, but the larger the net is always better)
52 |
53 | Voila, here you go.
54 |
55 |
56 | Contact
57 | -------
58 | If you read the code and find it really hard to understand, please send feedback to: stevenwudi@gmail.com
59 | Thank you!
60 |
--------------------------------------------------------------------------------
/SK_normalization.pkl:
--------------------------------------------------------------------------------
1 | (dp1
2 | S'Std1'
3 | p2
4 | cnumpy.core.multiarray
5 | _reconstruct
6 | p3
7 | (cnumpy
8 | ndarray
9 | p4
10 | (I0
11 | tS'b'
12 | tRp5
13 | (I1
14 | (L528L
15 | tcnumpy
16 | dtype
17 | p6
18 | (S'f4'
19 | I0
20 | I1
21 | tRp7
22 | (I3
23 | S'<'
24 | NNNI-1
25 | I-1
26 | I0
27 | tbI00
28 | S'c^\xae=0\xcf\x11>\x84z\x8f=\x1e\x9c\x06=\xe2\xb5k=\xca\x04\x8b=}.\xd6=\xea.B>2\xaa\xac=\x9f"\x8d=\xd4\xaf\xd5=,\xcc\x10>\x8d{\xe6=\xa9\x10y>\xeaL+>d\xb4K=#\xba\x81=M\xfe\xbd=#\xf0\x04>\x88\x8c\x97>\x0eA1>\xe1\xffC=\xee\xf1\x90=SK\xbb=vF\x15=\xa8lk=+\xe1\x9f=\x80\xb2\x1b=V\x84m=\xa7;\xa5=\x07\xd7\xb4=1:?>?\x1d\xe0=\xc9\x17\xf0\x06\x9a,>\xa9s\x1e>\xf1R\x9e>\x9d\xc48>\xfd\x04\xbf=\x01oA>7\x89\x04>\xa8\x93.>\x00b\xb4>\xb0B<>\xd1\xfc\xbd=\xb4FE>Tz\x03>\xf1\xde\xb8=\x1c\xf9;>@\xf8\xeb=\xc4\xf7\xb4=\xa5\x8c;>\xb9\xf8\xeb=\x9b%\xdb=b\xf1n>\xcam\xf1=Lvh=\xdb\x03\xa4=E:\xf1=\xccQ\xcc=[\x01i>((\x19>s\x86\r=e\xc3\xbd<\xdd\xe8<=\x1c*\xf1=x6\x90>\xd8\xd8\x1f>\xf0\x18\xe1<\x03\x8e\xfc<^\xd80=\xdd:\x9a<\xc9m\xd4\x93o\x84>l\x0f2>\x8e\xf7,>`"\xaf>\xdc\xf1;>\x15$\xe1=\xad\x01q>j\x9a\x0b>LD=>.7\xc3>\'\xd2=>\xc8\xb2\xe1=\x9blt>\x8e\xed\n>\xd4!\xde=\x9a}k>\xcc\x8e\xfa=0\xef\xd9=\xf3\xf2j>g1\xf9=\x85\x04\xbd=\xe9\x14.>\x0b\xca\xb0=S\xfc9=\x16\xc1\x9c=\x9a\xcf\xb6=\xacT\xe9=\xb3 g> \xe7\xd4=\x06\xd2^=h(\xae=\xdd\x04\xf2=\xf6\x1d@=;\xfb\xa1=\xeb\xc5\xd4=\xcc\xcb@=(\x88\xa3=\x11\x0f\xd1=\xdb\x14\xbf=".h>\xb3\xb5\x03>v\xd9\t=Jrv=\x05X\x04=b\xcc\xc7=\xe0\xe5k>\xa81\x19>\xf4,\xc4=;Mi>\xbch\x0b>xD\xc0=\x96Ui>\xe1.\x08>\xd8\xd6\xe7=\xcb\xf2\x8f>H\x8e\x0c>\xce\xfd\xf0<\xc6\xd8\xe4<\x9c\xa3M=\xaeW\xcf<\xf1\xbb\xe7<\xbd\x1c\x02=t5\xc9\x12\x11 >t\xb3\xeb=\x99Z\x90>!U\x12>J\xeb\xe7=\xacN\x90>m\xaf\x0e>\xb6\xc6\xd3<\xff\xcb!=\x97^C=\xb0\x9f\xd0\xce0=*\xc8{=^\x98\xec;x\xe9\xe0;\x9c\xd5\xa4<\x98\x8a\xda;\xb7<\x05<\x17[\xbf<\xc1G\xae=Km\x11>\x83z\x93=N\xa9\x07= El=\x1eU\x92=\xf9\xf2\xd5=5\xbcA>\xd2(\xaf="\xe1\x8c=o\xc4\xd5=7\x83\x12>\x9b\x85\xe6=\xef\xe5x>\x93>,>\x94qK=\xbd\xdb\x81=\x10\xe7\xc2=k\xfe\x04>jr\x97>/\x0e2>M\x19D=\x9a\xf7\x90=B\x1e\xc0=r\xa3\x15=\x94\xa4k=7\xda\xa5=+\x03\x1c=\xf7\xc1m=\x00\xc0\xaa=\x14^\xaf=3\x0f\x13>=>\x9a=\xda\x84\x96<\xeb4\xd2
\xf7x\xe5=\xe8\xad\x08=F\xeag=\xab\x99\x17=4a\xe7=\xb7\x08\\> \x93.>X\x06\x1e>\xfez\x9e>0::>\x9b$\xbf=\x8d\x97A>\xad\x9c\x06>\x0bB.>~\x81\xb4>\xaa{=>\xc3%\xbe=\xb0FE>Dv\x05>\xa9)\xb9=\x7f\x1f<>\xf6\xcc\xf0=8R\xb5=\x92\xb6;>\xda\xb0\xf0=K/\x07=Gyk=\xa1t\x90=\xd7\xc3\xb4=\xcc(?>\x98%\xe3=f+\x81;\x12R\x88;qJ\xa9<\x9b\x0c\xdb=\x05\xe0n>54\xf4=\x97Gh=\x9c\xff\xa3=\xec\x01\xf5=\x892\xcc=m\xfdh>=\x8f\x1a>\x16@\r=!u\xbf<\xa5\xdcN=1\x12\xf1=\xab4\x90>\xfd*!>\xd8\xea\xe0<\xde\t\x00=\xc4\xfdB=\x9a\xc0\x9b\xc8\x0b\xb6=(6\x10=\xe9"q=\xd4#%=\x88\xa0\xdb=D\x1eo>B\xbf\xf6=|\x82\xb7&\xa5\x84>@\x134>\x89j,>=b\xaf>\xfd\x95=>\x80d\xe1=\x83-q>\xcc\xba\r>n\xc2<>\x05s\xc3>\x0eO?>\xb1\xed\xe1=\x16mt>\t\xf3\x0c>x~\xde=\x87\xa8k>\xbd{\xff=\xdd\\\xda=a"k>t\x10\xfe=W\xe6\x8c=\xbf\xc7\xd5=\x9cQ\x12>\x00\xa4\xe7=L\x84[>LV->\xdc$h=\xd2\x1b\xa4=\xd0\x0c\xf5=\xbf\x17\x04>yT\x84>\xf5\xaf2>\xda\x8cF<\x83:Q<\xbah\xda<\t\xff\xbc=k\x80->6%\xb3=p\';=#;\x9d=b\xb6\xbd=\x12/\xe9=\x85of>\xaa\xbb\xd5=\xb5\x85^=B\x13\xae=\x9a\x1e\xf6=\xfa0@=\xb2\x0f\xa2=v\xbc\xd9=&\xd8@=%\xa3\xa3=\x1f\xe1\xd5=\xc8,\xe6=\x8b\x97y>\xdfq->u \x1e>\xa9\x8e\x9e>io:>\xf7d\xcc=QWi>\x9bJ\x1b>\xd4\xbd,>\x1cP\xaf>\x04q=>#\x02\xbe=a\x970>\x057\xbd=\xf6\x16\xacm\xec\x06>\xe4\x9a\x1b=\x9cm\x91=\xfa\xd43=e \xc8=\n\x0bl>\x80o\x1b>\xbd_\xc4=\x1f\x96i>\xe5\x16\x0e>\x1b\xaa\xc0=Y\xa3i>T\xe5\n>\xc4\x98K=\x9f\xa8\x81=\xb6\x84\xc2=\x19\x01\xbf=\x0ebA>\xd0\x06\x06>\xfe\x8e\r=\xceh\xbf\xf1\xfe\x0c>\x88!;=E\x87\x9c=\xf3\xa2\xba=\xa9\xe2\xbe=\xe6\x10h>\xfb\xd4\x04>at\xde;b?\xaa;\x87\xbd\xaf-\x9a\r>\x84:\xf1]\xdd\x97>m\xa33>g\x1d.>\'\xb9\xb4>\xb3T>>\x89R\xf1=Qi\x90>\xc28">\xcc\xda<>-\x84\xc3>\xcc\xcf?>\xba$\xea=e\x93i>I\xb2\xdf=\xd3\xfb%=o\xe4\x97=\xfe\x94F=\xf3h\xe8=O5\x90>\xc4\xea\x0f>\xa6\x1e\xce<\x7fM@=\x0e\x9c\x16=C\xde\xee=\xfb\x9c\x91>\x97\x86">\xc9\xf2\xeb=g\x87\x90>_A\x15>\xa7\\\xe8=\x1d~\x90>\xb3\xac\x11>\x96rD=\xd9\x9e\x90=\x82\xd4\xbf=\xa2\x04\xbe=\xb4@E>\x83\x02\x05>\xfe\x95\xe3<0\x96\xfc<\xf6\x84D=\xdf\xb8\xe1=\x06jt>i^\x0c>\x8aL_=(\x08\xae=\xa3\x8f\xf5=\x04\xae\xc7=\xd7\xf8k>\xde\x88\x1a>1y\xf2\xd2X!>\xaa\x82Z;]\x94\xca;\xf6\xdd\xa7<\xf1R\xd6<\xe9\\ =*\xdcU=t\xff\xd2<\xc7\x1e/=B\xb7\x84=\xcaz\x15=\xbcak=g\x07\xa5=R\xc4\xb8=\xe1\xf7;>\xe5,\xef=\xcbh\x9b{\x81\xfd=\xcc\x19@=X\xfa\xa1=\xb9\x9f\xd8=/\x19\xc4=\xd2Vi>\xe6\xbe\x0c>\xf7b\xcf\xd8\x93\x13>`\xb5\xd3o*\xef=d\xa7\xa6<\x91\xd3\xf5<&\xb4A=\x92\xc2\xd9=r\xf0j>N"\xfc=\xe5\xd1@=p}\xa3=\x0c\n\xd5=g\x0e\xc0=\x90[i>\xbd\x8b\t>o8\xc9<&~\x07=r_5=Y\xb8\xe7=OR\x90>\x19\xf2\x0f>\x98\xc3\xd0\xb00=\xb9\x8b\x84=m4\xfe;\xda\xb4\xe6;\x17\xea\xec<\xec\xa4);\x1b~\xd8:\x14)\xa7<'
29 | tbsS'Mean1'
30 | p8
31 | g3
32 | (g4
33 | (I0
34 | tS'b'
35 | tRp9
36 | (I1
37 | (L528L
38 | tg7
39 | I00
40 | S'\xb25::\xa8\xcc\x06>\x7f\xc3\xde=\xf1\x85z= \x95j\xbet^\x1f\xbd\xa2\xd6T<\x08\x0b8>\x9c@\r>s\x1a\xe4>]\x8e\xdf\xbc\x8d\xd2\x8f<%\xa9\xd4>\t\xb0\xda<\x11\x8a\x16>\\\x1a\xc2>]Kh\xbe\xeb\xf34\xbd\xaf\xfc\xca>\x11DI=\x86o7>\xeb\xe5e>Q\x9a\x05\xbf\x97\xe1Z\xbc\xa5\x08b>(\xadWQ]\x9a=\xcc\x04\x83\xbaq\x9dw=\x84\xb0\xb8\xbe\x16:\x17\xbe\xc73I<\xc5\xfaD=\x12\xf4\xee<\xc2\xbd\xe3>\xde\xbd"\xbe\xc0\xce\xba\xbdJK\xd4>\x1f\xed\xd6\xbd\xba\xa0\x1c=\x1c\xbd\xc1>\xb4\x8b\xb7\xbe{\x9f\x1c\xbe\xbf\x9f\xca>\x01\xf5\xa8\xbd\xac\x19\x90=\xa5,e>gL\'\xbf~\x1f\xfa\xbd\x85Na>\x82\xa3\xf2\xbdE\xb6\x1e\xbe\xf8{a>\xcbvf\xbdi\xcf\xe0\xbd\xcfOE\xbd\x12P\xd1>\xee\x175>\x9f\xc9\xc4>\x8e\xa3N>\xf1Fg=\x8dX\xb5>>\xf5\x82>\x04a>>\xb7\xc8\xa2>\xea\x9e\x12;\x8b\xab\xac\xbb|\xac\xab>\xd8r\x8e>\xefE_>\xcaD\'>A\xe9\x95\xbe\xa2L\xd1<\xc4g#>_\x0fx>\xcd\x95\xef\xbb\x05\x94#>\x91\xe1\x9b>nF\x1b=4t\xdd><\xfcS\xbe\xd5\x8b\xf6\xbd\x92\x02\xce>\xd0\xb4\x1c\xbe\xf6\x9b\x14<\x94s\xbb>\xe2*\xd0\xbe\xd1}:\xbe\xd7\xb9\x05\xbe\x9d\xb9(=\xf6\x98X>\xaf\x9c3\xbfL\xee\x1a\xbeh\xbcT>\x1c\x90*\xbeY\x94<\xbe\xd3\xe9T>\xde\xb9\xd5\xbd\xa6F\x0e\xbeM\r\xf7\xbce\x1f]=\xd1\x8f\x04>\xc0\xff\x87\xbd\x9aYL\xbe\x92\xdc|\xbd&\xebH\xbdG\x86\x9c=\x8ft%>\xf7Mb\xbe\xab:\xfd\xbe_D\xfd\xbc=+f\xbeR\xb2%=\xc1\x9c\x82\xbd\xc4\xfde\xbe\x89?\xd2=\xd6\x02\x98\xbc\x8ax\x14\xbde\xd0\x81\xbe\x1b\xc7C\xbe\x9d\xca\x9a\xbc^\xda\xb7\x92\x03=\xf0lC\xbe\xefn\x0c\xbf.8$\xbe\x01IG\xbe\xd7\xb0]\xbc"\xdeE\xbe\x83\x1cG\xbeoaG=j\x90\x17\xbe\x1d\'\x8e<)N\x8d>\x1b\xacd>6N\x1e\xbe8\x0e\x97\xbe\xa6w\xfc<\x13+"\xbeI\xc5u>G\xd6\x05\xbb9\xfe!\xbe\xe0\xbc\x9a>\xe5\xdb0="\x130\xbe\xd4-\x12\xbf\xe1\x1cE\xbe\xf5\xef3\xbe\xa4Y\x13\xbd5\xc4f\xbeZ\xc33\xbe\x9f\xe8\xd6<\xcet8\xbe\xc9=w\xbbx\xf8\x08?\x14\x99\x06\xbd\x95\x12l\xbb0\xe6\x18?\xec\x81J<\xfb\xab29\xd7\xcc~=\xa6:9=l\xec\x0c9q\x18\x12\xb8\xe0f\xcf9\xc3q]:\x0e\xc3\x06>\xae\x92\xdf=\x06\x14{=\xd5\x9ej\xbe\xf3\xbf\x1d\xbdI\nW<\x83\x018>V\xa8\r>\xff,\xe4>=\xd6\xdf\xbc\xd7\x0f\x93<\x10\xbb\xd4>\xdfe\xda<9\xf2\x16>\x9f,\xc2>\xb9Th\xbeWU3\xbd\xcf\x0e\xcb>c I=\xc3\xd67>\xf8\x08f>\x10\x9d\x05\xbf\x19gT\xbc9,b>k\x1bW<\xcc\xb2;\xbd\x87Yb>\xa8J\x9a=jV\x1e\xbaLz\xeb\xb9\xa3\xfc\x06\xbe\x12V\xdd\xbd\xc2\xf1\x889[6B\xb9\x97P7:H\xafx=w\xc8\xb8\xbe\r\x82\x16\xbe\xfcyMp\xee"\xbe(`\xb9\xbd\x1bn\xd4>]N\xd7\xbd)~\x1f=s\xdf\xc1>h\xa3\xb7\xbe\x8d\xe8\x1b\xbe\x9f\xc2\xca>\x10W\xa9\xbdB\x88\x91=\xe9pe>dX\'\xbf\xab\xb2\xf8\xbdb\x93a>F\x04\xf3\xbd3\x00\x1e\xbe\x86\xc1a>":g\xbd\xb8a\xdf\xbd\x8a1z\xbd\xcd\x9ej>\x9f: =\xaaHw\xbd\x86\xb5\xb8>Mp\x17>\xb5?\xa98Q\x18!8\x8f\xba[9\xcd\xfbD\xbd\xf2T\xd1>\x7fO5>\xf7\xd3\xc4>=\xadN>\xb1"h=\x18c\xb5>=\xfa\x82>>\x98>>,\xd3\xa2>"$\x15;\xd9\xcc\xa5\xbb\xdb\xb6\xab>\xecw\x8e>N|_>_Z\'>T\xe4\x95\xbe\xa5\x03\xd3<\xc1|#>\x07\x1ax>\x86\xb7\xe8\xbb\xad\xa9#>\xbd\xe6\x9b>\xba"\x1c=F\xe3O\xbcvF8\xbeuj\x0c\xbe\xd7?D\xbc\xe2\xe9E\xbdx?\xe8\xbc7\x8dF=\xf4m\xd1\xbe$A4\xbe\x8cZ\x9e9\xf1\xc6m\xb9h\x97V:N\x9b\xdd>\xf26T\xbe\x8e\xde\xf4\xbdY*\xce>\xd6\xef\x1c\xbe\xdf\x05"<\x9e\x9b\xbb>\xb1H\xd0\xbe\xdb\xa69\xbe.~\xc4>i\xf5\x05\xbe\xd5\x13,=m\xe8X>\xed\xab3\xbf\xe8\x17\x1a\xbeO\x0cU>\x85\xcb*\xbe\x00\xbe;\xbe$8U>\x990\xd6\xbd\xc4o\r\xbe\x9c\x1e\xe4\xbe\xef\x04\xdf<\xbb3\x8c\xbc\x85\xc1\xe3\xbe4\xad">\xa8\xb6\xbb=\xfa\xcd\xc4\xbe\x9e\xb4N\xbe\xd5ze\xbd\xb9w\xdd\xbe(\xebS>\xaas\xf7=\x05\x0b\xf3\xb7\x1aP\x88\xb8\x86\x9d\xe79\x91I\xf7\xbc\xf4\xd9\\=\xec\x03\x05>\x9c\x0e\x88\xbd\xdaiL\xbe\xee\r{\xbdt\nI\xbd\xa7c\x9c=\xb5\xe7%>\xefUb\xbefD\xfd\xbe^\xa5\xf9\xbc\xfb2f\xbeWn%=#\xb5\x81\xbd6\x06f\xbe\x8e\x1d\xd2=Wd\x94\xbcQ\xb8\xd4\xbe\xd1{\xdd\xbc\'{\x15\xbe\xd0Y\xd4\xbe\x1e9\xd6=[f\x18\xbd\x82f\xb5\xbe\xa9"\x83\xbe1S=\xbe\x03\x12\xce\xbef[\x1c>\xb3\xb2\x03\xbc\x80%\xf6<\xb0\x84^\xbd/\x81\x03\xbeW)\xe9\xb8r\xed\xb2\xb9\xd5J\x87:w\xed\x14\xbd\x90\xfd\x81\xbe\x04\xb8B\xbe\x97\xb3\x9b\xbc\xfd\x0e\xb5<\xd4\xcc\x07=~\x89C\xbe\xc0\x85\x0c\xbf\x08)#\xbe\xa0fG\xbe\xfbHc\xbco\xcfD\xbeV9G\xbe\xce\xfbE=y\x81\x16\xbe\x07\x12\xc2\xbe\xb9Xh>FS5=x\xb3\xc1\xbe\x0b\x92\xb7>\x8d\xb7\x1c>U\xbf\xa2\xbehd\x0f\xbb\xc7\x9f\xaf;1k\xbb\xbe\x991\xd0>\x17\x95:>\x98$\x88=\x85fL>\xb9;}=\xab\xc2\x14=\xf3\xd6\x81>w\xdeC>\x1b\x03\x948&\xceN8x-\xbd8\xb7\xba\x8e<\x9cT\x8d>\x1a\xc4d>\xa6;\x1e\xbe\xd5\x07\x97\xbe\x035\xfd<\xf2\x18"\xbeC\xd2u>\x0c\xda\xff\xba\x9b\xeb!\xbeQ\xc3\x9a>\xdf:1=\xf6\x13\xcb\xbet\xe3J\xbd\xbe\x1f6\xbe\xcc\xb6\xca\xbe\xf5&\xa8=\xa4{\x8d\xbd\x8b\xc2\xab\xbe\xa2\xa6\x8e\xbe\x9c\xf6]\xbe\x86l\xc4\xbe!R\x05>\x96\x7f#\xbdO6H=JU\x9d\xbdi%$\xbe\x02`\x99\x11\xaa\xcf\xbc\x9a|X\xbe\x07\xa13?\x9d"\x1b>1jb>\xa9C\xfd>W\xe6\xfe<\xa9\x87C>Us\x0c?\xacl$>\xffi\x1e>\x08\x16\x97>\xf0\xd4\xfa\xbc\xa1.0>\x012\x12?\xfcPE>\xe8\x16\xdb8\xcb2\x858\x86^Q9\x0fep\xbb\xb9\xfc\x08?R\xc8\x05\xbd\x859e\xbb(\xea\x18?\xb7\xc6M<\xeb\xf6a\xbe\x05\xa5W\xbc\x90\xf7==.=a\xbe\x9f\xa4\xf2=\xdd\xe0\x1e>GV#\xbe\xf1\x0ex\xbeU\xcd\xf4;\xd4\xaaT\xbe\xb3\x90*>\xb9\xbe<>~\xed\xaf%\xbd\xc7\xf0\x82=\xb4ZG>\xcc\xb9]<5\x08F>2<">\xca\xc4u\xbe\xfcE\x10;P\x014>\x98[\x13=\xf7\xedf>\xf2\x8e{;J\xf8\x08\xbf\xbe?\x07=\x8b(\x8a8\x91\xa6\x156#\xf7&9\xfa\xc0w9\xfa\xcf~=\x12\xe29=\x83\x1eb\xbe\xb1^\x9a\xbd\x14e\x98:\xc4da\xbe\x12tf=j%\xe1=\xb7|#\xbe\xfc\xe1\x9b\xbe\x01\x9c\x1a\xbdz\xd1T\xbe\xdb\xb8\xd5=\x15q\x0e>\xc8\x15f>\xbd@\xd2\xbd\x01Y\x99\'dG\xbd\xf7\xba\x17>\x0b\x16">>\xbd\x9a\xbe\xb510\xbd\x03\xdb3>F\xed\xd6\xbc(\x9f8>\xde\xe9q;<\xe6\x18\xbf\\\xd5G\xbc\x8eo\xaa\xb8\xb4\xd0~\xbd\xc4\x8e8\xbd\xf1\xea\xba8\xe3\n-\xb6\xba\t+9'
41 | tbs.
--------------------------------------------------------------------------------
/Step1_DBN.py:
--------------------------------------------------------------------------------
1 | import cPickle
2 | import gzip
3 | import os
4 | import sys
5 | import time
6 | import numpy
7 | import sys
8 | sys.path.append(r'C:\Users\PC-User\Documents\Visual Studio 2012\Projects\Theano\Tutorial')
9 |
10 | import theano
11 | import theano.tensor as T
12 | from theano.tensor.shared_randomstreams import RandomStreams
13 | from logistic_sgd import LogisticRegression
14 | from mlp import HiddenLayer
15 |
16 | from rbm import RBM
17 | from grbm import GBRBM
18 | from utils import zero_mean_unit_variance
19 | from utils import normalize
20 | from GRBM_DBN import GRBM_DBN
21 | from sklearn import preprocessing
22 |
23 | def shared_dataset(data_xy, borrow=True):
24 | """ Function that loads the dataset into shared variables
25 |
26 | The reason we store our dataset in shared variables is to allow
27 | Theano to copy it into the GPU memory (when code is run on GPU).
28 | Since copying data into the GPU is slow, copying a minibatch everytime
29 | is needed (the default behaviour if the data is not in a shared
30 | variable) would lead to a large decrease in performance.
31 | """
32 | data_x, data_y = data_xy
33 | shared_x = theano.shared(numpy.asarray(data_x,
34 | dtype=theano.config.floatX),
35 | borrow=borrow)
36 | shared_y = theano.shared(numpy.asarray(data_y,
37 | dtype=theano.config.floatX),
38 | borrow=borrow)
39 | # When storing data on the GPU it has to be stored as floats
40 | # therefore we will store the labels as ``floatX`` as well
41 | # (``shared_y`` does exactly that). But during our computations
42 | # we need them as ints (we use labels as index, and if they are
43 | # floats it doesn't make sense) therefore instead of returning
44 | # ``shared_y`` we will have to cast it to int. This little hack
45 | # lets ous get around this issue
46 | return shared_x, T.cast(shared_y, 'int32')
47 |
48 | def load_CodaLab_skel(ratio_train=0.9, ration_valid=0.1):
49 | print '... loading data'
50 |
51 | f = file('Feature_train_realtime.pkl','rb' )
52 | Feature_train = cPickle.load(f)
53 | f.close()
54 |
55 | f = file('Feature_all_neutral_realtime.pkl','rb' )
56 | Feature_train_neural = cPickle.load(f)
57 | f.close()
58 |
59 | #Because we have too much neural frames, we only need part of them
60 | rand_num = numpy.random.permutation(Feature_train_neural['Feature_all_neutral'].shape[0])
61 |
62 |
63 | F_neural = Feature_train_neural['Feature_all_neutral'][rand_num]
64 | T_neural = Feature_train_neural['Targets_all_new'][rand_num]
65 | Feature_all = numpy.concatenate((Feature_train['Feature_all'], F_neural))
66 | Target_all = numpy.concatenate((Feature_train['Targets_all'], T_neural))
67 |
68 | rand_num = numpy.random.permutation(Feature_all.shape[0])
69 | Feature_all = Feature_all[rand_num]
70 | Target_all = Target_all[rand_num]
71 | Target_all_numeric = numpy.argmax(Target_all, axis=1)
72 | #train_set, valid_set, test_set format: tuple(input, target)
73 | #input is an numpy.ndarray of 2 dimensions (a matrix)
74 | #witch row's correspond to an example. target is a
75 | #numpy.ndarray of 1 dimensions (vector)) that have the same length as
76 | #the number of rows in the input. It should give the target
77 | #target to the example with the same index in the input.
78 |
79 | # we separate the dataset into training: 80%, validation: 10%, testing: 10%
80 | train_end = int(rand_num.shape[0]*ratio_train)
81 | valid_end = int(rand_num.shape[0]*(ratio_train+ration_valid))
82 |
83 | # Wudi made it a small set:
84 | train_set_feature = Feature_all[0:train_end,:]
85 | train_set_new_target = Target_all_numeric[0:train_end]
86 |
87 | # Wudi added normalized data for GRBM
88 | [train_set_feature_normalized, Mean1, Std1] = preprocessing.scale(train_set_feature)
89 |
90 | import cPickle as pickle
91 | f = open('SK_normalization.pkl','wb')
92 | pickle.dump( {"Mean1": Mean1, "Std1": Std1 },f)
93 | f.close()
94 |
95 | train_set_x, train_set_y = shared_dataset( (train_set_feature_normalized, train_set_new_target))
96 |
97 | valid_set_feature = Feature_all[train_end:valid_end,:]
98 | valid_set_new_target = Target_all_numeric[train_end:valid_end]
99 | valid_set_feature = normalize(valid_set_feature, Mean1, Std1)
100 | valid_set_x, valid_set_y = shared_dataset((valid_set_feature,valid_set_new_target))
101 |
102 | # test feature set
103 | test_set_feature = Feature_all[valid_end:,:]
104 | test_set_new_target = Target_all_numeric[valid_end:]
105 | test_set_feature = normalize(test_set_feature, Mean1, Std1)
106 | test_set_x, test_set_y = shared_dataset((test_set_feature,test_set_new_target))
107 |
108 | rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
109 | (test_set_x, test_set_y)]
110 | return rval
111 |
112 | def test_GRBM_DBN(finetune_lr=0.1, pretraining_epochs=100,
113 | pretrain_lr=0.01, k=1, training_epochs=500,
114 | batch_size=200):
115 | """
116 | Demonstrates how to train and test a Deep Belief Network.
117 |
118 | This is demonstrated on MNIST.
119 |
120 | :type learning_rate: float
121 | :param learning_rate: learning rate used in the finetune stage
122 | :type pretraining_epochs: int
123 | :param pretraining_epochs: number of epoch to do pretraining
124 | :type pretrain_lr: float
125 | :param pretrain_lr: learning rate to be used during pre-training
126 | :type k: int
127 | :param k: number of Gibbs steps in CD/PCD
128 | :type training_epochs: int
129 | :param training_epochs: maximal number of iterations ot run the optimizer
130 | :type dataset: string
131 | :param dataset: path the the pickled dataset
132 | :type batch_size: int
133 | :param batch_size: the size of a minibatch
134 | """
135 |
136 | datasets = load_CodaLab_skel(ratio_train=0.9, ration_valid=0.08)
137 |
138 | train_set_x, train_set_y = datasets[0]
139 | valid_set_x, valid_set_y = datasets[1]
140 | test_set_x, test_set_y = datasets[2]
141 |
142 | # compute number of minibatches for training, validation and testing
143 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
144 |
145 | # numpy random generator
146 | numpy_rng = numpy.random.RandomState(123)
147 | print '... building the model'
148 | # construct the Deep Belief Network
149 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
150 | hidden_layers_sizes=[1000, 1000, 500],
151 | n_outs=201, finetune_lr=finetune_lr)
152 |
153 | #########################
154 | # PRETRAINING THE MODEL #
155 | #########################
156 | print '... getting the pretraining functions'
157 | pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
158 | batch_size=batch_size,
159 | k=k)
160 |
161 | print '... pre-training the model'
162 | start_time = time.clock()
163 | ## Pre-train layer-wise
164 | for i in xrange(dbn.n_layers):
165 | if i==0:
166 | # for GRBM, the The learning rate needs to be about one or
167 | #two orders of magnitude smaller than when using
168 | #binary visible units and some of the failures reported in the
169 | # literature are probably due to using a
170 | pretrain_lr_new = pretrain_lr*0.1
171 | else:
172 | pretrain_lr_new = pretrain_lr
173 | # go through pretraining epochs
174 | for epoch in xrange(pretraining_epochs):
175 | start_time_temp = time.clock()
176 | # go through the training set
177 | c = []
178 | for batch_index in xrange(n_train_batches):
179 | c.append(pretraining_fns[i](index=batch_index,
180 | lr=pretrain_lr_new))
181 | end_time_temp = time.clock()
182 | print 'Pre-training layer %i, epoch %d, cost %f ' % (i, epoch, numpy.mean(c)) + ' ran for %d sec' % ((end_time_temp - start_time_temp) )
183 |
184 | end_time = time.clock()
185 | print >> sys.stderr, ('The pretraining code for file ' +
186 | os.path.split(__file__)[1] +
187 | ' ran for %.2fm' % ((end_time - start_time) / 60.))
188 |
189 | ########################
190 | # FINETUNING THE MODEL #
191 | ########################
192 |
193 | # get the training, validation and testing function for the model
194 | print '... getting the finetuning functions'
195 | train_fn, validate_model, test_model = dbn.build_finetune_functions(
196 | datasets=datasets, batch_size=batch_size,
197 | learning_rate=finetune_lr)
198 |
199 | print '... finetunning the model'
200 | # early-stopping parameters
201 | patience = 4 * n_train_batches # look as this many examples regardless
202 | patience_increase = 2. # wait this much longer when a new best is
203 | # found
204 | improvement_threshold = 0.999 # a relative improvement of this much is
205 | # considered significant
206 | validation_frequency = min(n_train_batches, patience / 2)
207 | # go through this many
208 | # minibatche before checking the network
209 | # on the validation set; in this case we
210 | # check every epoch
211 |
212 | best_params = None
213 | best_validation_loss = numpy.inf
214 | test_score = 0.
215 | start_time = time.clock()
216 |
217 | done_looping = False
218 | epoch = 0
219 |
220 | while (epoch < training_epochs) and (not done_looping):
221 | start_time_temp = time.clock()
222 | epoch = epoch + 1
223 | for minibatch_index in xrange(n_train_batches):
224 |
225 | minibatch_avg_cost = train_fn(minibatch_index)
226 | iter = (epoch - 1) * n_train_batches + minibatch_index
227 |
228 | if (iter + 1) % validation_frequency == 0:
229 | import warnings
230 | warnings.filterwarnings("ignore")
231 | validation_losses = validate_model()
232 | this_validation_loss = numpy.mean(validation_losses)
233 |
234 | # if we got the best validation score until now
235 | if this_validation_loss < best_validation_loss:
236 |
237 | #improve patience if loss improvement is good enough
238 | if (this_validation_loss < best_validation_loss *
239 | improvement_threshold):
240 | patience = max(patience, iter * patience_increase)
241 |
242 | # save best validation score and iteration number
243 | best_validation_loss = this_validation_loss
244 | best_iter = iter
245 |
246 | # test it on the test set
247 | test_losses = test_model()
248 | test_score = numpy.mean(test_losses)
249 |
250 | end_time_temp = time.clock()
251 | print(('epoch %i, minibatch %i/%i, validation error %f %%' \
252 | 'test error of best model %f %%, used time %d sec') %
253 | (epoch, minibatch_index + 1, n_train_batches,this_validation_loss * 100.,
254 | test_score * 100., (end_time_temp - start_time_temp)))
255 |
256 | if patience <= iter:
257 | done_looping = True
258 | break
259 |
260 | end_time = time.clock()
261 | print(('Optimization complete with best validation score of %f %%,'
262 | 'with test performance %f %%') %
263 | (best_validation_loss * 100., test_score * 100.))
264 | print >> sys.stderr, ('The fine tuning code for file ' +
265 | os.path.split(__file__)[1] +
266 | ' ran for %.2fm' % ((end_time - start_time)
267 | / 60.))
268 | from time import gmtime, strftime
269 | filename = 'dbn_'+strftime("%Y-%m-%d-%H-%M-%S", gmtime())
270 | dbn.save(filename)
271 |
272 |
273 | if 0: # here for testing, where we never used
274 | ## Now for testing
275 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
276 | hidden_layers_sizes=[1000, 1000, 500],
277 | n_outs=201)
278 |
279 |
280 | dbn.load('dbn_2014-05-22-18-39-37.npy')
281 | # compiling a Theano function that computes the mistakes that are made by
282 | # the model on a minibatch
283 | index = T.lscalar('index')
284 | validate_model = theano.function(inputs=[index],
285 | outputs=dbn.logLayer.p_y_given_x,
286 | givens={
287 | dbn.x: valid_set_x[index * batch_size:(index + 1) * batch_size]})
288 |
289 | n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
290 | n_valid_batches /= batch_size
291 | temp = [validate_model(i)
292 | for i in xrange(n_valid_batches)]
293 |
294 |
295 |
296 | if __name__ == '__main__':
297 | test_GRBM_DBN()
--------------------------------------------------------------------------------
/Step1_DBN_Structure2.py:
--------------------------------------------------------------------------------
1 | import cPickle
2 | import gzip
3 | import os
4 | import sys
5 | import time
6 | import numpy
7 | import sys
8 | sys.path.append(r'C:\Users\PC-User\Documents\Visual Studio 2012\Projects\Theano\Tutorial')
9 |
10 | import theano
11 | import theano.tensor as T
12 | from theano.tensor.shared_randomstreams import RandomStreams
13 | from logistic_sgd import LogisticRegression
14 | from mlp import HiddenLayer
15 |
16 | from rbm import RBM
17 | from grbm import GBRBM
18 | from utils import zero_mean_unit_variance
19 | from utils import normalize
20 | from GRBM_DBN import GRBM_DBN
21 | from sklearn import preprocessing
22 |
23 | def shared_dataset(data_xy, borrow=True):
24 | """ Function that loads the dataset into shared variables
25 |
26 | The reason we store our dataset in shared variables is to allow
27 | Theano to copy it into the GPU memory (when code is run on GPU).
28 | Since copying data into the GPU is slow, copying a minibatch everytime
29 | is needed (the default behaviour if the data is not in a shared
30 | variable) would lead to a large decrease in performance.
31 | """
32 | data_x, data_y = data_xy
33 | shared_x = theano.shared(numpy.asarray(data_x,
34 | dtype=theano.config.floatX),
35 | borrow=borrow)
36 | shared_y = theano.shared(numpy.asarray(data_y,
37 | dtype=theano.config.floatX),
38 | borrow=borrow)
39 | # When storing data on the GPU it has to be stored as floats
40 | # therefore we will store the labels as ``floatX`` as well
41 | # (``shared_y`` does exactly that). But during our computations
42 | # we need them as ints (we use labels as index, and if they are
43 | # floats it doesn't make sense) therefore instead of returning
44 | # ``shared_y`` we will have to cast it to int. This little hack
45 | # lets ous get around this issue
46 | return shared_x, T.cast(shared_y, 'int32')
47 |
48 | def load_CodaLab_skel(ratio_train=0.9, ration_valid=0.1):
49 | print '... loading data'
50 |
51 | f = file('Feature_train_realtime.pkl','rb' )
52 | Feature_train = cPickle.load(f)
53 | f.close()
54 |
55 | f = file('Feature_all_neutral_realtime.pkl','rb' )
56 | Feature_train_neural = cPickle.load(f)
57 | f.close()
58 |
59 | #Because we have too much neural frames, we only need part of them
60 | rand_num = numpy.random.permutation(Feature_train_neural['Feature_all_neutral'].shape[0])
61 |
62 |
63 | F_neural = Feature_train_neural['Feature_all_neutral'][rand_num]
64 | T_neural = Feature_train_neural['Targets_all_new'][rand_num]
65 | Feature_all = numpy.concatenate((Feature_train['Feature_all'], F_neural))
66 | Target_all = numpy.concatenate((Feature_train['Targets_all'], T_neural))
67 |
68 | rand_num = numpy.random.permutation(Feature_all.shape[0])
69 | Feature_all = Feature_all[rand_num]
70 | Target_all = Target_all[rand_num]
71 | Target_all_numeric = numpy.argmax(Target_all, axis=1)
72 | #train_set, valid_set, test_set format: tuple(input, target)
73 | #input is an numpy.ndarray of 2 dimensions (a matrix)
74 | #witch row's correspond to an example. target is a
75 | #numpy.ndarray of 1 dimensions (vector)) that have the same length as
76 | #the number of rows in the input. It should give the target
77 | #target to the example with the same index in the input.
78 |
79 | # we separate the dataset into training: 80%, validation: 10%, testing: 10%
80 | train_end = int(rand_num.shape[0]*ratio_train)
81 | valid_end = int(rand_num.shape[0]*(ratio_train+ration_valid))
82 |
83 | # Wudi made it a small set:
84 | train_set_feature = Feature_all[0:train_end,:]
85 | train_set_new_target = Target_all_numeric[0:train_end]
86 |
87 | # Wudi added normalized data for GRBM
88 | [train_set_feature_normalized, Mean1, Std1] = preprocessing.scale(train_set_feature)
89 |
90 | import cPickle as pickle
91 | f = open('SK_normalization.pkl','wb')
92 | pickle.dump( {"Mean1": Mean1, "Std1": Std1 },f)
93 | f.close()
94 |
95 | train_set_x, train_set_y = shared_dataset( (train_set_feature_normalized, train_set_new_target))
96 |
97 | valid_set_feature = Feature_all[train_end:valid_end,:]
98 | valid_set_new_target = Target_all_numeric[train_end:valid_end]
99 | valid_set_feature = normalize(valid_set_feature, Mean1, Std1)
100 | valid_set_x, valid_set_y = shared_dataset((valid_set_feature,valid_set_new_target))
101 |
102 | # test feature set
103 | test_set_feature = Feature_all[valid_end:,:]
104 | test_set_new_target = Target_all_numeric[valid_end:]
105 | test_set_feature = normalize(test_set_feature, Mean1, Std1)
106 | test_set_x, test_set_y = shared_dataset((test_set_feature,test_set_new_target))
107 |
108 | rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
109 | (test_set_x, test_set_y)]
110 | return rval
111 |
112 | def test_GRBM_DBN(finetune_lr=1, pretraining_epochs=100,
113 | pretrain_lr=0.01, k=1, training_epochs=500,
114 | batch_size=200, annealing_learning_rate=0.99999):
115 | """
116 | Demonstrates how to train and test a Deep Belief Network.
117 |
118 | This is demonstrated on MNIST.
119 |
120 | :type learning_rate: float
121 | :param learning_rate: learning rate used in the finetune stage
122 | :type pretraining_epochs: int
123 | :param pretraining_epochs: number of epoch to do pretraining
124 | :type pretrain_lr: float
125 | :param pretrain_lr: learning rate to be used during pre-training
126 | :type k: int
127 | :param k: number of Gibbs steps in CD/PCD
128 | :type training_epochs: int
129 | :param training_epochs: maximal number of iterations ot run the optimizer
130 | :type dataset: string
131 | :param dataset: path the the pickled dataset
132 | :type batch_size: int
133 | :param batch_size: the size of a minibatch
134 | """
135 |
136 | datasets = load_CodaLab_skel(ratio_train=0.9, ration_valid=0.08)
137 |
138 | train_set_x, train_set_y = datasets[0]
139 | valid_set_x, valid_set_y = datasets[1]
140 | test_set_x, test_set_y = datasets[2]
141 |
142 | # compute number of minibatches for training, validation and testing
143 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
144 |
145 | # numpy random generator
146 | numpy_rng = numpy.random.RandomState(123)
147 | print '... building the model'
148 | # construct the Deep Belief Network
149 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
150 | hidden_layers_sizes=[2000, 2000, 1000],
151 | n_outs=201, finetune_lr=finetune_lr)
152 |
153 | #########################
154 | # PRETRAINING THE MODEL #
155 | #########################
156 | print '... getting the pretraining functions'
157 | pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
158 | batch_size=batch_size,
159 | k=k)
160 |
161 | print '... pre-training the model'
162 | start_time = time.clock()
163 | ## Pre-train layer-wise
164 | for i in xrange(dbn.n_layers):
165 | if i==0:
166 | # for GRBM, the The learning rate needs to be about one or
167 | #two orders of magnitude smaller than when using
168 | #binary visible units and some of the failures reported in the
169 | # literature are probably due to using a
170 | pretrain_lr_new = pretrain_lr*0.1
171 | else:
172 | pretrain_lr_new = pretrain_lr
173 | # go through pretraining epochs
174 | for epoch in xrange(pretraining_epochs):
175 | start_time_temp = time.clock()
176 | # go through the training set
177 | c = []
178 | for batch_index in xrange(n_train_batches):
179 | c.append(pretraining_fns[i](index=batch_index,
180 | lr=pretrain_lr_new))
181 | end_time_temp = time.clock()
182 | print 'Pre-training layer %i, epoch %d, cost %f ' % (i, epoch, numpy.mean(c)) + ' ran for %d sec' % ((end_time_temp - start_time_temp) )
183 |
184 | end_time = time.clock()
185 | print >> sys.stderr, ('The pretraining code for file ' +
186 | os.path.split(__file__)[1] +
187 | ' ran for %.2fm' % ((end_time - start_time) / 60.))
188 |
189 | ########################
190 | # FINETUNING THE MODEL #
191 | ########################
192 |
193 | # get the training, validation and testing function for the model
194 | print '... getting the finetuning functions'
195 | train_fn, validate_model, test_model = dbn.build_finetune_functions(
196 | datasets=datasets, batch_size=batch_size,
197 | annealing_learning_rate=annealing_learning_rate)
198 |
199 | print '... finetunning the model'
200 | # early-stopping parameters
201 | patience = 4 * n_train_batches # look as this many examples regardless
202 | patience_increase = 2. # wait this much longer when a new best is
203 | # found
204 | improvement_threshold = 0.999 # a relative improvement of this much is
205 | # considered significant
206 | validation_frequency = min(n_train_batches, patience / 2)
207 | # go through this many
208 | # minibatche before checking the network
209 | # on the validation set; in this case we
210 | # check every epoch
211 |
212 | best_params = None
213 | best_validation_loss = numpy.inf
214 | test_score = 0.
215 | start_time = time.clock()
216 |
217 | done_looping = False
218 | epoch = 0
219 |
220 | while (epoch < training_epochs) and (not done_looping):
221 | start_time_temp = time.clock()
222 | epoch = epoch + 1
223 | for minibatch_index in xrange(n_train_batches):
224 |
225 | minibatch_avg_cost = train_fn(minibatch_index)
226 | iter = (epoch - 1) * n_train_batches + minibatch_index
227 |
228 | if (iter + 1) % validation_frequency == 0:
229 | import warnings
230 | warnings.filterwarnings("ignore")
231 | validation_losses = validate_model()
232 | this_validation_loss = numpy.mean(validation_losses)
233 |
234 | # if we got the best validation score until now
235 | if this_validation_loss < best_validation_loss:
236 |
237 | #improve patience if loss improvement is good enough
238 | if (this_validation_loss < best_validation_loss *
239 | improvement_threshold):
240 | patience = max(patience, iter * patience_increase)
241 |
242 | # save best validation score and iteration number
243 | best_validation_loss = this_validation_loss
244 | best_iter = iter
245 |
246 | # test it on the test set
247 | test_losses = test_model()
248 | test_score = numpy.mean(test_losses)
249 |
250 | end_time_temp = time.clock()
251 | print(('epoch %i, minibatch %i/%i, validation error %f %%' \
252 | 'test error of best model %f %%, used time %d sec') %
253 | (epoch, minibatch_index + 1, n_train_batches,this_validation_loss * 100.,
254 | test_score * 100., (end_time_temp - start_time_temp)))
255 |
256 | if patience <= iter:
257 | done_looping = True
258 | break
259 |
260 | end_time = time.clock()
261 | print(('Optimization complete with best validation score of %f %%,'
262 | 'with test performance %f %%') %
263 | (best_validation_loss * 100., test_score * 100.))
264 | print >> sys.stderr, ('The fine tuning code for file ' +
265 | os.path.split(__file__)[1] +
266 | ' ran for %.2fm' % ((end_time - start_time)
267 | / 60.))
268 | from time import gmtime, strftime
269 | filename = 'dbn_'+strftime("%Y-%m-%d-%H-%M-%S", gmtime())
270 | dbn.save(filename)
271 |
272 |
273 | if 0: # here for testing, where we never used
274 | ## Now for testing
275 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
276 | hidden_layers_sizes=[1000, 1000, 500],
277 | n_outs=201)
278 |
279 |
280 | dbn.load('dbn_2014-05-22-18-39-37.npy')
281 | # compiling a Theano function that computes the mistakes that are made by
282 | # the model on a minibatch
283 | index = T.lscalar('index')
284 | validate_model = theano.function(inputs=[index],
285 | outputs=dbn.logLayer.p_y_given_x,
286 | givens={
287 | dbn.x: valid_set_x[index * batch_size:(index + 1) * batch_size]})
288 |
289 | n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
290 | n_valid_batches /= batch_size
291 | temp = [validate_model(i)
292 | for i in xrange(n_valid_batches)]
293 |
294 |
295 |
296 | if __name__ == '__main__':
297 | test_GRBM_DBN()
--------------------------------------------------------------------------------
/Step1_SK_Neutral_Realtime.py:
--------------------------------------------------------------------------------
1 | #-------------------------------------------------------------------------------
2 | # Name: Starting Kit for ChaLearn LAP 2014 Track3
3 | # Purpose: Show basic functionality of provided code
4 | #
5 | # Author: Xavier Baro
6 | # Author: Di Wu: stevenwudi@gmail.com
7 | # Created: 24/03/2014
8 | # Copyright: (c) Chalearn LAP 2014
9 | # Licence: GPL3
10 | #-------------------------------------------------------------------------------
11 | import sys, os,random,numpy,zipfile
12 | from shutil import copyfile
13 | import matplotlib.pyplot as plt
14 |
15 | from ChalearnLAPEvaluation import evalGesture,exportGT_Gesture
16 | from ChalearnLAPSample import GestureSample
17 | from utils import IsLeftDominant
18 | from utils import Extract_feature_Realtime
19 | from utils import Extract_feature_UNnormalized
20 |
21 |
22 | # Data folder (Training data)
23 | print("Extracting the training files")
24 | data=os.path.join("I:\Kaggle_multimodal\Training\\")
25 | # Get the list of training samples
26 | samples=os.listdir(data)
27 | used_joints = ['ElbowLeft', 'WristLeft', 'ShoulderLeft','HandLeft',
28 | 'ElbowRight', 'WristRight','ShoulderRight','HandRight',
29 | 'Head','Spine','HipCenter']
30 | njoints = len(used_joints)
31 | STATE_NO = 10
32 | count = 0
33 |
34 | # pre-allocating the memory
35 | Feature_all = numpy.zeros(shape=(100000, (njoints*(njoints-1)/2 + njoints**2)*3),dtype=numpy.float32)
36 | Targets = numpy.zeros( shape=(100000, STATE_NO*20+1), dtype=numpy.uint8)
37 |
38 | # Access to each sample
39 | for file_count, file in enumerate(samples):
40 | #if not file.endswith(".zip"):
41 | # continue;
42 | if file_count<650:
43 | print("\t Processing file " + file)
44 | # Create the object to access the sample
45 | smp=GestureSample(os.path.join(data,file))
46 | # ###############################################
47 | # USE Ground Truth information to learn the model
48 | # ###############################################
49 | # Get the list of actions for this frame
50 | gesturesList=smp.getGestures()
51 | # Iterate for each action in this sample
52 | # Then we also choose 5 frame before and after the ground true data:
53 | seg_length = 5
54 | for gesture in gesturesList:
55 | # Get the gesture ID, and start and end frames for the gesture
56 | gestureID,startFrame,endFrame=gesture
57 | # This part is to extract action data
58 |
59 | Skeleton_matrix = numpy.zeros(shape=(5, len(used_joints)*3))
60 | HipCentre_matrix = numpy.zeros(shape=(5, 3))
61 | frame_num = 0
62 |
63 | ## extract first 5 frames
64 | if startFrame-seg_length > 0:
65 | Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, startFrame-seg_length+1, startFrame)
66 | if not valid_skel:
67 | print "No detected Skeleton: ", gestureID
68 | else:
69 | Feature = Extract_feature_Realtime(Skeleton_matrix, njoints)
70 | begin_frame = count
71 | end_frame = count+seg_length-1
72 | Feature_all[begin_frame:end_frame,:] = Feature
73 | Targets[begin_frame:end_frame, -1] = 1
74 | count=count+seg_length-1
75 |
76 | ## extract last 5 frames
77 | if endFrame+seg_length < smp.getNumFrames():
78 | Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, endFrame, endFrame+seg_length-1)
79 | if not valid_skel:
80 | print "No detected Skeleton: ", gestureID
81 | else:
82 | Feature = Extract_feature_Realtime(Skeleton_matrix, njoints)
83 | begin_frame = count
84 | end_frame = count+seg_length-1
85 | Feature_all[begin_frame:end_frame,:] = Feature
86 | Targets[begin_frame:end_frame, -1] = 1
87 | count=count+seg_length-1
88 | # ###############################################
89 | del smp
90 |
91 | # save the skeleton file:
92 | Feature_all_new = Feature_all[0:end_frame, :]
93 | Targets_all_new = Targets[0:end_frame, :]
94 | import cPickle as pickle
95 | Feature_train = { "Feature_all_neutral": Feature_all_new, "Targets_all_new": Targets_all_new }
96 | pickle.dump( Feature_train, open( "Feature_all_neutral_realtime.pkl", "wb" ) )
97 |
98 | import scipy.io as sio
99 | sio.savemat('Feature_all_neutral_realtime.mat', Feature_train)
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
--------------------------------------------------------------------------------
/Step1_SK_realtime.py:
--------------------------------------------------------------------------------
1 | #-------------------------------------------------------------------------------
2 | # Name: Starting Kit for ChaLearn LAP 2014 Track3
3 | # Purpose: Show basic functionality of provided code
4 | #
5 | # Author: Xavier Baro
6 | # Author: Di Wu: stevenwudi@gmail.com
7 | # Created: 24/03/2014
8 | # Copyright: (c) Chalearn LAP 2014
9 | # Licence: GPL3
10 | #-------------------------------------------------------------------------------
11 | import sys, os,random,numpy,zipfile
12 | from shutil import copyfile
13 | import matplotlib.pyplot as plt
14 |
15 | from ChalearnLAPEvaluation import evalGesture,exportGT_Gesture
16 | from ChalearnLAPSample import GestureSample
17 | from utils import IsLeftDominant
18 | from utils import Extract_feature_Realtime
19 | from utils import Extract_feature_UNnormalized
20 |
21 |
22 | # Data folder (Training data)
23 | print("Extracting the training files")
24 | data=os.path.join("I:\Kaggle_multimodal\Training\\")
25 | # Get the list of training samples
26 | samples=os.listdir(data)
27 | used_joints = ['ElbowLeft', 'WristLeft', 'ShoulderLeft','HandLeft',
28 | 'ElbowRight', 'WristRight','ShoulderRight','HandRight',
29 | 'Head','Spine','HipCenter']
30 | njoints = len(used_joints)
31 | STATE_NO = 10
32 | count = 0
33 |
34 | # pre-allocating the memory
35 | Feature_all = numpy.zeros(shape=(400000, (njoints*(njoints-1)/2 + njoints**2)*3),dtype=numpy.float32)
36 | Targets = numpy.zeros( shape=(400000, STATE_NO*20+1), dtype=numpy.uint8)
37 | # Access to each sample
38 | for file_count, file in enumerate(samples):
39 | #if not file.endswith(".zip"):
40 | # continue;
41 | if file_count<650:
42 | print("\t Processing file " + file)
43 | # Create the object to access the sample
44 | smp=GestureSample(os.path.join(data,file))
45 | # ###############################################
46 | # USE Ground Truth information to learn the model
47 | # ###############################################
48 | # Get the list of actions for this frame
49 | gesturesList=smp.getGestures()
50 |
51 | # Iterate for each action in this sample
52 | for gesture in gesturesList:
53 | # Get the gesture ID, and start and end frames for the gesture
54 | gestureID,startFrame,endFrame=gesture
55 | Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, startFrame, endFrame)
56 | # to see we actually detect a skeleton:
57 | if not valid_skel:
58 | print "No detected Skeleton: ", gestureID
59 | else:
60 | ### extract the features according to the CVPR2014 paper
61 | Feature = Extract_feature_Realtime(Skeleton_matrix, njoints)
62 | Target = numpy.zeros( shape=(Feature.shape[0], STATE_NO*20+1))
63 | fr_no = Feature.shape[0]
64 | for i in range(STATE_NO): #HMM states force alignment
65 | begin_fr = numpy.round(fr_no* i /STATE_NO) + 1
66 | end_fr = numpy.round( fr_no*(i+1) /STATE_NO)
67 | #print "begin: %d, end: %d"%(begin_fr-1, end_fr)
68 | seg_length=end_fr-begin_fr + 1
69 | targets = numpy.zeros( shape =(STATE_NO*20+1,1))
70 | targets[ i + STATE_NO*(gestureID-1)] = 1
71 | begin_frame = count
72 | end_frame = count+seg_length
73 | Feature_all[begin_frame:end_frame,:] = Feature[begin_fr-1:end_fr,:]
74 | Targets[begin_frame:end_frame, :]= numpy.tile(targets.T,(seg_length, 1))
75 | count=count+seg_length
76 | # ###############################################
77 | ## delete the sample
78 | del smp
79 |
80 | # save the skeleton file:
81 |
82 |
83 | import cPickle as pickle
84 | f = open('Feature_train_realtime.pkl','wb')
85 | pickle.dump( {"Feature_all": Feature_all[0:end_frame, :], "Targets_all": Targets[0:end_frame, :] },f)
86 | f.close()
87 |
88 |
89 |
90 | import scipy.io as sio
91 | sio.savemat('Feature_all_train__realtime.mat', { "Feature_all": Feature_all[0:end_frame, :], "Targets_all": Targets[0:end_frame, :] })
92 |
93 |
94 |
95 |
96 |
--------------------------------------------------------------------------------
/Step1_transition_matrix.py:
--------------------------------------------------------------------------------
1 | #-------------------------------------------------------------------------------
2 | # Name: Starting Kit for ChaLearn LAP 2014 Track3
3 | # Purpose: Show basic functionality of provided code
4 | #
5 | # Author: Xavier Baro
6 | # Author: Di Wu: stevenwudi@gmail.com
7 | # Created: 24/03/2014
8 | # Copyright: (c) Chalearn LAP 2014
9 | # Licence: GPL3
10 | #-------------------------------------------------------------------------------
11 | import sys, os,random,numpy,zipfile
12 | from shutil import copyfile
13 | import matplotlib.pyplot as plt
14 | import cv2
15 | from ChalearnLAPEvaluation import evalGesture,exportGT_Gesture
16 | from ChalearnLAPSample import GestureSample
17 | from utils import IsLeftDominant
18 | from utils import Extract_feature_normalized
19 | from utils import Extract_feature
20 | import time
21 | import cPickle
22 | """ Main script. Show how to perform all competition steps
23 | Access the sample information to learn a model. """
24 | # Data folder (Training data)
25 | print("Extracting the training files")
26 | data=os.path.join("I:\Kaggle_multimodal\Training\\")
27 | # Get the list of training samples
28 | samples=os.listdir(data)
29 | used_joints = ['ElbowLeft', 'WristLeft', 'ElbowRight', 'WristRight']
30 | njoints = len(used_joints)
31 | STATE_NO = 10
32 | batch_num = 13
33 |
34 | # pre-allocating the memory
35 | Prior = numpy.zeros(shape=(201))
36 | Transition_matrix = numpy.zeros(shape=(201,201))
37 |
38 | for file_count, file in enumerate(samples):
39 | #if not file.endswith(".zip"):
40 | # continue;
41 | time_tic = time.time()
42 | if (file_count<651):
43 | print("\t Processing file " + file)
44 | # Create the object to access the sample
45 | smp=GestureSample(os.path.join(data,file))
46 | # ###############################################
47 | # USE Ground Truth information to learn the model
48 | # ###############################################
49 | # Get the list of actions for this frame
50 | gesturesList=smp.getGestures()
51 |
52 |
53 | for gesture in gesturesList:
54 | gestureID,startFrame,endFrame=gesture
55 |
56 | for frame in range(endFrame-startFrame+1-4):
57 |
58 | state_no_1 = numpy.floor(frame*(STATE_NO*1.0/(endFrame-startFrame+1-3)))
59 | state_no_1 = state_no_1+STATE_NO*(gestureID-1)
60 | state_no_2 = numpy.floor((frame+1)*(STATE_NO*1.0/(endFrame-startFrame+1-3)))
61 | state_no_2 = state_no_2+STATE_NO*(gestureID-1)
62 | ## we allow first two states add together:
63 | Prior [state_no_1] += 1
64 | Transition_matrix[state_no_1, state_no_2] += 1
65 | if frame<2:
66 | Transition_matrix[-1, state_no_1] += 1
67 | Prior[-1] += 1
68 | if frame> (endFrame-startFrame+1-4-2):
69 | Transition_matrix[state_no_2, -1] += 1
70 | Prior[-1] += 1
71 | del smp
72 |
73 | import scipy.io as sio
74 | sio.savemat('Transition_matrix.mat', {'Transition_matrix':Transition_matrix})
75 | sio.savemat('Prior.mat', {Prior:'Prior'})
76 | sio.savemat('Prior_Transition_matrix.mat', {'Transition_matrix':Transition_matrix, 'Prior': Prior})
77 |
78 | img = Transition_matrix*1.0*255/Transition_matrix.max()
79 | fig, ax = plt.subplots()
80 | cax = ax.imshow(temp2, interpolation='nearest', cmap=cm.coolwarm)
81 | cbar = fig.colorbar(cax, ticks=[-1, 0, 1])
82 | cbar.ax.set_yticklabels(['< -1', '0', '> 1'])# vertically oriented colorbar
83 |
84 |
--------------------------------------------------------------------------------
/Step2_SK_Prediction.py:
--------------------------------------------------------------------------------
1 | #-------------------------------------------------------------------------------
2 | # Name: Starting Kit for ChaLearn LAP 2014 Track3
3 | # Purpose: Show basic functionality of provided code
4 | #
5 | # Author: Xavier Baro
6 | # Author: Di Wu: stevenwudi@gmail.com
7 | # Created: 24/03/2014
8 | # Copyright: (c) Chalearn LAP 2014
9 | # Licence: GPL3
10 | #-------------------------------------------------------------------------------
11 | import sys, os,random,numpy,zipfile
12 | from numpy import log
13 | from shutil import copyfile
14 | import matplotlib.pyplot as plt
15 | import cv2
16 | from ChalearnLAPEvaluation import evalGesture,exportGT_Gesture
17 | from ChalearnLAPSample import GestureSample
18 | from utils import IsLeftDominant
19 | from utils import Extract_feature_Realtime
20 | from utils import Extract_feature_UNnormalized
21 | from utils import normalize
22 | from utils import imdisplay
23 | from utils import viterbi_colab_clean
24 | from utils import createSubmisionFile
25 | import time
26 | import cPickle
27 | import numpy
28 | import pickle
29 | import scipy.io as sio
30 |
31 | ### theano import
32 | sys.path.append(r'C:\Users\PC-User\Documents\Visual Studio 2012\Projects\Theano\Tutorial')
33 | import theano
34 | import theano.tensor as T
35 | from theano.tensor.shared_randomstreams import RandomStreams
36 | from logistic_sgd import LogisticRegression
37 | from mlp import HiddenLayer
38 | from rbm import RBM
39 | from grbm import GBRBM
40 | from utils import zero_mean_unit_variance
41 | from utils import normalize
42 | from GRBM_DBN import GRBM_DBN
43 | from sklearn import preprocessing
44 | ############### viterbi path import
45 | from utils import viterbi_path, viterbi_path_log
46 |
47 | #########################
48 |
49 | """ Main script. Show how to perform all competition steps
50 | Access the sample information to learn a model. """
51 | # Data folder (Training data)
52 | print("Extracting the training files")
53 | data_path=os.path.join("I:\Kaggle_multimodal\Training\\")
54 | # Predictions folder (output)
55 | outPred='./training/pred/'
56 | # Get the list of training samples
57 | samples=os.listdir(data_path)
58 | used_joints = ['ElbowLeft', 'WristLeft', 'ShoulderLeft','HandLeft',
59 | 'ElbowRight', 'WristRight','ShoulderRight','HandRight',
60 | 'Head','Spine','HipCenter']
61 | njoints = len(used_joints)
62 | STATE_NO = 10
63 | count = 0
64 |
65 | ### load the pre-store normalization constant
66 | f = open('SK_normalization.pkl','rb')
67 | SK_normalization = cPickle.load(f)
68 | Mean1 = SK_normalization ['Mean1']
69 | Std1 = SK_normalization['Std1']
70 |
71 | ## Load Prior and transitional Matrix
72 | dic=sio.loadmat('Transition_matrix.mat')
73 | Transition_matrix = dic['Transition_matrix']
74 | Prior = dic['Prior']
75 |
76 | for file_count, file in enumerate(samples):
77 | #if not file.endswith(".zip"):
78 | # continue;
79 | time_tic = time.time()
80 | if not file_count<650:
81 | print("\t Processing file " + file)
82 | # Create the object to access the sample
83 | smp=GestureSample(os.path.join(data_path,file))
84 | # ###############################################
85 | # USE Ground Truth information to learn the model
86 | # ###############################################
87 | # Get the list of actions for this frame
88 | gesturesList=smp.getGestures()
89 | ###########################################################
90 | # we check whether it's left dominant or right dominanant
91 | # if right dominant, we correct them to left dominant
92 | ##########################################################
93 | Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, 1, smp.getNumFrames())
94 |
95 | Feature = Extract_feature_Realtime(Skeleton_matrix, njoints)
96 |
97 | Feature_normalized = normalize(Feature, Mean1, Std1)
98 |
99 | ### Feed into DBN
100 | shared_x = theano.shared(numpy.asarray(Feature_normalized,
101 | dtype=theano.config.floatX),
102 | borrow=True)
103 | numpy_rng = numpy.random.RandomState(123)
104 |
105 | ### model 1
106 | ##########################
107 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
108 | hidden_layers_sizes=[1000, 1000, 500],
109 | n_outs=201)
110 | dbn.load('dbn_2014-05-23-20-07-28.npy')
111 |
112 | validate_model = theano.function(inputs=[],
113 | outputs=dbn.logLayer.p_y_given_x,
114 | givens={ dbn.x: shared_x})
115 |
116 | observ_likelihood_1 = validate_model()
117 | del dbn
118 | ### model 2
119 | ##########################
120 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
121 | hidden_layers_sizes=[1000, 1000, 500],
122 | n_outs=201)
123 |
124 | dbn.load('dbn_2014-05-24-05-53-17.npy')
125 |
126 | validate_model = theano.function(inputs=[],
127 | outputs=dbn.logLayer.p_y_given_x,
128 | givens={ dbn.x: shared_x})
129 |
130 | observ_likelihood_2 = validate_model()
131 | del dbn
132 | ### model 3
133 | ##########################
134 |
135 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
136 | hidden_layers_sizes=[2000, 2000, 1000],
137 | n_outs=201, finetune_lr=0.1)
138 | dbn.load('dbn_2014-05-25-10-11-56.npy')
139 | # Optimization complete with best validation score of 38.194915 %,with test performance 38.113636 %
140 | #....The score for this prediction is 0.792685963841
141 |
142 | validate_model = theano.function(inputs=[],
143 | outputs=dbn.logLayer.p_y_given_x,
144 | givens={ dbn.x: shared_x})
145 |
146 | observ_likelihood_3 = validate_model()
147 | del dbn
148 |
149 | ### model 4
150 | ##########################
151 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
152 | hidden_layers_sizes=[2000, 2000, 1000],
153 | n_outs=201, finetune_lr=0.1)
154 | dbn.load('dbn_2014-05-25-10-11-56.npy')
155 | # Optimization complete with best validation score of 38.194915 %,with test performance 38.113636 %
156 | #The score for this prediction is 0.777992357011
157 |
158 | validate_model = theano.function(inputs=[],
159 | outputs=dbn.logLayer.p_y_given_x,
160 | givens={ dbn.x: shared_x})
161 |
162 | observ_likelihood_4 = validate_model()
163 | del dbn
164 | #sio.savemat('observ_likelihood.mat', {'observ_likelihood':observ_likelihood})
165 | ##########################
166 | # viterbi path decoding
167 | #####################
168 |
169 | log_observ_likelihood = log(observ_likelihood_1.T) + log(observ_likelihood_2.T) \
170 | + log(observ_likelihood_3.T) #+ log(observ_likelihood_4.T)
171 | log_observ_likelihood[-1, 0:5] = 0
172 | log_observ_likelihood[-1, -5:] = 0
173 |
174 | print("\t Viterbi path decoding " )
175 | # do it in log space avoid numeric underflow
176 | [path, predecessor_state_index, global_score] = viterbi_path_log(log(Prior), log(Transition_matrix), log_observ_likelihood)
177 | #[path, predecessor_state_index, global_score] = viterbi_path(Prior, Transition_matrix, observ_likelihood)
178 |
179 | # Some gestures are not within the vocabulary
180 | [pred_label, begin_frame, end_frame, Individual_score, frame_length] = viterbi_colab_clean(path, global_score, threshold=-100, mini_frame=19)
181 |
182 |
183 | ### In theory we need add frame, but it seems that the groutnd truth is about 3 frames more, a bit random
184 | end_frame = end_frame + 3
185 | ### plot the path and prediction
186 | if True:
187 | im = imdisplay(global_score)
188 | plt.clf()
189 | plt.imshow(im, cmap='gray')
190 | plt.plot(range(global_score.shape[-1]), path, color='c',linewidth=2.0)
191 | plt.xlim((0, global_score.shape[-1]))
192 | # plot ground truth
193 | for gesture in gesturesList:
194 | # Get the gesture ID, and start and end frames for the gesture
195 | gestureID,startFrame,endFrame=gesture
196 | frames_count = numpy.array(range(startFrame, endFrame+1))
197 | pred_label_temp = ((gestureID-1) *10 +5) * numpy.ones(len(frames_count))
198 | plt.plot(frames_count, pred_label_temp, color='r', linewidth=5.0)
199 |
200 | # plot clean path
201 | for i in range(len(begin_frame)):
202 | frames_count = numpy.array(range(begin_frame[i], end_frame[i]+1))
203 | pred_label_temp = ((pred_label[i]-1) *10 +5) * numpy.ones(len(frames_count))
204 | plt.plot(frames_count, pred_label_temp, color='#ffff00', linewidth=2.0)
205 |
206 | plt.show()
207 |
208 | from pylab import savefig
209 | save_dir=r'.\training\SK_path'
210 | save_path= os.path.join(save_dir,file)
211 | savefig(save_path, bbox_inches='tight')
212 | plt.show()
213 |
214 | print "Elapsed time %d sec" % int(time.time() - time_tic)
215 | save_dir=r'.\training\SK'
216 | save_path= os.path.join(save_dir,file)
217 | out_file = open(save_path, 'wb')
218 | cPickle.dump({'log_observ_likelihood':log_observ_likelihood}, out_file, protocol=cPickle.HIGHEST_PROTOCOL)
219 | out_file.close()
220 |
221 | pred=[]
222 | for i in range(len(begin_frame)):
223 | pred.append([ pred_label[i], begin_frame[i], end_frame[i]] )
224 |
225 | smp.exportPredictions(pred,outPred)
226 | # ###############################################
227 | ## delete the sample
228 | del smp
229 |
230 |
231 |
232 | TruthDir='./training/gt/'
233 | final_score = evalGesture(outPred,TruthDir)
234 | print("The score for this prediction is " + "{:.12f}".format(final_score))
235 | #The score for this prediction is 0.746762613292 -inf threshold, dbn_2014-05-23-20-07-28
236 | #The score for this prediction is 0.731507614243 -3 threshold, dbn_2014-05-23-20-07-28
237 | #The score for this prediction is 0.748537955342 -inf threshold, dbn_2014-05-24-05-53-17
238 | # Submision folder (output)
239 | outSubmision='./training/submision/'
240 | # Prepare submision file (only for validation and final evaluation data sets)
241 | createSubmisionFile(outPred, outSubmision)
242 |
243 |
244 |
245 |
246 | #Sample ID: Sample0651, score 0.857417
247 | #Sample ID: Sample0652, score 0.913935
248 | #Sample ID: Sample0653, score 0.943355
249 | #Sample ID: Sample0654, score 0.917020
250 | #Sample ID: Sample0655, score 0.924133
251 | #Sample ID: Sample0656, score 0.749035
252 | #Sample ID: Sample0657, score 0.955422
253 | #Sample ID: Sample0658, score 0.908295
254 | #Sample ID: Sample0659, score 0.859846
255 | #Sample ID: Sample0660, score 0.856747
256 | #Sample ID: Sample0661, score 0.914236
257 | #Sample ID: Sample0662, score 0.786864
258 | #Sample ID: Sample0663, score 0.941406
259 | #Sample ID: Sample0664, score 0.828827
260 | #Sample ID: Sample0665, score 0.853589
261 | #Sample ID: Sample0666, score 0.720335
262 | #Sample ID: Sample0667, score 0.853116
263 | #Sample ID: Sample0668, score 0.933476
264 | #Sample ID: Sample0669, score 0.881736
265 | #Sample ID: Sample0670, score 0.768433
266 | #Sample ID: Sample0671, score 0.909118
267 | #Sample ID: Sample0672, score 0.950289
268 | #Sample ID: Sample0673, score 0.744832
269 | #Sample ID: Sample0674, score 0.859022
270 | #Sample ID: Sample0675, score 0.092073
271 | #Sample ID: Sample0676, score 0.954039
272 | #Sample ID: Sample0677, score 0.794421
273 | #Sample ID: Sample0678, score 0.709793
274 | #Sample ID: Sample0679, score 0.809159
275 | #Sample ID: Sample0680, score 0.812236
276 | #Sample ID: Sample0681, score 0.684452
277 | #Sample ID: Sample0682, score 0.828362
278 | #Sample ID: Sample0683, score 0.650288
279 | #Sample ID: Sample0684, score 0.820198
280 | #Sample ID: Sample0685, score 0.948309
281 | #Sample ID: Sample0686, score 0.774727
282 | #Sample ID: Sample0687, score 0.870839
283 | #Sample ID: Sample0688, score 0.770792
284 | #Sample ID: Sample0689, score 0.957459
285 | #Sample ID: Sample0690, score 0.929372
286 | #Sample ID: Sample0691, score 0.912913
287 | #Sample ID: Sample0692, score 0.921437
288 | #Sample ID: Sample0693, score 0.917426
289 | #Sample ID: Sample0694, score 0.819258
290 | #Sample ID: Sample0695, score 0.930915
291 | #Sample ID: Sample0696, score 0.913294
292 | #Sample ID: Sample0697, score 0.904583
293 | #Sample ID: Sample0698, score 0.807169
294 | #Sample ID: Sample0699, score 0.000000
295 | #Sample ID: Sample0700, score 0.814740
296 | #The score for this prediction is 0.823574769630
297 |
298 |
299 | ####################True validation##############
300 | #Sample ID: Sample0651, score 0.904025
301 | #Sample ID: Sample0652, score 0.904918
302 | #Sample ID: Sample0653, score 0.958563
303 | #Sample ID: Sample0654, score 0.908786
304 | #Sample ID: Sample0655, score 0.939971
305 | #Sample ID: Sample0656, score 0.855592
306 | #Sample ID: Sample0657, score 0.949432
307 | #Sample ID: Sample0658, score 0.892104
308 | #Sample ID: Sample0659, score 0.821147
309 | #Sample ID: Sample0660, score 0.731472
310 | #Sample ID: Sample0661, score 0.937361
311 | #Sample ID: Sample0662, score 0.669438
312 | #Sample ID: Sample0663, score 0.951005
313 | #Sample ID: Sample0664, score 0.943669
314 | #Sample ID: Sample0665, score 0.733362
315 | #Sample ID: Sample0666, score 0.609271
316 | #Sample ID: Sample0667, score 0.860603
317 | #Sample ID: Sample0668, score 0.858290
318 | #Sample ID: Sample0669, score 0.929701
319 | #Sample ID: Sample0670, score 0.768116
320 | #Sample ID: Sample0671, score 0.814299
321 | #Sample ID: Sample0672, score 0.930511
322 | #Sample ID: Sample0673, score 0.673121
323 | #Sample ID: Sample0674, score 0.812634
324 | #Sample ID: Sample0675, score 0.095109
325 | #Sample ID: Sample0676, score 0.849760
326 | #Sample ID: Sample0677, score 0.855732
327 | #Sample ID: Sample0678, score 0.697313
328 | #Sample ID: Sample0679, score 0.868751
329 | #Sample ID: Sample0680, score 0.784426
330 | #Sample ID: Sample0681, score 0.667418
331 | #Sample ID: Sample0682, score 0.789869
332 | #Sample ID: Sample0683, score 0.712648
333 | #Sample ID: Sample0684, score 0.774973
334 | #Sample ID: Sample0685, score 0.696109
335 | #Sample ID: Sample0686, score 0.718954
336 | #Sample ID: Sample0687, score 0.614459
337 | #Sample ID: Sample0688, score 0.823834
338 | #Sample ID: Sample0689, score 0.891862
339 | #Sample ID: Sample0690, score 0.868217
340 | #Sample ID: Sample0691, score 0.895659
341 | #Sample ID: Sample0692, score 0.763341
342 | #Sample ID: Sample0693, score 0.919345
343 | #Sample ID: Sample0694, score 0.884368
344 | #Sample ID: Sample0695, score 0.786327
345 | #Sample ID: Sample0696, score 0.855285
346 | #Sample ID: Sample0697, score 0.909057
347 | #Sample ID: Sample0698, score 0.714707
348 | #Sample ID: Sample0699, score 0.493874
349 | #Sample ID: Sample0700, score 0.797374
350 | #The score for this prediction is 0.801723171675
--------------------------------------------------------------------------------
/Step3_SK_Test_prediction.py:
--------------------------------------------------------------------------------
1 | #-------------------------------------------------------------------------------
2 | # Name: Starting Kit for ChaLearn LAP 2014 Track3
3 | # Purpose: Show basic functionality of provided code
4 | #
5 | # Author: Xavier Baro
6 | # Author: Di Wu: stevenwudi@gmail.com
7 | # Created: 24/03/2014
8 | # Copyright: (c) Chalearn LAP 2014
9 | # Licence: GPL3
10 | #-------------------------------------------------------------------------------
11 | import sys, os, random, zipfile
12 | from numpy import log
13 | from shutil import copyfile
14 | import matplotlib.pyplot as plt
15 | import time
16 | import cPickle
17 | import numpy
18 | import scipy.io as sio
19 |
20 | ####################################
21 | ### local import
22 | ####################################
23 | from ChalearnLAPEvaluation import evalGesture,exportGT_Gesture
24 | from ChalearnLAPSample import GestureSample
25 | from utils import Extract_feature_Realtime
26 | from utils import Extract_feature_UNnormalized
27 | from utils import normalize
28 | from utils import imdisplay
29 | from utils import createSubmisionFile
30 | ############### viterbi path import
31 | from utils import viterbi_path, viterbi_path_log
32 | from utils import viterbi_colab_clean
33 |
34 | ####################################
35 | ### theano import
36 | ####################################
37 | sys.path.append(r'.\TheanoDL')
38 | try:
39 | import theano
40 | import theano.tensor as T
41 | from theano.tensor.shared_randomstreams import RandomStreams
42 | from logistic_sgd import LogisticRegression
43 | from mlp import HiddenLayer
44 | from rbm import RBM
45 | from grbm import GBRBM
46 | from utils import zero_mean_unit_variance
47 | from utils import normalize
48 | from GRBM_DBN import GRBM_DBN
49 | from sklearn import preprocessing
50 | except ImportError:
51 | sys.exit('Please install Theano!')
52 |
53 |
54 | print("Congratulations, you finish all the reqruired modules!")
55 | ####################################
56 | ### The path you need to set!!!#####
57 | ####################################
58 |
59 | # Data folder (Test data)
60 | data_path=os.path.join("I:\Kaggle_multimodal\Test\Test\\")
61 | # Predictions folder (output)
62 | outPred=r'.\training\test'
63 | # Submision folder (output)
64 | outSubmision=r'.\training\test_submission'
65 |
66 | ####################################
67 | ### Some initialization ############
68 | ####################################
69 | used_joints = ['ElbowLeft', 'WristLeft', 'ShoulderLeft','HandLeft',
70 | 'ElbowRight', 'WristRight','ShoulderRight','HandRight',
71 | 'Head','Spine','HipCenter']
72 | njoints = len(used_joints)
73 | ### load the pre-store normalization constant
74 | f = open('SK_normalization.pkl','rb')
75 | SK_normalization = cPickle.load(f)
76 | Mean1 = SK_normalization ['Mean1']
77 | Std1 = SK_normalization['Std1']
78 | ## Load Prior and transitional Matrix
79 | dic=sio.loadmat('Transition_matrix.mat')
80 | Transition_matrix = dic['Transition_matrix']
81 | Prior = dic['Prior']
82 |
83 |
84 |
85 | ####################################
86 | ### Start predicting here ##########
87 | ####################################
88 | samples=os.listdir(data_path)
89 | for file_count, file in enumerate(samples):
90 | #if not file.endswith(".zip"):
91 | # continue;
92 | time_tic = time.time()
93 | if file_count > -1: # do nothing, just for bebug convenience
94 | print("\t Processing file " + file)
95 | # Create the object to access the sample
96 | smp=GestureSample(os.path.join(data_path,file))
97 | # ###############################################
98 | # USE Ground Truth information to learn the model
99 | # ###############################################
100 | # Get the list of actions for this frame
101 | gesturesList=smp.getGestures()
102 |
103 | Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, 1, smp.getNumFrames())
104 |
105 | Feature = Extract_feature_Realtime(Skeleton_matrix, njoints)
106 |
107 | Feature_normalized = normalize(Feature, Mean1, Std1)
108 |
109 | ### Feed into DBN, theano requires the shared tensor representation
110 | shared_x = theano.shared(numpy.asarray(Feature_normalized,
111 | dtype=theano.config.floatX),
112 | borrow=True)
113 | numpy_rng = numpy.random.RandomState(123)
114 |
115 | ##########################
116 | ### model 1
117 | ##########################
118 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
119 | hidden_layers_sizes=[1000, 1000, 500],
120 | n_outs=201)
121 | dbn.load('dbn_2014-05-23-20-07-28.npy')
122 |
123 | validate_model = theano.function(inputs=[],
124 | outputs=dbn.logLayer.p_y_given_x,
125 | givens={ dbn.x: shared_x})
126 |
127 | observ_likelihood_1 = validate_model()
128 | del dbn
129 |
130 | ##########################
131 | ### model 2
132 | ##########################
133 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
134 | hidden_layers_sizes=[1000, 1000, 500],
135 | n_outs=201)
136 |
137 | dbn.load('dbn_2014-05-24-05-53-17.npy')
138 |
139 | validate_model = theano.function(inputs=[],
140 | outputs=dbn.logLayer.p_y_given_x,
141 | givens={ dbn.x: shared_x})
142 |
143 | observ_likelihood_2 = validate_model()
144 | del dbn
145 |
146 | ##########################
147 | ### model 3
148 | ##########################
149 |
150 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
151 | hidden_layers_sizes=[2000, 2000, 1000],
152 | n_outs=201, finetune_lr=0.1)
153 | dbn.load('dbn_2014-05-25-10-11-56.npy')
154 | # Optimization complete with best validation score of 38.194915 %,with test performance 38.113636 %
155 | #....The score for this prediction is 0.792685963841
156 |
157 | validate_model = theano.function(inputs=[],
158 | outputs=dbn.logLayer.p_y_given_x,
159 | givens={ dbn.x: shared_x})
160 |
161 | observ_likelihood_3 = validate_model()
162 | del dbn
163 |
164 | ##########################
165 | ### model 4
166 | ##########################
167 | dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=528,
168 | hidden_layers_sizes=[2000, 2000, 1000],
169 | n_outs=201, finetune_lr=0.1)
170 | dbn.load('dbn_2014-05-25-11-09-00.npy')
171 |
172 | validate_model = theano.function(inputs=[],
173 | outputs=dbn.logLayer.p_y_given_x,
174 | givens={ dbn.x: shared_x})
175 |
176 | observ_likelihood_4 = validate_model()
177 | del dbn
178 |
179 | #sio.savemat('observ_likelihood.mat', {'observ_likelihood':observ_likelihood})
180 | ##########################
181 | # viterbi path decoding
182 | #####################
183 |
184 | log_observ_likelihood = log(observ_likelihood_1.T) + log(observ_likelihood_2.T) \
185 | + log(observ_likelihood_3.T) + log(observ_likelihood_4.T)
186 | ##########################
187 | # viterbi path decoding
188 | #####################
189 |
190 | log_observ_likelihood = log(observ_likelihood_1.T)
191 | log_observ_likelihood[-1, 0:5] = 0 # We force first and last 5 frames are in the ergodic state
192 | log_observ_likelihood[-1, -5:] = 0
193 |
194 | print("\t Viterbi path decoding " )
195 | # do it in log space avoid numeric underflow
196 | [path, predecessor_state_index, global_score] = viterbi_path_log(log(Prior), log(Transition_matrix), log_observ_likelihood)
197 | #[path, predecessor_state_index, global_score] = viterbi_path(Prior, Transition_matrix, observ_likelihood)
198 |
199 | # Some gestures are not within the vocabulary
200 | [pred_label, begin_frame, end_frame, Individual_score, frame_length] = viterbi_colab_clean(path, global_score, threshold=-100, mini_frame=19)
201 |
202 | #begin_frame = begin_frame-1
203 | end_frame = end_frame + 3
204 | ### plot the path and prediction
205 | if False:
206 | im = imdisplay(global_score)
207 | plt.imshow(im, cmap='gray')
208 | plt.plot(range(global_score.shape[-1]), path, color='c',linewidth=2.0)
209 | plt.xlim((0, global_score.shape[-1]))
210 | # plot ground truth
211 | for gesture in gesturesList:
212 | # Get the gesture ID, and start and end frames for the gesture
213 | gestureID,startFrame,endFrame=gesture
214 | frames_count = numpy.array(range(startFrame, endFrame+1))
215 | pred_label_temp = ((gestureID-1) *10 +5) * numpy.ones(len(frames_count))
216 | plt.plot(frames_count, pred_label_temp, color='r', linewidth=5.0)
217 |
218 | # plot clean path
219 | for i in range(len(begin_frame)):
220 | frames_count = numpy.array(range(begin_frame[i], end_frame[i]+1))
221 | pred_label_temp = ((pred_label[i]-1) *10 +5) * numpy.ones(len(frames_count))
222 | plt.plot(frames_count, pred_label_temp, color='#ffff00', linewidth=2.0)
223 |
224 | plt.show()
225 | else:
226 | print "Elapsed time %d sec" % int(time.time() - time_tic)
227 |
228 | pred=[]
229 | for i in range(len(begin_frame)):
230 | pred.append([ pred_label[i], begin_frame[i], end_frame[i]] )
231 |
232 | smp.exportPredictions(pred,outPred)
233 |
234 | # ###############################################
235 | ## delete the sample
236 | del smp
237 |
238 |
239 | # Prepare submision file (only for validation and final evaluation data sets)
240 | createSubmisionFile(outPred, outSubmision)
241 |
--------------------------------------------------------------------------------
/Step3_measure_performance.py:
--------------------------------------------------------------------------------
1 | #-------------------------------------------------------------------------------
2 | # Name: Starting Kit for ChaLearn LAP 2014 Track3
3 | # Purpose: Show basic functionality of provided code
4 | #
5 | # Author: Xavier Baro
6 | # Author: Di Wu: stevenwudi@gmail.com
7 | # Created: 24/03/2014
8 | # Copyright: (c) Chalearn LAP 2014
9 | # Licence: GPL3
10 | #-------------------------------------------------------------------------------
11 | import sys, os, os.path,random,numpy,zipfile
12 | from shutil import copyfile
13 | import matplotlib.pyplot as plt
14 |
15 | from ChalearnLAPEvaluation import evalGesture,exportGT_Gesture
16 | from ChalearnLAPSample import GestureSample
17 | from ChalearnLAPEvaluation import gesture_overlap_csv
18 | from utils import Extract_feature
19 |
20 | def main():
21 |
22 | prediction_dir = r'I:\Kaggle_multimodal\StartingKit_track3\CoDaLab_Gesure_track3\matlab\prediction_650_conv'
23 | #prediction_dir = r'I:\Kaggle_multimodal\StartingKit_track3\CoDaLab_Gesure_track3\matlab\prediction_650'
24 | #truth_dir = r'I:\Kaggle_multimodal\validation_labels'
25 | truth_dir = r'I:\Kaggle_multimodal\validation'
26 | final_score = evalGesture(prediction_dir,truth_dir)
27 | print "final_score "+str(final_score)
28 |
29 | # 3DCNN: final_score0.375025337775
30 |
31 |
32 | if __name__ == '__main__':
33 | main()
34 |
--------------------------------------------------------------------------------
/TheanoDL/GRBM_DBN.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/GRBM_DBN.pyc
--------------------------------------------------------------------------------
/TheanoDL/Theano_Tutorial.pyproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Debug
5 | 2.0
6 | 9f652ea6-cee9-4214-9774-25d00ce2972c
7 |
8 |
9 | GRBM_DBN.py
10 | code\
11 | .
12 | .
13 | Theano_Tutorial
14 | Tutorial
15 | Standard Python launcher
16 | -W ignore::DeprecationWarning
17 | False
18 | False
19 | {947cee96-2ef3-4b77-b850-f04b2d86f6b6}
20 | 2.7
21 |
22 |
23 | true
24 | false
25 |
26 |
27 | true
28 | false
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 | 10.0
85 | $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)
86 | $(VSToolsPath)\Python Tools\Microsoft.PythonTools.targets
87 |
88 |
89 |
90 |
--------------------------------------------------------------------------------
/TheanoDL/Tutorial.py:
--------------------------------------------------------------------------------
1 | import theano.tensor as T
2 | from theano import function
3 | from theano import pp
4 |
5 | x = T.dscalar('x')
6 | y = T.dscalar('y')
7 | z = x + y
8 | f= function( [x, y], z)
9 |
10 | print f(2, 3)
--------------------------------------------------------------------------------
/TheanoDL/Tutorial.v11.suo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/Tutorial.v11.suo
--------------------------------------------------------------------------------
/TheanoDL/cA.py:
--------------------------------------------------------------------------------
1 | """This tutorial introduces Contractive auto-encoders (cA) using Theano.
2 |
3 | They are based on auto-encoders as the ones used in Bengio et
4 | al. 2007. An autoencoder takes an input x and first maps it to a
5 | hidden representation y = f_{\theta}(x) = s(Wx+b), parameterized by
6 | \theta={W,b}. The resulting latent representation y is then mapped
7 | back to a "reconstructed" vector z \in [0,1]^d in input space z =
8 | g_{\theta'}(y) = s(W'y + b'). The weight matrix W' can optionally be
9 | constrained such that W' = W^T, in which case the autoencoder is said
10 | to have tied weights. The network is trained such that to minimize
11 | the reconstruction error (the error between x and z). Adding the
12 | squared Frobenius norm of the Jacobian of the hidden mapping h with
13 | respect to the visible units yields the contractive auto-encoder:
14 |
15 | - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)] + \| \frac{\partial h(x)}{\partial x} \|^2
16 |
17 | References :
18 | - S. Rifai, P. Vincent, X. Muller, X. Glorot, Y. Bengio: Contractive
19 | Auto-Encoders: Explicit Invariance During Feature Extraction, ICML-11
20 |
21 | - S. Rifai, X. Muller, X. Glorot, G. Mesnil, Y. Bengio, and Pascal
22 | Vincent. Learning invariant features through local space
23 | contraction. Technical Report 1360, Universite de Montreal
24 |
25 | - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
26 | Training of Deep Networks, Advances in Neural Information Processing
27 | Systems 19, 2007
28 |
29 | """
30 | import cPickle
31 | import gzip
32 | import os
33 | import sys
34 | import time
35 |
36 | import numpy
37 |
38 | import theano
39 | import theano.tensor as T
40 |
41 |
42 | from logistic_sgd import load_data
43 | from utils import tile_raster_images
44 |
45 | import PIL.Image
46 |
47 |
48 | class cA(object):
49 | """ Contractive Auto-Encoder class (cA)
50 |
51 | The contractive autoencoder tries to reconstruct the input with an
52 | additional constraint on the latent space. With the objective of
53 | obtaining a robust representation of the input space, we
54 | regularize the L2 norm(Froebenius) of the jacobian of the hidden
55 | representation with respect to the input. Please refer to Rifai et
56 | al.,2011 for more details.
57 |
58 | If x is the input then equation (1) computes the projection of the
59 | input into the latent space h. Equation (2) computes the jacobian
60 | of h with respect to x. Equation (3) computes the reconstruction
61 | of the input, while equation (4) computes the reconstruction
62 | error and the added regularization term from Eq.(2).
63 |
64 | .. math::
65 |
66 | h_i = s(W_i x + b_i) (1)
67 |
68 | J_i = h_i (1 - h_i) * W_i (2)
69 |
70 | x' = s(W' h + b') (3)
71 |
72 | L = -sum_{k=1}^d [x_k \log x'_k + (1-x_k) \log( 1-x'_k)]
73 | + lambda * sum_{i=1}^d sum_{j=1}^n J_{ij}^2 (4)
74 |
75 | """
76 |
77 | def __init__(self, numpy_rng, input=None, n_visible=784, n_hidden=100,
78 | n_batchsize=1, W=None, bhid=None, bvis=None):
79 | """Initialize the cA class by specifying the number of visible units (the
80 | dimension d of the input ), the number of hidden units ( the dimension
81 | d' of the latent or hidden space ) and the contraction level. The
82 | constructor also receives symbolic variables for the input, weights and
83 | bias.
84 |
85 | :type numpy_rng: numpy.random.RandomState
86 | :param numpy_rng: number random generator used to generate weights
87 |
88 | :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
89 | :param theano_rng: Theano random generator; if None is given
90 | one is generated based on a seed drawn from `rng`
91 |
92 | :type input: theano.tensor.TensorType
93 | :param input: a symbolic description of the input or None for
94 | standalone cA
95 |
96 | :type n_visible: int
97 | :param n_visible: number of visible units
98 |
99 | :type n_hidden: int
100 | :param n_hidden: number of hidden units
101 |
102 | :type n_batchsize int
103 | :param n_batchsize: number of examples per batch
104 |
105 | :type W: theano.tensor.TensorType
106 | :param W: Theano variable pointing to a set of weights that should be
107 | shared belong the dA and another architecture; if dA should
108 | be standalone set this to None
109 |
110 | :type bhid: theano.tensor.TensorType
111 | :param bhid: Theano variable pointing to a set of biases values (for
112 | hidden units) that should be shared belong dA and another
113 | architecture; if dA should be standalone set this to None
114 |
115 | :type bvis: theano.tensor.TensorType
116 | :param bvis: Theano variable pointing to a set of biases values (for
117 | visible units) that should be shared belong dA and another
118 | architecture; if dA should be standalone set this to None
119 |
120 | """
121 | self.n_visible = n_visible
122 | self.n_hidden = n_hidden
123 | self.n_batchsize = n_batchsize
124 | # note : W' was written as `W_prime` and b' as `b_prime`
125 | if not W:
126 | # W is initialized with `initial_W` which is uniformely sampled
127 | # from -4*sqrt(6./(n_visible+n_hidden)) and
128 | # 4*sqrt(6./(n_hidden+n_visible))the output of uniform if
129 | # converted using asarray to dtype
130 | # theano.config.floatX so that the code is runable on GPU
131 | initial_W = numpy.asarray(numpy_rng.uniform(
132 | low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
133 | high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
134 | size=(n_visible, n_hidden)),
135 | dtype=theano.config.floatX)
136 | W = theano.shared(value=initial_W, name='W', borrow=True)
137 |
138 | if not bvis:
139 | bvis = theano.shared(value=numpy.zeros(n_visible,
140 | dtype=theano.config.floatX),
141 | borrow=True)
142 |
143 | if not bhid:
144 | bhid = theano.shared(value=numpy.zeros(n_hidden,
145 | dtype=theano.config.floatX),
146 | name='b',
147 | borrow=True)
148 |
149 | self.W = W
150 | # b corresponds to the bias of the hidden
151 | self.b = bhid
152 | # b_prime corresponds to the bias of the visible
153 | self.b_prime = bvis
154 | # tied weights, therefore W_prime is W transpose
155 | self.W_prime = self.W.T
156 |
157 | # if no input is given, generate a variable representing the input
158 | if input == None:
159 | # we use a matrix because we expect a minibatch of several
160 | # examples, each example being a row
161 | self.x = T.dmatrix(name='input')
162 | else:
163 | self.x = input
164 |
165 | self.params = [self.W, self.b, self.b_prime]
166 |
167 | def get_hidden_values(self, input):
168 | """ Computes the values of the hidden layer """
169 | return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
170 |
171 | def get_jacobian(self, hidden, W):
172 | """Computes the jacobian of the hidden layer with respect to
173 | the input, reshapes are necessary for broadcasting the
174 | element-wise product on the right axis
175 |
176 | """
177 | return T.reshape(hidden * (1 - hidden),
178 | (self.n_batchsize, 1, self.n_hidden)) * T.reshape(
179 | W, (1, self.n_visible, self.n_hidden))
180 |
181 | def get_reconstructed_input(self, hidden):
182 | """Computes the reconstructed input given the values of the
183 | hidden layer
184 |
185 | """
186 | return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
187 |
188 | def get_cost_updates(self, contraction_level, learning_rate):
189 | """ This function computes the cost and the updates for one trainng
190 | step of the cA """
191 |
192 | y = self.get_hidden_values(self.x)
193 | z = self.get_reconstructed_input(y)
194 | J = self.get_jacobian(y, self.W)
195 | # note : we sum over the size of a datapoint; if we are using
196 | # minibatches, L will be a vector, with one entry per
197 | # example in minibatch
198 | self.L_rec = - T.sum(self.x * T.log(z) +
199 | (1 - self.x) * T.log(1 - z),
200 | axis=1)
201 |
202 | # Compute the jacobian and average over the number of samples/minibatch
203 | self.L_jacob = T.sum(J ** 2) / self.n_batchsize
204 |
205 | # note : L is now a vector, where each element is the
206 | # cross-entropy cost of the reconstruction of the
207 | # corresponding example of the minibatch. We need to
208 | # compute the average of all these to get the cost of
209 | # the minibatch
210 | cost = T.mean(self.L_rec) + contraction_level * T.mean(self.L_jacob)
211 |
212 | # compute the gradients of the cost of the `cA` with respect
213 | # to its parameters
214 | gparams = T.grad(cost, self.params)
215 | # generate the list of updates
216 | updates = []
217 | for param, gparam in zip(self.params, gparams):
218 | updates.append((param, param - learning_rate * gparam))
219 |
220 | return (cost, updates)
221 |
222 |
223 | def test_cA(learning_rate=0.01, training_epochs=20,
224 | dataset='mnist.pkl.gz',
225 | batch_size=10, output_folder='cA_plots', contraction_level=.1):
226 | """
227 | This demo is tested on MNIST
228 |
229 | :type learning_rate: float
230 | :param learning_rate: learning rate used for training the contracting
231 | AutoEncoder
232 |
233 | :type training_epochs: int
234 | :param training_epochs: number of epochs used for training
235 |
236 | :type dataset: string
237 | :param dataset: path to the picked dataset
238 |
239 | """
240 | datasets = load_data(dataset)
241 | train_set_x, train_set_y = datasets[0]
242 |
243 | # compute number of minibatches for training, validation and testing
244 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
245 |
246 | # allocate symbolic variables for the data
247 | index = T.lscalar() # index to a [mini]batch
248 | x = T.matrix('x') # the data is presented as rasterized images
249 |
250 | if not os.path.isdir(output_folder):
251 | os.makedirs(output_folder)
252 | os.chdir(output_folder)
253 | ####################################
254 | # BUILDING THE MODEL #
255 | ####################################
256 |
257 | rng = numpy.random.RandomState(123)
258 |
259 | ca = cA(numpy_rng=rng, input=x,
260 | n_visible=28 * 28, n_hidden=500, n_batchsize=batch_size)
261 |
262 | cost, updates = ca.get_cost_updates(contraction_level=contraction_level,
263 | learning_rate=learning_rate)
264 |
265 | train_ca = theano.function([index], [T.mean(ca.L_rec), ca.L_jacob],
266 | updates=updates,
267 | givens={x: train_set_x[index * batch_size:
268 | (index + 1) * batch_size]})
269 |
270 | start_time = time.clock()
271 |
272 | ############
273 | # TRAINING #
274 | ############
275 |
276 | # go through training epochs
277 | for epoch in xrange(training_epochs):
278 | # go through trainng set
279 | c = []
280 | for batch_index in xrange(n_train_batches):
281 | c.append(train_ca(batch_index))
282 |
283 | c_array = numpy.vstack(c)
284 | print 'Training epoch %d, reconstruction cost ' % epoch, numpy.mean(
285 | c_array[0]), ' jacobian norm ', numpy.mean(numpy.sqrt(c_array[1]))
286 |
287 | end_time = time.clock()
288 |
289 | training_time = (end_time - start_time)
290 |
291 | print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
292 | ' ran for %.2fm' % ((training_time) / 60.))
293 | image = PIL.Image.fromarray(tile_raster_images(
294 | X=ca.W.get_value(borrow=True).T,
295 | img_shape=(28, 28), tile_shape=(10, 10),
296 | tile_spacing=(1, 1)))
297 |
298 | image.save('cae_filters.png')
299 |
300 | os.chdir('../')
301 |
302 |
303 | if __name__ == '__main__':
304 | test_cA()
305 |
--------------------------------------------------------------------------------
/TheanoDL/convolutional_mlp.py:
--------------------------------------------------------------------------------
1 | """This tutorial introduces the LeNet5 neural network architecture
2 | using Theano. LeNet5 is a convolutional neural network, good for
3 | classifying images. This tutorial shows how to build the architecture,
4 | and comes with all the hyper-parameters you need to reproduce the
5 | paper's MNIST results.
6 |
7 |
8 | This implementation simplifies the model in the following ways:
9 |
10 | - LeNetConvPool doesn't implement location-specific gain and bias parameters
11 | - LeNetConvPool doesn't implement pooling by average, it implements pooling
12 | by max.
13 | - Digit classification is implemented with a logistic regression rather than
14 | an RBF network
15 | - LeNet5 was not fully-connected convolutions at second layer
16 |
17 | References:
18 | - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner:
19 | Gradient-Based Learning Applied to Document
20 | Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
21 | http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf
22 |
23 | """
24 | import cPickle
25 | import gzip
26 | import os
27 | import sys
28 | import time
29 |
30 | import numpy
31 |
32 | import theano
33 | import theano.tensor as T
34 | from theano.tensor.signal import downsample
35 | from theano.tensor.nnet import conv
36 |
37 | from logistic_sgd import LogisticRegression, load_data
38 | from mlp import HiddenLayer
39 |
40 |
41 | class LeNetConvPoolLayer(object):
42 | """Pool Layer of a convolutional network """
43 |
44 | def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
45 | """
46 | Allocate a LeNetConvPoolLayer with shared variable internal parameters.
47 |
48 | :type rng: numpy.random.RandomState
49 | :param rng: a random number generator used to initialize weights
50 |
51 | :type input: theano.tensor.dtensor4
52 | :param input: symbolic image tensor, of shape image_shape
53 |
54 | :type filter_shape: tuple or list of length 4
55 | :param filter_shape: (number of filters, num input feature maps,
56 | filter height,filter width)
57 |
58 | :type image_shape: tuple or list of length 4
59 | :param image_shape: (batch size, num input feature maps,
60 | image height, image width)
61 |
62 | :type poolsize: tuple or list of length 2
63 | :param poolsize: the downsampling (pooling) factor (#rows,#cols)
64 | """
65 |
66 | assert image_shape[1] == filter_shape[1]
67 | self.input = input
68 |
69 | # there are "num input feature maps * filter height * filter width"
70 | # inputs to each hidden unit
71 | fan_in = numpy.prod(filter_shape[1:])
72 | # each unit in the lower layer receives a gradient from:
73 | # "num output feature maps * filter height * filter width" /
74 | # pooling size
75 | fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
76 | numpy.prod(poolsize))
77 | # initialize weights with random weights
78 | W_bound = numpy.sqrt(6. / (fan_in + fan_out))
79 | self.W = theano.shared(numpy.asarray(
80 | rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
81 | dtype=theano.config.floatX),
82 | borrow=True)
83 |
84 | # the bias is a 1D tensor -- one bias per output feature map
85 | b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
86 | self.b = theano.shared(value=b_values, borrow=True)
87 |
88 | # convolve input feature maps with filters
89 | conv_out = conv.conv2d(input=input, filters=self.W,
90 | filter_shape=filter_shape, image_shape=image_shape)
91 |
92 | # downsample each feature map individually, using maxpooling
93 | pooled_out = downsample.max_pool_2d(input=conv_out,
94 | ds=poolsize, ignore_border=True)
95 |
96 | # add the bias term. Since the bias is a vector (1D array), we first
97 | # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will
98 | # thus be broadcasted across mini-batches and feature map
99 | # width & height
100 | self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
101 |
102 | # store parameters of this layer
103 | self.params = [self.W, self.b]
104 |
105 |
106 | def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
107 | dataset='mnist.pkl.gz',
108 | nkerns=[20, 50], batch_size=500):
109 | """ Demonstrates lenet on MNIST dataset
110 |
111 | :type learning_rate: float
112 | :param learning_rate: learning rate used (factor for the stochastic
113 | gradient)
114 |
115 | :type n_epochs: int
116 | :param n_epochs: maximal number of epochs to run the optimizer
117 |
118 | :type dataset: string
119 | :param dataset: path to the dataset used for training /testing (MNIST here)
120 |
121 | :type nkerns: list of ints
122 | :param nkerns: number of kernels on each layer
123 | """
124 |
125 | rng = numpy.random.RandomState(23455)
126 |
127 | datasets = load_data(dataset)
128 |
129 | train_set_x, train_set_y = datasets[0]
130 | valid_set_x, valid_set_y = datasets[1]
131 | test_set_x, test_set_y = datasets[2]
132 |
133 | # compute number of minibatches for training, validation and testing
134 | n_train_batches = train_set_x.get_value(borrow=True).shape[0]
135 | n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
136 | n_test_batches = test_set_x.get_value(borrow=True).shape[0]
137 | n_train_batches /= batch_size
138 | n_valid_batches /= batch_size
139 | n_test_batches /= batch_size
140 |
141 | # allocate symbolic variables for the data
142 | index = T.lscalar() # index to a [mini]batch
143 | x = T.matrix('x') # the data is presented as rasterized images
144 | y = T.ivector('y') # the labels are presented as 1D vector of
145 | # [int] labels
146 |
147 | ishape = (28, 28) # this is the size of MNIST images
148 |
149 | ######################
150 | # BUILD ACTUAL MODEL #
151 | ######################
152 | print '... building the model'
153 |
154 | # Reshape matrix of rasterized images of shape (batch_size,28*28)
155 | # to a 4D tensor, compatible with our LeNetConvPoolLayer
156 | layer0_input = x.reshape((batch_size, 1, 28, 28))
157 |
158 | # Construct the first convolutional pooling layer:
159 | # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
160 | # maxpooling reduces this further to (24/2,24/2) = (12,12)
161 | # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
162 | layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
163 | image_shape=(batch_size, 1, 28, 28),
164 | filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))
165 |
166 | # Construct the second convolutional pooling layer
167 | # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
168 | # maxpooling reduces this further to (8/2,8/2) = (4,4)
169 | # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4)
170 | layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
171 | image_shape=(batch_size, nkerns[0], 12, 12),
172 | filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))
173 |
174 | # the HiddenLayer being fully-connected, it operates on 2D matrices of
175 | # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
176 | # This will generate a matrix of shape (20,32*4*4) = (20,512)
177 | layer2_input = layer1.output.flatten(2)
178 |
179 | # construct a fully-connected sigmoidal layer
180 | layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4,
181 | n_out=500, activation=T.tanh)
182 |
183 | # classify the values of the fully-connected sigmoidal layer
184 | layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
185 |
186 | # the cost we minimize during training is the NLL of the model
187 | cost = layer3.negative_log_likelihood(y)
188 |
189 | # create a function to compute the mistakes that are made by the model
190 | test_model = theano.function([index], layer3.errors(y),
191 | givens={
192 | x: test_set_x[index * batch_size: (index + 1) * batch_size],
193 | y: test_set_y[index * batch_size: (index + 1) * batch_size]})
194 |
195 | validate_model = theano.function([index], layer3.errors(y),
196 | givens={
197 | x: valid_set_x[index * batch_size: (index + 1) * batch_size],
198 | y: valid_set_y[index * batch_size: (index + 1) * batch_size]})
199 |
200 | # create a list of all model parameters to be fit by gradient descent
201 | params = layer3.params + layer2.params + layer1.params + layer0.params
202 |
203 | # create a list of gradients for all model parameters
204 | grads = T.grad(cost, params)
205 |
206 | # train_model is a function that updates the model parameters by
207 | # SGD Since this model has many parameters, it would be tedious to
208 | # manually create an update rule for each model parameter. We thus
209 | # create the updates list by automatically looping over all
210 | # (params[i],grads[i]) pairs.
211 | updates = []
212 | for param_i, grad_i in zip(params, grads):
213 | updates.append((param_i, param_i - learning_rate * grad_i))
214 |
215 | train_model = theano.function([index], cost, updates=updates,
216 | givens={
217 | x: train_set_x[index * batch_size: (index + 1) * batch_size],
218 | y: train_set_y[index * batch_size: (index + 1) * batch_size]})
219 |
220 | ###############
221 | # TRAIN MODEL #
222 | ###############
223 | print '... training'
224 | # early-stopping parameters
225 | patience = 10000 # look as this many examples regardless
226 | patience_increase = 2 # wait this much longer when a new best is
227 | # found
228 | improvement_threshold = 0.995 # a relative improvement of this much is
229 | # considered significant
230 | validation_frequency = min(n_train_batches, patience / 2)
231 | # go through this many
232 | # minibatche before checking the network
233 | # on the validation set; in this case we
234 | # check every epoch
235 |
236 | best_params = None
237 | best_validation_loss = numpy.inf
238 | best_iter = 0
239 | test_score = 0.
240 | start_time = time.clock()
241 |
242 | epoch = 0
243 | done_looping = False
244 |
245 | while (epoch < n_epochs) and (not done_looping):
246 | epoch = epoch + 1
247 | for minibatch_index in xrange(n_train_batches):
248 |
249 | iter = (epoch - 1) * n_train_batches + minibatch_index
250 |
251 | if iter % 100 == 0:
252 | print 'training @ iter = ', iter
253 | cost_ij = train_model(minibatch_index)
254 |
255 | if (iter + 1) % validation_frequency == 0:
256 |
257 | # compute zero-one loss on validation set
258 | validation_losses = [validate_model(i) for i
259 | in xrange(n_valid_batches)]
260 | this_validation_loss = numpy.mean(validation_losses)
261 | print('epoch %i, minibatch %i/%i, validation error %f %%' % \
262 | (epoch, minibatch_index + 1, n_train_batches, \
263 | this_validation_loss * 100.))
264 |
265 | # if we got the best validation score until now
266 | if this_validation_loss < best_validation_loss:
267 |
268 | #improve patience if loss improvement is good enough
269 | if this_validation_loss < best_validation_loss * \
270 | improvement_threshold:
271 | patience = max(patience, iter * patience_increase)
272 |
273 | # save best validation score and iteration number
274 | best_validation_loss = this_validation_loss
275 | best_iter = iter
276 |
277 | # test it on the test set
278 | test_losses = [test_model(i) for i in xrange(n_test_batches)]
279 | test_score = numpy.mean(test_losses)
280 | print((' epoch %i, minibatch %i/%i, test error of best '
281 | 'model %f %%') %
282 | (epoch, minibatch_index + 1, n_train_batches,
283 | test_score * 100.))
284 |
285 | if patience <= iter:
286 | done_looping = True
287 | break
288 |
289 | end_time = time.clock()
290 | print('Optimization complete.')
291 | print('Best validation score of %f %% obtained at iteration %i,'\
292 | 'with test performance %f %%' %
293 | (best_validation_loss * 100., best_iter + 1, test_score * 100.))
294 | print >> sys.stderr, ('The code for file ' +
295 | os.path.split(__file__)[1] +
296 | ' ran for %.2fm' % ((end_time - start_time) / 60.))
297 |
298 | if __name__ == '__main__':
299 | evaluate_lenet5()
300 |
301 |
302 | def experiment(state, channel):
303 | evaluate_lenet5(state.learning_rate, dataset=state.dataset)
304 |
--------------------------------------------------------------------------------
/TheanoDL/dA.py:
--------------------------------------------------------------------------------
1 | """
2 | This tutorial introduces denoising auto-encoders (dA) using Theano.
3 |
4 | Denoising autoencoders are the building blocks for SdA.
5 | They are based on auto-encoders as the ones used in Bengio et al. 2007.
6 | An autoencoder takes an input x and first maps it to a hidden representation
7 | y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting
8 | latent representation y is then mapped back to a "reconstructed" vector
9 | z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b'). The weight
10 | matrix W' can optionally be constrained such that W' = W^T, in which case
11 | the autoencoder is said to have tied weights. The network is trained such
12 | that to minimize the reconstruction error (the error between x and z).
13 |
14 | For the denosing autoencoder, during training, first x is corrupted into
15 | \tilde{x}, where \tilde{x} is a partially destroyed version of x by means
16 | of a stochastic mapping. Afterwards y is computed as before (using
17 | \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction
18 | error is now measured between z and the uncorrupted input x, which is
19 | computed as the cross-entropy :
20 | - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]
21 |
22 |
23 | References :
24 | - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and
25 | Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
26 | 2008
27 | - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
28 | Training of Deep Networks, Advances in Neural Information Processing
29 | Systems 19, 2007
30 |
31 | """
32 |
33 | import cPickle
34 | import gzip
35 | import os
36 | import sys
37 | import time
38 |
39 | import numpy
40 |
41 | import theano
42 | import theano.tensor as T
43 | from theano.tensor.shared_randomstreams import RandomStreams
44 |
45 | from logistic_sgd import load_data
46 | from utils import tile_raster_images
47 |
48 | import PIL.Image
49 |
50 |
51 | class dA(object):
52 | """Denoising Auto-Encoder class (dA)
53 |
54 | A denoising autoencoders tries to reconstruct the input from a corrupted
55 | version of it by projecting it first in a latent space and reprojecting
56 | it afterwards back in the input space. Please refer to Vincent et al.,2008
57 | for more details. If x is the input then equation (1) computes a partially
58 | destroyed version of x by means of a stochastic mapping q_D. Equation (2)
59 | computes the projection of the input into the latent space. Equation (3)
60 | computes the reconstruction of the input, while equation (4) computes the
61 | reconstruction error.
62 |
63 | .. math::
64 |
65 | \tilde{x} ~ q_D(\tilde{x}|x) (1)
66 |
67 | y = s(W \tilde{x} + b) (2)
68 |
69 | x = s(W' y + b') (3)
70 |
71 | L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)] (4)
72 |
73 | """
74 |
75 | def __init__(self, numpy_rng, theano_rng=None, input=None,
76 | n_visible=784, n_hidden=500,
77 | W=None, bhid=None, bvis=None):
78 | """
79 | Initialize the dA class by specifying the number of visible units (the
80 | dimension d of the input ), the number of hidden units ( the dimension
81 | d' of the latent or hidden space ) and the corruption level. The
82 | constructor also receives symbolic variables for the input, weights and
83 | bias. Such a symbolic variables are useful when, for example the input
84 | is the result of some computations, or when weights are shared between
85 | the dA and an MLP layer. When dealing with SdAs this always happens,
86 | the dA on layer 2 gets as input the output of the dA on layer 1,
87 | and the weights of the dA are used in the second stage of training
88 | to construct an MLP.
89 |
90 | :type numpy_rng: numpy.random.RandomState
91 | :param numpy_rng: number random generator used to generate weights
92 |
93 | :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
94 | :param theano_rng: Theano random generator; if None is given one is
95 | generated based on a seed drawn from `rng`
96 |
97 | :type input: theano.tensor.TensorType
98 | :param input: a symbolic description of the input or None for
99 | standalone dA
100 |
101 | :type n_visible: int
102 | :param n_visible: number of visible units
103 |
104 | :type n_hidden: int
105 | :param n_hidden: number of hidden units
106 |
107 | :type W: theano.tensor.TensorType
108 | :param W: Theano variable pointing to a set of weights that should be
109 | shared belong the dA and another architecture; if dA should
110 | be standalone set this to None
111 |
112 | :type bhid: theano.tensor.TensorType
113 | :param bhid: Theano variable pointing to a set of biases values (for
114 | hidden units) that should be shared belong dA and another
115 | architecture; if dA should be standalone set this to None
116 |
117 | :type bvis: theano.tensor.TensorType
118 | :param bvis: Theano variable pointing to a set of biases values (for
119 | visible units) that should be shared belong dA and another
120 | architecture; if dA should be standalone set this to None
121 |
122 |
123 | """
124 | self.n_visible = n_visible
125 | self.n_hidden = n_hidden
126 |
127 | # create a Theano random generator that gives symbolic random values
128 | if not theano_rng:
129 | theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
130 |
131 | # note : W' was written as `W_prime` and b' as `b_prime`
132 | if not W:
133 | # W is initialized with `initial_W` which is uniformely sampled
134 | # from -4*sqrt(6./(n_visible+n_hidden)) and
135 | # 4*sqrt(6./(n_hidden+n_visible))the output of uniform if
136 | # converted using asarray to dtype
137 | # theano.config.floatX so that the code is runable on GPU
138 | initial_W = numpy.asarray(numpy_rng.uniform(
139 | low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
140 | high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
141 | size=(n_visible, n_hidden)), dtype=theano.config.floatX)
142 | W = theano.shared(value=initial_W, name='W', borrow=True)
143 |
144 | if not bvis:
145 | bvis = theano.shared(value=numpy.zeros(n_visible,
146 | dtype=theano.config.floatX),
147 | borrow=True)
148 |
149 | if not bhid:
150 | bhid = theano.shared(value=numpy.zeros(n_hidden,
151 | dtype=theano.config.floatX),
152 | name='b',
153 | borrow=True)
154 |
155 | self.W = W
156 | # b corresponds to the bias of the hidden
157 | self.b = bhid
158 | # b_prime corresponds to the bias of the visible
159 | self.b_prime = bvis
160 | # tied weights, therefore W_prime is W transpose
161 | self.W_prime = self.W.T
162 | self.theano_rng = theano_rng
163 | # if no input is given, generate a variable representing the input
164 | if input == None:
165 | # we use a matrix because we expect a minibatch of several
166 | # examples, each example being a row
167 | self.x = T.dmatrix(name='input')
168 | else:
169 | self.x = input
170 |
171 | self.params = [self.W, self.b, self.b_prime]
172 |
173 | def get_corrupted_input(self, input, corruption_level):
174 | """This function keeps ``1-corruption_level`` entries of the inputs the
175 | same and zero-out randomly selected subset of size ``coruption_level``
176 | Note : first argument of theano.rng.binomial is the shape(size) of
177 | random numbers that it should produce
178 | second argument is the number of trials
179 | third argument is the probability of success of any trial
180 |
181 | this will produce an array of 0s and 1s where 1 has a
182 | probability of 1 - ``corruption_level`` and 0 with
183 | ``corruption_level``
184 |
185 | The binomial function return int64 data type by
186 | default. int64 multiplicated by the input
187 | type(floatX) always return float64. To keep all data
188 | in floatX when floatX is float32, we set the dtype of
189 | the binomial to floatX. As in our case the value of
190 | the binomial is always 0 or 1, this don't change the
191 | result. This is needed to allow the gpu to work
192 | correctly as it only support float32 for now.
193 |
194 | """
195 | return self.theano_rng.binomial(size=input.shape, n=1,
196 | p=1 - corruption_level,
197 | dtype=theano.config.floatX) * input
198 |
199 | def get_hidden_values(self, input):
200 | """ Computes the values of the hidden layer """
201 | return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
202 |
203 | def get_reconstructed_input(self, hidden):
204 | """Computes the reconstructed input given the values of the
205 | hidden layer
206 |
207 | """
208 | return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
209 |
210 | def get_cost_updates(self, corruption_level, learning_rate):
211 | """ This function computes the cost and the updates for one trainng
212 | step of the dA """
213 |
214 | tilde_x = self.get_corrupted_input(self.x, corruption_level)
215 | y = self.get_hidden_values(tilde_x)
216 | z = self.get_reconstructed_input(y)
217 | # note : we sum over the size of a datapoint; if we are using
218 | # minibatches, L will be a vector, with one entry per
219 | # example in minibatch
220 | L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
221 | # note : L is now a vector, where each element is the
222 | # cross-entropy cost of the reconstruction of the
223 | # corresponding example of the minibatch. We need to
224 | # compute the average of all these to get the cost of
225 | # the minibatch
226 | cost = T.mean(L)
227 |
228 | # compute the gradients of the cost of the `dA` with respect
229 | # to its parameters
230 | gparams = T.grad(cost, self.params)
231 | # generate the list of updates
232 | updates = []
233 | for param, gparam in zip(self.params, gparams):
234 | updates.append((param, param - learning_rate * gparam))
235 |
236 | return (cost, updates)
237 |
238 |
239 | def test_dA(learning_rate=0.1, training_epochs=15,
240 | dataset='mnist.pkl.gz',
241 | batch_size=20, output_folder='dA_plots'):
242 |
243 | """
244 | This demo is tested on MNIST
245 |
246 | :type learning_rate: float
247 | :param learning_rate: learning rate used for training the DeNosing
248 | AutoEncoder
249 |
250 | :type training_epochs: int
251 | :param training_epochs: number of epochs used for training
252 |
253 | :type dataset: string
254 | :param dataset: path to the picked dataset
255 |
256 | """
257 | datasets = load_data(dataset)
258 | train_set_x, train_set_y = datasets[0]
259 |
260 | # compute number of minibatches for training, validation and testing
261 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
262 |
263 | # allocate symbolic variables for the data
264 | index = T.lscalar() # index to a [mini]batch
265 | x = T.matrix('x') # the data is presented as rasterized images
266 |
267 | if not os.path.isdir(output_folder):
268 | os.makedirs(output_folder)
269 | os.chdir(output_folder)
270 | ####################################
271 | # BUILDING THE MODEL NO CORRUPTION #
272 | ####################################
273 |
274 | rng = numpy.random.RandomState(123)
275 | theano_rng = RandomStreams(rng.randint(2 ** 30))
276 |
277 | da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
278 | n_visible=28 * 28, n_hidden=500)
279 |
280 | cost, updates = da.get_cost_updates(corruption_level=0.,
281 | learning_rate=learning_rate)
282 |
283 | train_da = theano.function([index], cost, updates=updates,
284 | givens={x: train_set_x[index * batch_size:
285 | (index + 1) * batch_size]})
286 |
287 | start_time = time.clock()
288 |
289 | ############
290 | # TRAINING #
291 | ############
292 |
293 | # go through training epochs
294 | for epoch in xrange(training_epochs):
295 | # go through trainng set
296 | c = []
297 | for batch_index in xrange(n_train_batches):
298 | c.append(train_da(batch_index))
299 |
300 | print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
301 |
302 | end_time = time.clock()
303 |
304 | training_time = (end_time - start_time)
305 |
306 | print >> sys.stderr, ('The no corruption code for file ' +
307 | os.path.split(__file__)[1] +
308 | ' ran for %.2fm' % ((training_time) / 60.))
309 | image = PIL.Image.fromarray(
310 | tile_raster_images(X=da.W.get_value(borrow=True).T,
311 | img_shape=(28, 28), tile_shape=(10, 10),
312 | tile_spacing=(1, 1)))
313 | image.save('filters_corruption_0.png')
314 |
315 | #####################################
316 | # BUILDING THE MODEL CORRUPTION 30% #
317 | #####################################
318 |
319 | rng = numpy.random.RandomState(123)
320 | theano_rng = RandomStreams(rng.randint(2 ** 30))
321 |
322 | da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
323 | n_visible=28 * 28, n_hidden=500)
324 |
325 | cost, updates = da.get_cost_updates(corruption_level=0.3,
326 | learning_rate=learning_rate)
327 |
328 | train_da = theano.function([index], cost, updates=updates,
329 | givens={x: train_set_x[index * batch_size:
330 | (index + 1) * batch_size]})
331 |
332 | start_time = time.clock()
333 |
334 | ############
335 | # TRAINING #
336 | ############
337 |
338 | # go through training epochs
339 | for epoch in xrange(training_epochs):
340 | # go through trainng set
341 | c = []
342 | for batch_index in xrange(n_train_batches):
343 | c.append(train_da(batch_index))
344 |
345 | print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
346 |
347 | end_time = time.clock()
348 |
349 | training_time = (end_time - start_time)
350 |
351 | print >> sys.stderr, ('The 30% corruption code for file ' +
352 | os.path.split(__file__)[1] +
353 | ' ran for %.2fm' % (training_time / 60.))
354 |
355 | image = PIL.Image.fromarray(tile_raster_images(
356 | X=da.W.get_value(borrow=True).T,
357 | img_shape=(28, 28), tile_shape=(10, 10),
358 | tile_spacing=(1, 1)))
359 | image.save('filters_corruption_30.png')
360 |
361 | os.chdir('../')
362 |
363 |
364 | if __name__ == '__main__':
365 | test_dA()
366 |
--------------------------------------------------------------------------------
/TheanoDL/grbm.py:
--------------------------------------------------------------------------------
1 | from rbm import RBM
2 |
3 | import theano
4 | import theano.tensor as T
5 |
6 | # --------------------------------------------------------------------------
7 | class GBRBM(RBM):
8 |
9 | # --------------------------------------------------------------------------
10 | # initialize class
11 | def __init__(self, input, n_in=784, n_hidden=500, \
12 | W=None, hbias=None, vbias=None, numpy_rng=None, transpose=False, activation=T.nnet.sigmoid,
13 | theano_rng=None, name='grbm', W_r=None, dropout=0, dropconnect=0):
14 |
15 | # initialize parent class (RBM)
16 | RBM.__init__(self, input=input, n_visible=n_in, n_hidden=n_hidden, \
17 | W=W, hbias=hbias, vbias=vbias, numpy_rng=numpy_rng,
18 | theano_rng=theano_rng)
19 |
20 | # --------------------------------------------------------------------------
21 | def type(self):
22 | return 'gauss-bernoulli'
23 |
24 | # --------------------------------------------------------------------------
25 | # overwrite free energy function (here only vbias term is different)
26 | def free_energy(self, v_sample):
27 | wx_b = T.dot(v_sample, self.W) + self.hbias
28 | vbias_term = 0.5 * T.dot((v_sample - self.vbias), (v_sample - self.vbias).T)
29 | hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
30 | return -hidden_term - T.diagonal(vbias_term)
31 |
32 | # --------------------------------------------------------------------------
33 | # overwrite sampling function (here you sample from normal distribution)
34 | def sample_v_given_h(self, h0_sample):
35 |
36 | pre_sigmoid_v1, v1_mean = self.propdown(h0_sample)
37 |
38 | '''
39 | Since the input data is normalized to unit variance and zero mean, we do not have to sample
40 | from a normal distribution and pass the pre_sigmoid instead. If this is not the case, we have to sample the
41 | distribution.
42 | '''
43 | # in fact, you don't need to sample from normal distribution here and just use pre_sigmoid activation instead
44 | # v1_sample = self.theano_rng.normal(size=v1_mean.shape, avg=v1_mean, std=1.0, dtype=theano.config.floatX) + pre_sigmoid_v1
45 | v1_sample = pre_sigmoid_v1
46 | return [pre_sigmoid_v1, v1_mean, v1_sample]
47 |
48 | def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
49 | """
50 | RMS as the reconstructed cost
51 |
52 | """
53 |
54 | rms_cost = T.mean(T.sum((self.input - pre_sigmoid_nv)** 2, axis=1))
55 | return rms_cost
56 |
--------------------------------------------------------------------------------
/TheanoDL/grbm.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/grbm.pyc
--------------------------------------------------------------------------------
/TheanoDL/logistic_cg.py:
--------------------------------------------------------------------------------
1 | """
2 | This tutorial introduces logistic regression using Theano and conjugate
3 | gradient descent.
4 |
5 | Logistic regression is a probabilistic, linear classifier. It is parametrized
6 | by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
7 | done by projecting data points onto a set of hyperplanes, the distance to
8 | which is used to determine a class membership probability.
9 |
10 | Mathematically, this can be written as:
11 |
12 | .. math::
13 | P(Y=i|x, W,b) &= softmax_i(W x + b) \\
14 | &= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}
15 |
16 |
17 | The output of the model or prediction is then done by taking the argmax of
18 | the vector whose i'th element is P(Y=i|x).
19 |
20 | .. math::
21 |
22 | y_{pred} = argmax_i P(Y=i|x,W,b)
23 |
24 |
25 | This tutorial presents a stochastic gradient descent optimization method
26 | suitable for large datasets, and a conjugate gradient optimization method
27 | that is suitable for smaller datasets.
28 |
29 |
30 | References:
31 |
32 | - textbooks: "Pattern Recognition and Machine Learning" -
33 | Christopher M. Bishop, section 4.3.2
34 |
35 |
36 | """
37 | __docformat__ = 'restructedtext en'
38 |
39 |
40 | import cPickle
41 | import gzip
42 | import os
43 | import sys
44 | import time
45 |
46 | import numpy
47 |
48 | import theano
49 | import theano.tensor as T
50 |
51 | from logistic_sgd import load_data
52 |
53 |
54 | class LogisticRegression(object):
55 | """Multi-class Logistic Regression Class
56 |
57 | The logistic regression is fully described by a weight matrix :math:`W`
58 | and bias vector :math:`b`. Classification is done by projecting data
59 | points onto a set of hyperplanes, the distance to which is used to
60 | determine a class membership probability.
61 | """
62 |
63 | def __init__(self, input, n_in, n_out):
64 | """ Initialize the parameters of the logistic regression
65 |
66 | :type input: theano.tensor.TensorType
67 | :param input: symbolic variable that describes the input of the
68 | architecture ( one minibatch)
69 |
70 | :type n_in: int
71 | :param n_in: number of input units, the dimension of the space in
72 | which the datapoint lies
73 |
74 | :type n_out: int
75 | :param n_out: number of output units, the dimension of the space in
76 | which the target lies
77 |
78 | """
79 |
80 | # initialize theta = (W,b) with 0s; W gets the shape (n_in, n_out),
81 | # while b is a vector of n_out elements, making theta a vector of
82 | # n_in*n_out + n_out elements
83 | self.theta = theano.shared(value=numpy.zeros(n_in * n_out + n_out,
84 | dtype=theano.config.floatX),
85 | name='theta',
86 | borrow=True)
87 | # W is represented by the fisr n_in*n_out elements of theta
88 | self.W = self.theta[0:n_in * n_out].reshape((n_in, n_out))
89 | # b is the rest (last n_out elements)
90 | self.b = self.theta[n_in * n_out:n_in * n_out + n_out]
91 |
92 | # compute vector of class-membership probabilities in symbolic form
93 | self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
94 |
95 | # compute prediction as class whose probability is maximal in
96 | # symbolic form
97 | self.y_pred = T.argmax(self.p_y_given_x, axis=1)
98 |
99 | def negative_log_likelihood(self, y):
100 | """Return the negative log-likelihood of the prediction of this model
101 | under a given target distribution.
102 |
103 | .. math::
104 |
105 | \frac{1}{|\mathcal{D}|}\mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
106 | \frac{1}{|\mathcal{D}|}\sum_{i=0}^{|\mathcal{D}|} \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
107 | \ell (\theta=\{W,b\}, \mathcal{D})
108 |
109 | :type y: theano.tensor.TensorType
110 | :param y: corresponds to a vector that gives for each example the
111 | correct label
112 | """
113 | return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
114 |
115 | def errors(self, y):
116 | """Return a float representing the number of errors in the minibatch
117 | over the total number of examples of the minibatch
118 |
119 | :type y: theano.tensor.TensorType
120 | :param y: corresponds to a vector that gives for each example
121 | the correct label
122 | """
123 |
124 | # check if y has same dimension of y_pred
125 | if y.ndim != self.y_pred.ndim:
126 | raise TypeError('y should have the same shape as self.y_pred',
127 | ('y', target.type, 'y_pred', self.y_pred.type))
128 | # check if y is of the correct datatype
129 | if y.dtype.startswith('int'):
130 | # the T.neq operator returns a vector of 0s and 1s, where 1
131 | # represents a mistake in prediction
132 | return T.mean(T.neq(self.y_pred, y))
133 | else:
134 | raise NotImplementedError()
135 |
136 |
137 | def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):
138 | """Demonstrate conjugate gradient optimization of a log-linear model
139 |
140 | This is demonstrated on MNIST.
141 |
142 | :type n_epochs: int
143 | :param n_epochs: number of epochs to run the optimizer
144 |
145 | :type mnist_pkl_gz: string
146 | :param mnist_pkl_gz: the path of the mnist training file from
147 | http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
148 |
149 | """
150 | #############
151 | # LOAD DATA #
152 | #############
153 | datasets = load_data(mnist_pkl_gz)
154 |
155 | train_set_x, train_set_y = datasets[0]
156 | valid_set_x, valid_set_y = datasets[1]
157 | test_set_x, test_set_y = datasets[2]
158 |
159 | batch_size = 600 # size of the minibatch
160 |
161 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
162 | n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
163 | n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
164 |
165 | ishape = (28, 28) # this is the size of MNIST images
166 | n_in = 28 * 28 # number of input units
167 | n_out = 10 # number of output units
168 |
169 | ######################
170 | # BUILD ACTUAL MODEL #
171 | ######################
172 | print '... building the model'
173 |
174 | # allocate symbolic variables for the data
175 | minibatch_offset = T.lscalar() # offset to the start of a [mini]batch
176 | x = T.matrix() # the data is presented as rasterized images
177 | y = T.ivector() # the labels are presented as 1D vector of
178 | # [int] labels
179 |
180 | # construct the logistic regression class
181 | classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
182 |
183 | # the cost we minimize during training is the negative log likelihood of
184 | # the model in symbolic format
185 | cost = classifier.negative_log_likelihood(y).mean()
186 |
187 | # compile a theano function that computes the mistakes that are made by
188 | # the model on a minibatch
189 | test_model = theano.function([minibatch_offset], classifier.errors(y),
190 | givens={
191 | x: test_set_x[minibatch_offset:minibatch_offset + batch_size],
192 | y: test_set_y[minibatch_offset:minibatch_offset + batch_size]},
193 | name="test")
194 |
195 | validate_model = theano.function([minibatch_offset], classifier.errors(y),
196 | givens={
197 | x: valid_set_x[minibatch_offset:
198 | minibatch_offset + batch_size],
199 | y: valid_set_y[minibatch_offset:
200 | minibatch_offset + batch_size]},
201 | name="validate")
202 |
203 | # compile a thenao function that returns the cost of a minibatch
204 | batch_cost = theano.function([minibatch_offset], cost,
205 | givens={
206 | x: train_set_x[minibatch_offset:
207 | minibatch_offset + batch_size],
208 | y: train_set_y[minibatch_offset:
209 | minibatch_offset + batch_size]},
210 | name="batch_cost")
211 |
212 | # compile a theano function that returns the gradient of the minibatch
213 | # with respect to theta
214 | batch_grad = theano.function([minibatch_offset],
215 | T.grad(cost, classifier.theta),
216 | givens={
217 | x: train_set_x[minibatch_offset:
218 | minibatch_offset + batch_size],
219 | y: train_set_y[minibatch_offset:
220 | minibatch_offset + batch_size]},
221 | name="batch_grad")
222 |
223 | # creates a function that computes the average cost on the training set
224 | def train_fn(theta_value):
225 | classifier.theta.set_value(theta_value, borrow=True)
226 | train_losses = [batch_cost(i * batch_size)
227 | for i in xrange(n_train_batches)]
228 | return numpy.mean(train_losses)
229 |
230 | # creates a function that computes the average gradient of cost with
231 | # respect to theta
232 | def train_fn_grad(theta_value):
233 | classifier.theta.set_value(theta_value, borrow=True)
234 | grad = batch_grad(0)
235 | for i in xrange(1, n_train_batches):
236 | grad += batch_grad(i * batch_size)
237 | return grad / n_train_batches
238 |
239 | validation_scores = [numpy.inf, 0]
240 |
241 | # creates the validation function
242 | def callback(theta_value):
243 | classifier.theta.set_value(theta_value, borrow=True)
244 | #compute the validation loss
245 | validation_losses = [validate_model(i * batch_size)
246 | for i in xrange(n_valid_batches)]
247 | this_validation_loss = numpy.mean(validation_losses)
248 | print('validation error %f %%' % (this_validation_loss * 100.,))
249 |
250 | # check if it is better then best validation score got until now
251 | if this_validation_loss < validation_scores[0]:
252 | # if so, replace the old one, and compute the score on the
253 | # testing dataset
254 | validation_scores[0] = this_validation_loss
255 | test_losses = [test_model(i * batch_size)
256 | for i in xrange(n_test_batches)]
257 | validation_scores[1] = numpy.mean(test_losses)
258 |
259 | ###############
260 | # TRAIN MODEL #
261 | ###############
262 |
263 | # using scipy conjugate gradient optimizer
264 | import scipy.optimize
265 | print ("Optimizing using scipy.optimize.fmin_cg...")
266 | start_time = time.clock()
267 | best_w_b = scipy.optimize.fmin_cg(
268 | f=train_fn,
269 | x0=numpy.zeros((n_in + 1) * n_out, dtype=x.dtype),
270 | fprime=train_fn_grad,
271 | callback=callback,
272 | disp=0,
273 | maxiter=n_epochs)
274 | end_time = time.clock()
275 | print(('Optimization complete with best validation score of %f %%, with '
276 | 'test performance %f %%') %
277 | (validation_scores[0] * 100., validation_scores[1] * 100.))
278 |
279 | print >> sys.stderr, ('The code for file ' +
280 | os.path.split(__file__)[1] +
281 | ' ran for %.1fs' % ((end_time - start_time)))
282 |
283 |
284 | if __name__ == '__main__':
285 | cg_optimization_mnist()
286 |
--------------------------------------------------------------------------------
/TheanoDL/logistic_sgd.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/logistic_sgd.pyc
--------------------------------------------------------------------------------
/TheanoDL/mlp.py:
--------------------------------------------------------------------------------
1 | """
2 | This tutorial introduces the multilayer perceptron using Theano.
3 |
4 | A multilayer perceptron is a logistic regressor where
5 | instead of feeding the input to the logistic regression you insert a
6 | intermediate layer, called the hidden layer, that has a nonlinear
7 | activation function (usually tanh or sigmoid) . One can use many such
8 | hidden layers making the architecture deep. The tutorial will also tackle
9 | the problem of MNIST digit classification.
10 |
11 | .. math::
12 |
13 | f(x) = G( b^{(2)} + W^{(2)}( s( b^{(1)} + W^{(1)} x))),
14 |
15 | References:
16 |
17 | - textbooks: "Pattern Recognition and Machine Learning" -
18 | Christopher M. Bishop, section 5
19 |
20 | """
21 | __docformat__ = 'restructedtext en'
22 |
23 |
24 | import cPickle
25 | import gzip
26 | import os
27 | import sys
28 | import time
29 |
30 | import numpy
31 |
32 | import theano
33 | import theano.tensor as T
34 |
35 |
36 | from logistic_sgd import LogisticRegression, load_data
37 |
38 |
39 | class HiddenLayer(object):
40 | def __init__(self, rng, input, n_in, n_out, W=None, b=None,
41 | activation=T.tanh):
42 | """
43 | Typical hidden layer of a MLP: units are fully-connected and have
44 | sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
45 | and the bias vector b is of shape (n_out,).
46 |
47 | NOTE : The nonlinearity used here is tanh
48 |
49 | Hidden unit activation is given by: tanh(dot(input,W) + b)
50 |
51 | :type rng: numpy.random.RandomState
52 | :param rng: a random number generator used to initialize weights
53 |
54 | :type input: theano.tensor.dmatrix
55 | :param input: a symbolic tensor of shape (n_examples, n_in)
56 |
57 | :type n_in: int
58 | :param n_in: dimensionality of input
59 |
60 | :type n_out: int
61 | :param n_out: number of hidden units
62 |
63 | :type activation: theano.Op or function
64 | :param activation: Non linearity to be applied in the hidden
65 | layer
66 | """
67 | self.input = input
68 |
69 | # `W` is initialized with `W_values` which is uniformely sampled
70 | # from sqrt(-6./(n_in+n_hidden)) and sqrt(6./(n_in+n_hidden))
71 | # for tanh activation function
72 | # the output of uniform if converted using asarray to dtype
73 | # theano.config.floatX so that the code is runable on GPU
74 | # Note : optimal initialization of weights is dependent on the
75 | # activation function used (among other things).
76 | # For example, results presented in [Xavier10] suggest that you
77 | # should use 4 times larger initial weights for sigmoid
78 | # compared to tanh
79 | # We have no info for other function, so we use the same as
80 | # tanh.
81 | if W is None:
82 | W_values = numpy.asarray(rng.uniform(
83 | low=-numpy.sqrt(6. / (n_in + n_out)),
84 | high=numpy.sqrt(6. / (n_in + n_out)),
85 | size=(n_in, n_out)), dtype=theano.config.floatX)
86 | if activation == theano.tensor.nnet.sigmoid:
87 | W_values *= 4
88 |
89 | W = theano.shared(value=W_values, name='W', borrow=True)
90 |
91 | if b is None:
92 | b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
93 | b = theano.shared(value=b_values, name='b', borrow=True)
94 |
95 | self.W = W
96 | self.b = b
97 |
98 | lin_output = T.dot(input, self.W) + self.b
99 | self.output = (lin_output if activation is None
100 | else activation(lin_output))
101 | # parameters of the model
102 | self.params = [self.W, self.b]
103 |
104 |
105 | class MLP(object):
106 | """Multi-Layer Perceptron Class
107 |
108 | A multilayer perceptron is a feedforward artificial neural network model
109 | that has one layer or more of hidden units and nonlinear activations.
110 | Intermediate layers usually have as activation function tanh or the
111 | sigmoid function (defined here by a ``HiddenLayer`` class) while the
112 | top layer is a softamx layer (defined here by a ``LogisticRegression``
113 | class).
114 | """
115 |
116 | def __init__(self, rng, input, n_in, n_hidden, n_out):
117 | """Initialize the parameters for the multilayer perceptron
118 |
119 | :type rng: numpy.random.RandomState
120 | :param rng: a random number generator used to initialize weights
121 |
122 | :type input: theano.tensor.TensorType
123 | :param input: symbolic variable that describes the input of the
124 | architecture (one minibatch)
125 |
126 | :type n_in: int
127 | :param n_in: number of input units, the dimension of the space in
128 | which the datapoints lie
129 |
130 | :type n_hidden: int
131 | :param n_hidden: number of hidden units
132 |
133 | :type n_out: int
134 | :param n_out: number of output units, the dimension of the space in
135 | which the labels lie
136 |
137 | """
138 |
139 | # Since we are dealing with a one hidden layer MLP, this will translate
140 | # into a HiddenLayer with a tanh activation function connected to the
141 | # LogisticRegression layer; the activation function can be replaced by
142 | # sigmoid or any other nonlinear function
143 | self.hiddenLayer = HiddenLayer(rng=rng, input=input,
144 | n_in=n_in, n_out=n_hidden,
145 | activation=T.tanh)
146 |
147 | # The logistic regression layer gets as input the hidden units
148 | # of the hidden layer
149 | self.logRegressionLayer = LogisticRegression(
150 | input=self.hiddenLayer.output,
151 | n_in=n_hidden,
152 | n_out=n_out)
153 |
154 | # L1 norm ; one regularization option is to enforce L1 norm to
155 | # be small
156 | self.L1 = abs(self.hiddenLayer.W).sum() \
157 | + abs(self.logRegressionLayer.W).sum()
158 |
159 | # square of L2 norm ; one regularization option is to enforce
160 | # square of L2 norm to be small
161 | self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
162 | + (self.logRegressionLayer.W ** 2).sum()
163 |
164 | # negative log likelihood of the MLP is given by the negative
165 | # log likelihood of the output of the model, computed in the
166 | # logistic regression layer
167 | self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
168 | # same holds for the function computing the number of errors
169 | self.errors = self.logRegressionLayer.errors
170 |
171 | # the parameters of the model are the parameters of the two layer it is
172 | # made out of
173 | self.params = self.hiddenLayer.params + self.logRegressionLayer.params
174 |
175 |
176 | def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
177 | dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
178 | """
179 | Demonstrate stochastic gradient descent optimization for a multilayer
180 | perceptron
181 |
182 | This is demonstrated on MNIST.
183 |
184 | :type learning_rate: float
185 | :param learning_rate: learning rate used (factor for the stochastic
186 | gradient
187 |
188 | :type L1_reg: float
189 | :param L1_reg: L1-norm's weight when added to the cost (see
190 | regularization)
191 |
192 | :type L2_reg: float
193 | :param L2_reg: L2-norm's weight when added to the cost (see
194 | regularization)
195 |
196 | :type n_epochs: int
197 | :param n_epochs: maximal number of epochs to run the optimizer
198 |
199 | :type dataset: string
200 | :param dataset: the path of the MNIST dataset file from
201 | http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
202 |
203 |
204 | """
205 | datasets = load_data(dataset)
206 |
207 | train_set_x, train_set_y = datasets[0]
208 | valid_set_x, valid_set_y = datasets[1]
209 | test_set_x, test_set_y = datasets[2]
210 |
211 | # compute number of minibatches for training, validation and testing
212 | n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
213 | n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
214 | n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
215 |
216 | ######################
217 | # BUILD ACTUAL MODEL #
218 | ######################
219 | print '... building the model'
220 |
221 | # allocate symbolic variables for the data
222 | index = T.lscalar() # index to a [mini]batch
223 | x = T.matrix('x') # the data is presented as rasterized images
224 | y = T.ivector('y') # the labels are presented as 1D vector of
225 | # [int] labels
226 |
227 | rng = numpy.random.RandomState(1234)
228 |
229 | # construct the MLP class
230 | classifier = MLP(rng=rng, input=x, n_in=28 * 28,
231 | n_hidden=n_hidden, n_out=10)
232 |
233 | # the cost we minimize during training is the negative log likelihood of
234 | # the model plus the regularization terms (L1 and L2); cost is expressed
235 | # here symbolically
236 | cost = classifier.negative_log_likelihood(y) \
237 | + L1_reg * classifier.L1 \
238 | + L2_reg * classifier.L2_sqr
239 |
240 | # compiling a Theano function that computes the mistakes that are made
241 | # by the model on a minibatch
242 | test_model = theano.function(inputs=[index],
243 | outputs=classifier.errors(y),
244 | givens={
245 | x: test_set_x[index * batch_size:(index + 1) * batch_size],
246 | y: test_set_y[index * batch_size:(index + 1) * batch_size]})
247 |
248 | validate_model = theano.function(inputs=[index],
249 | outputs=classifier.errors(y),
250 | givens={
251 | x: valid_set_x[index * batch_size:(index + 1) * batch_size],
252 | y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
253 |
254 | # compute the gradient of cost with respect to theta (sotred in params)
255 | # the resulting gradients will be stored in a list gparams
256 | gparams = []
257 | for param in classifier.params:
258 | gparam = T.grad(cost, param)
259 | gparams.append(gparam)
260 |
261 | # specify how to update the parameters of the model as a list of
262 | # (variable, update expression) pairs
263 | updates = []
264 | # given two list the zip A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4] of
265 | # same length, zip generates a list C of same size, where each element
266 | # is a pair formed from the two lists :
267 | # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
268 | for param, gparam in zip(classifier.params, gparams):
269 | updates.append((param, param - learning_rate * gparam))
270 |
271 | # compiling a Theano function `train_model` that returns the cost, but
272 | # in the same time updates the parameter of the model based on the rules
273 | # defined in `updates`
274 | train_model = theano.function(inputs=[index], outputs=cost,
275 | updates=updates,
276 | givens={
277 | x: train_set_x[index * batch_size:(index + 1) * batch_size],
278 | y: train_set_y[index * batch_size:(index + 1) * batch_size]})
279 |
280 | ###############
281 | # TRAIN MODEL #
282 | ###############
283 | print '... training'
284 |
285 | # early-stopping parameters
286 | patience = 10000 # look as this many examples regardless
287 | patience_increase = 2 # wait this much longer when a new best is
288 | # found
289 | improvement_threshold = 0.995 # a relative improvement of this much is
290 | # considered significant
291 | validation_frequency = min(n_train_batches, patience / 2)
292 | # go through this many
293 | # minibatche before checking the network
294 | # on the validation set; in this case we
295 | # check every epoch
296 |
297 | best_params = None
298 | best_validation_loss = numpy.inf
299 | best_iter = 0
300 | test_score = 0.
301 | start_time = time.clock()
302 |
303 | epoch = 0
304 | done_looping = False
305 |
306 | while (epoch < n_epochs) and (not done_looping):
307 | epoch = epoch + 1
308 | for minibatch_index in xrange(n_train_batches):
309 |
310 | minibatch_avg_cost = train_model(minibatch_index)
311 | # iteration number
312 | iter = (epoch - 1) * n_train_batches + minibatch_index
313 |
314 | if (iter + 1) % validation_frequency == 0:
315 | # compute zero-one loss on validation set
316 | validation_losses = [validate_model(i) for i
317 | in xrange(n_valid_batches)]
318 | this_validation_loss = numpy.mean(validation_losses)
319 |
320 | print('epoch %i, minibatch %i/%i, validation error %f %%' %
321 | (epoch, minibatch_index + 1, n_train_batches,
322 | this_validation_loss * 100.))
323 |
324 | # if we got the best validation score until now
325 | if this_validation_loss < best_validation_loss:
326 | #improve patience if loss improvement is good enough
327 | if this_validation_loss < best_validation_loss * \
328 | improvement_threshold:
329 | patience = max(patience, iter * patience_increase)
330 |
331 | best_validation_loss = this_validation_loss
332 | best_iter = iter
333 |
334 | # test it on the test set
335 | test_losses = [test_model(i) for i
336 | in xrange(n_test_batches)]
337 | test_score = numpy.mean(test_losses)
338 |
339 | print((' epoch %i, minibatch %i/%i, test error of '
340 | 'best model %f %%') %
341 | (epoch, minibatch_index + 1, n_train_batches,
342 | test_score * 100.))
343 |
344 | if patience <= iter:
345 | done_looping = True
346 | break
347 |
348 | end_time = time.clock()
349 | print(('Optimization complete. Best validation score of %f %% '
350 | 'obtained at iteration %i, with test performance %f %%') %
351 | (best_validation_loss * 100., best_iter + 1, test_score * 100.))
352 | print >> sys.stderr, ('The code for file ' +
353 | os.path.split(__file__)[1] +
354 | ' ran for %.2fm' % ((end_time - start_time) / 60.))
355 |
356 |
357 | if __name__ == '__main__':
358 | test_mlp()
359 |
--------------------------------------------------------------------------------
/TheanoDL/mlp.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/mlp.pyc
--------------------------------------------------------------------------------
/TheanoDL/rbm.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/TheanoDL/rbm.pyc
--------------------------------------------------------------------------------
/TheanoDL/rbm_gnumpy.py:
--------------------------------------------------------------------------------
1 | # this is a modified version of the example script that comes with cudamat
2 |
3 | def test_gnumpy(dat, num_epochs):
4 | import gnumpy as gpu
5 | import numpy
6 | import time
7 | # load data. is 2 dimensional: 60000 X 784
8 | #dat = gpu.garray(load('mnist_cudaTest').T/255.)
9 | # training parameters
10 | epsilon = 0.1
11 | momentum = 0.9
12 | batch_size = 128
13 | num_batches = dat.shape[0]/batch_size
14 | # model parameters
15 | num_vis = dat.shape[1]
16 | num_hid = 4096
17 | # initialize weights
18 | w_vh = 0.1 * gpu.randn(num_vis, num_hid)
19 | w_v = gpu.zeros(num_vis)
20 | w_h = -4. * gpu.ones(num_hid)
21 | # initialize weight updates
22 | wu_vh = gpu.zeros((num_vis, num_hid))
23 | wu_v = gpu.zeros(num_vis)
24 | wu_h = gpu.zeros(num_hid)
25 | for epoch in range(num_epochs):
26 | err = []
27 | tic = time.clock()
28 | for batch in range(num_batches):
29 | # positive phase
30 | v1 = dat[batch*batch_size : (batch + 1)*batch_size]
31 | h1 = (gpu.dot(v1, w_vh) + w_h).logistic()
32 | # sample hiddens
33 | hSampled = h1.rand() < h1
34 | # negative phase
35 | v2 = (gpu.dot(hSampled, w_vh.T) + w_v).logistic()
36 | h2 = (gpu.dot(v2, w_vh) + w_h).logistic()
37 | # update weights
38 | wu_vh = wu_vh * momentum + gpu.dot(v1.T, h1) - gpu.dot(v2.T, h2)
39 | wu_v = wu_v * momentum + v1.sum(0) - v2.sum(0)
40 | wu_h = wu_h * momentum + h1.sum(0) - h2.sum(0)
41 |
42 | w_vh += wu_vh * (epsilon/batch_size)
43 | w_v += wu_v * (epsilon/batch_size)
44 | w_h += wu_h * (epsilon/batch_size)
45 | # calculate reconstruction error
46 | err.append((v2-v1).euclid_norm()**2/(num_vis*batch_size))
47 | toc = time.clock()
48 | print "Mean squared error: %.4f, takes time: %d" % (numpy.mean(err), toc-tic)
49 | return w_vh, w_v, w_h
50 |
51 |
52 | def test_cpu_numpy(dat, num_epochs):
53 | import numpy
54 | import time
55 | logistic = lambda x:1.0 / (1.0 + numpy.exp(-1.0 * x))
56 | epsilon = 0.1
57 | momentum = 0.9
58 | batch_size = 128
59 | num_batches = dat.shape[0]/batch_size
60 | # model parameters
61 | num_vis = dat.shape[1]
62 | num_hid = 4096
63 | # initialize weights
64 | w_vh = 0.1 * numpy.random.randn(num_vis, num_hid)
65 | w_v = numpy.zeros(num_vis)
66 | w_h = -4. * numpy.ones(num_hid)
67 | # initialize weight updates
68 | wu_vh = numpy.zeros((num_vis, num_hid))
69 | wu_v = numpy.zeros(num_vis)
70 | wu_h = numpy.zeros(num_hid)
71 | for epoch in range(num_epochs):
72 | err = []
73 | tic = time.clock()
74 | for batch in range(num_batches):
75 | # positive phase
76 | v1 = dat[batch*batch_size : (batch + 1)*batch_size]
77 | h1 = logistic(numpy.dot(v1, w_vh) + w_h)
78 | # sample hiddens
79 | hSampled = numpy.random.rand(h1.shape[0], h1.shape[1]) < h1
80 | # negative phase
81 | v2 = logistic(numpy.dot(hSampled, w_vh.T) + w_v)
82 | h2 = logistic(numpy.dot(v2, w_vh) + w_h)
83 | # update weights
84 | wu_vh = wu_vh * momentum + numpy.dot(v1.T, h1) - numpy.dot(v2.T, h2)
85 | wu_v = wu_v * momentum + v1.sum(0) - v2.sum(0)
86 | wu_h = wu_h * momentum + h1.sum(0) - h2.sum(0)
87 |
88 | w_vh += wu_vh * (epsilon/batch_size)
89 | w_v += wu_v * (epsilon/batch_size)
90 | w_h += wu_h * (epsilon/batch_size)
91 | # calculate reconstruction error
92 | err.append(sum(sum(v2-v1)**2)/(num_vis*batch_size))
93 | toc = time.clock()
94 | print "Mean squared error: %.4f, takes time: %d" % (numpy.mean(err), toc-tic)
95 | return w_vh, w_v, w_h
96 |
97 |
98 | def sigmoid(z):
99 | s = 1.0 / (1.0 + np.exp**(-1.0 * z))
100 | return s
101 |
--------------------------------------------------------------------------------
/TheanoDL/rbm_mean.py:
--------------------------------------------------------------------------------
1 | """This tutorial introduces restricted boltzmann machines (RBM) using Theano.
2 |
3 | Boltzmann Machines (BMs) are a particular form of energy-based model which
4 | contain hidden variables. Restricted Boltzmann Machines further restrict BMs
5 | to those without visible-visible and hidden-hidden connections.
6 | """
7 | import cPickle
8 | import gzip
9 | import time
10 | import PIL.Image
11 |
12 | import numpy
13 |
14 | import theano
15 | import theano.tensor as T
16 | import os
17 |
18 | from theano.tensor.shared_randomstreams import RandomStreams
19 |
20 | from utils import tile_raster_images
21 | from logistic_sgd import load_data
22 |
23 |
24 | class RBM_Mean(object):
25 | """Restricted Boltzmann Machine (RBM) """
26 | def __init__(self, input=None, n_visible=784, n_hidden=500, \
27 | W=None, hbias=None, vbias=None, numpy_rng=None,
28 | theano_rng=None, MU=None):
29 | """
30 | The same as RBM apart from that for inputs we substract mean.
31 | See:
32 | Data Normalization in the Learning of Restricted Boltzmann Machines
33 | """
34 |
35 | self.n_visible = n_visible
36 | self.n_hidden = n_hidden
37 | self.MU = MU
38 | # initialize input layer for standalone RBM or layer0 of DBN
39 | self.input = input - self.MU
40 | if numpy_rng is None:
41 | # create a number generator
42 | numpy_rng = numpy.random.RandomState(1234)
43 |
44 | if theano_rng is None:
45 | theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
46 |
47 | if W is None:
48 | # W is initialized with `initial_W` which is uniformely
49 | # sampled from -4*sqrt(6./(n_visible+n_hidden)) and
50 | # 4*sqrt(6./(n_hidden+n_visible)) the output of uniform if
51 | # converted using asarray to dtype theano.config.floatX so
52 | # that the code is runable on GPU
53 | initial_W = numpy.asarray(numpy_rng.uniform(
54 | low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
55 | high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
56 | size=(n_visible, n_hidden)),
57 | dtype=theano.config.floatX)
58 | # theano shared variables for weights and biases
59 | W = theano.shared(value=initial_W, name='W', borrow=True)
60 |
61 | if hbias is None:
62 | # create shared variable for hidden units bias
63 | hbias = theano.shared(value=numpy.zeros(n_hidden,
64 | dtype=theano.config.floatX),
65 | name='hbias', borrow=True)
66 |
67 | if vbias is None:
68 | # create shared variable for visible units bias
69 | vbias = theano.shared(value=numpy.zeros(n_visible,
70 | dtype=theano.config.floatX),
71 | name='vbias', borrow=True)
72 |
73 |
74 | if not input:
75 | self.input = T.matrix('input')
76 |
77 | self.W = W
78 | self.hbias = hbias
79 | self.vbias = vbias
80 | self.theano_rng = theano_rng
81 | # **** WARNING: It is not a good idea to put things in this list
82 | # other than shared variables created in this function.
83 | self.params = [self.W, self.hbias, self.vbias]
84 |
85 | def free_energy(self, v_sample):
86 | ''' Function to compute the free energy '''
87 | wx_b = T.dot(v_sample, self.W) + self.hbias
88 | vbias_term = T.dot(v_sample, self.vbias)
89 | hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
90 | return -hidden_term - vbias_term
91 |
92 | def propup(self, vis):
93 | '''This function propagates the visible units activation upwards to
94 | the hidden units
95 |
96 | Note that we return also the pre-sigmoid activation of the
97 | layer. As it will turn out later, due to how Theano deals with
98 | optimizations, this symbolic variable will be needed to write
99 | down a more stable computational graph (see details in the
100 | reconstruction cost function)
101 |
102 | '''
103 | vis = vis
104 | pre_sigmoid_activation = T.dot(vis, self.W) + self.hbias
105 | return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)]
106 |
107 | def sample_h_given_v(self, v0_sample):
108 | ''' This function infers state of hidden units given visible units '''
109 | # compute the activation of the hidden units given a sample of
110 | # the visibles
111 | pre_sigmoid_h1, h1_mean = self.propup(v0_sample)
112 | # get a sample of the hiddens given their activation
113 | # Note that theano_rng.binomial returns a symbolic sample of dtype
114 | # int64 by default. If we want to keep our computations in floatX
115 | # for the GPU we need to specify to return the dtype floatX
116 | h1_sample = self.theano_rng.binomial(size=h1_mean.shape,
117 | n=1, p=h1_mean,
118 | dtype=theano.config.floatX)
119 | return [pre_sigmoid_h1, h1_mean, h1_sample]
120 |
121 | def propdown(self, hid):
122 | '''This function propagates the hidden units activation downwards to
123 | the visible units
124 |
125 | Note that we return also the pre_sigmoid_activation of the
126 | layer. As it will turn out later, due to how Theano deals with
127 | optimizations, this symbolic variable will be needed to write
128 | down a more stable computational graph (see details in the
129 | reconstruction cost function)
130 |
131 | '''
132 | pre_sigmoid_activation = T.dot(hid, self.W.T) + self.vbias
133 | return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)]
134 |
135 | def sample_v_given_h(self, h0_sample):
136 | ''' This function infers state of visible units given hidden units '''
137 | # compute the activation of the visible given the hidden sample
138 | pre_sigmoid_v1, v1_mean = self.propdown(h0_sample)
139 | # get a sample of the visible given their activation
140 | # Note that theano_rng.binomial returns a symbolic sample of dtype
141 | # int64 by default. If we want to keep our computations in floatX
142 | # for the GPU we need to specify to return the dtype floatX
143 | v1_sample = self.theano_rng.binomial(size=v1_mean.shape,
144 | n=1, p=v1_mean,
145 | dtype=theano.config.floatX)
146 | v1_sample = v1_sample - self.MU
147 | return [pre_sigmoid_v1, v1_mean, v1_sample]
148 |
149 | def gibbs_hvh(self, h0_sample):
150 | ''' This function implements one step of Gibbs sampling,
151 | starting from the hidden state'''
152 | pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h0_sample)
153 | pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v1_sample)
154 | return [pre_sigmoid_v1, v1_mean, v1_sample,
155 | pre_sigmoid_h1, h1_mean, h1_sample]
156 |
157 | def gibbs_vhv(self, v0_sample):
158 | ''' This function implements one step of Gibbs sampling,
159 | starting from the visible state'''
160 | pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v0_sample)
161 | pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h1_sample)
162 | return [pre_sigmoid_h1, h1_mean, h1_sample,
163 | pre_sigmoid_v1, v1_mean, v1_sample]
164 |
165 | def get_cost_updates(self, lr=0.1, persistent=None, k=1):
166 | """This functions implements one step of CD-k or PCD-k
167 |
168 | :param lr: learning rate used to train the RBM
169 |
170 | :param persistent: None for CD. For PCD, shared variable
171 | containing old state of Gibbs chain. This must be a shared
172 | variable of size (batch size, number of hidden units).
173 |
174 | :param k: number of Gibbs steps to do in CD-k/PCD-k
175 |
176 | Returns a proxy for the cost and the updates dictionary. The
177 | dictionary contains the update rules for weights and biases but
178 | also an update of the shared variable used to store the persistent
179 | chain, if one is used.
180 |
181 | """
182 |
183 | # compute positive phase
184 | pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(self.input)
185 |
186 | # decide how to initialize persistent chain:
187 | # for CD, we use the newly generate hidden sample
188 | # for PCD, we initialize from the old state of the chain
189 | if persistent is None:
190 | chain_start = ph_sample
191 | else:
192 | chain_start = persistent
193 |
194 | # perform actual negative phase
195 | # in order to implement CD-k/PCD-k we need to scan over the
196 | # function that implements one gibbs step k times.
197 | # Read Theano tutorial on scan for more information :
198 | # http://deeplearning.net/software/theano/library/scan.html
199 | # the scan will return the entire Gibbs chain
200 | [pre_sigmoid_nvs, nv_means, nv_samples,
201 | pre_sigmoid_nhs, nh_means, nh_samples], updates = \
202 | theano.scan(self.gibbs_hvh,
203 | # the None are place holders, saying that
204 | # chain_start is the initial state corresponding to the
205 | # 6th output
206 | outputs_info=[None, None, None, None, None, chain_start],
207 | n_steps=k)
208 |
209 | # determine gradients on RBM parameters
210 | # not that we only need the sample at the end of the chain
211 | chain_end = nv_samples[-1]
212 |
213 | cost = T.mean(self.free_energy(self.input)) - T.mean(
214 | self.free_energy(chain_end))
215 | # We must not compute the gradient through the gibbs sampling
216 | gparams = T.grad(cost, self.params, consider_constant=[chain_end])
217 |
218 | # constructs the update dictionary
219 | for gparam, param in zip(gparams, self.params):
220 | # make sure that the learning rate is of the right dtype
221 | updates[param] = param - gparam * T.cast(lr,
222 | dtype=theano.config.floatX)
223 | if param.name=='W':
224 | W_temp = T.cast(param, dtype=theano.config.floatX)
225 | if param.name=='b':
226 | updates[param] = param - T.dot(self.MU.T, W_temp)
227 |
228 |
229 |
230 | if persistent:
231 | # Note that this works only if persistent is a shared variable
232 | updates[persistent] = nh_samples[-1]
233 | # pseudo-likelihood is a better proxy for PCD
234 | monitoring_cost = self.get_pseudo_likelihood_cost(updates)
235 | else:
236 | # reconstruction cross-entropy is a better proxy for CD
237 | monitoring_cost = self.get_reconstruction_cost(updates,
238 | pre_sigmoid_nvs[-1], nv_samples[-1])
239 |
240 | return monitoring_cost, updates
241 |
242 | def get_pseudo_likelihood_cost(self, updates):
243 | """Stochastic approximation to the pseudo-likelihood"""
244 |
245 | # index of bit i in expression p(x_i | x_{\i})
246 | bit_i_idx = theano.shared(value=0, name='bit_i_idx')
247 |
248 | # binarize the input image by rounding to nearest integer
249 | xi = T.round(self.input)
250 |
251 | # calculate free energy for the given bit configuration
252 | fe_xi = self.free_energy(xi)
253 |
254 | # flip bit x_i of matrix xi and preserve all other bits x_{\i}
255 | # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns
256 | # the result to xi_flip, instead of working in place on xi.
257 | xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])
258 |
259 | # calculate free energy with bit flipped
260 | fe_xi_flip = self.free_energy(xi_flip)
261 |
262 | # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i})))
263 | cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip -
264 | fe_xi)))
265 |
266 | # increment bit_i_idx % number as part of updates
267 | updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible
268 |
269 | return cost
270 |
271 | def get_reconstruction_cost(self, updates, pre_sigmoid_nv, nv_samples):
272 | """Approximation to the reconstruction error
273 |
274 | Note that this function requires the pre-sigmoid activation as
275 | input. To understand why this is so you need to understand a
276 | bit about how Theano works. Whenever you compile a Theano
277 | function, the computational graph that you pass as input gets
278 | optimized for speed and stability. This is done by changing
279 | several parts of the subgraphs with others. One such
280 | optimization expresses terms of the form log(sigmoid(x)) in
281 | terms of softplus. We need this optimization for the
282 | cross-entropy since sigmoid of numbers larger than 30. (or
283 | even less then that) turn to 1. and numbers smaller than
284 | -30. turn to 0 which in terms will force theano to compute
285 | log(0) and therefore we will get either -inf or NaN as
286 | cost. If the value is expressed in terms of softplus we do not
287 | get this undesirable behaviour. This optimization usually
288 | works fine, but here we have a special case. The sigmoid is
289 | applied inside the scan op, while the log is
290 | outside. Therefore Theano will only see log(scan(..)) instead
291 | of log(sigmoid(..)) and will not apply the wanted
292 | optimization. We can not go and replace the sigmoid in scan
293 | with something else also, because this only needs to be done
294 | on the last step. Therefore the easiest and more efficient way
295 | is to get also the pre-sigmoid activation as an output of
296 | scan, and apply both the log and sigmoid outside scan such
297 | that Theano can catch and optimize the expression.
298 |
299 | """
300 |
301 | cross_entropy = T.mean(
302 | T.sum(self.input * T.log(T.nnet.sigmoid(pre_sigmoid_nv)) +
303 | (1 - self.input) * T.log(1 - T.nnet.sigmoid(pre_sigmoid_nv)),
304 | axis=1))
305 | errsum= T.sum(T.sum( (self.input-nv_samples) **2 ))
306 |
307 | return errsum#cross_entropy
308 |
309 |
--------------------------------------------------------------------------------
/TheanoDL/rnnrbm.py:
--------------------------------------------------------------------------------
1 | # Author: Nicolas Boulanger-Lewandowski
2 | # University of Montreal (2012)
3 | # RNN-RBM deep learning tutorial
4 | # More information at http://deeplearning.net/tutorial/rnnrbm.html
5 |
6 | import glob
7 | import os
8 | import sys
9 |
10 | import numpy
11 | try:
12 | import pylab
13 | except ImportError:
14 | print "pylab isn't available, if you use their fonctionality, it will crash"
15 | print "It can be installed with 'pip install -q Pillow'"
16 |
17 | from midi.utils import midiread, midiwrite
18 | import theano
19 | import theano.tensor as T
20 | from theano.tensor.shared_randomstreams import RandomStreams
21 |
22 | #Don't use a python long as this don't work on 32 bits computers.
23 | numpy.random.seed(0xbeef)
24 | rng = RandomStreams(seed=numpy.random.randint(1 << 30))
25 | theano.config.warn.subtensor_merge_bug = False
26 |
27 |
28 | def build_rbm(v, W, bv, bh, k):
29 | '''Construct a k-step Gibbs chain starting at v for an RBM.
30 |
31 | v : Theano vector or matrix
32 | If a matrix, multiple chains will be run in parallel (batch).
33 | W : Theano matrix
34 | Weight matrix of the RBM.
35 | bv : Theano vector
36 | Visible bias vector of the RBM.
37 | bh : Theano vector
38 | Hidden bias vector of the RBM.
39 | k : scalar or Theano scalar
40 | Length of the Gibbs chain.
41 |
42 | Return a (v_sample, cost, monitor, updates) tuple:
43 |
44 | v_sample : Theano vector or matrix with the same shape as `v`
45 | Corresponds to the generated sample(s).
46 | cost : Theano scalar
47 | Expression whose gradient with respect to W, bv, bh is the CD-k approximation
48 | to the log-likelihood of `v` (training example) under the RBM.
49 | The cost is averaged in the batch case.
50 | monitor: Theano scalar
51 | Pseudo log-likelihood (also averaged in the batch case).
52 | updates: dictionary of Theano variable -> Theano variable
53 | The `updates` object returned by scan.'''
54 |
55 | def gibbs_step(v):
56 | mean_h = T.nnet.sigmoid(T.dot(v, W) + bh)
57 | h = rng.binomial(size=mean_h.shape, n=1, p=mean_h,
58 | dtype=theano.config.floatX)
59 | mean_v = T.nnet.sigmoid(T.dot(h, W.T) + bv)
60 | v = rng.binomial(size=mean_v.shape, n=1, p=mean_v,
61 | dtype=theano.config.floatX)
62 | return mean_v, v
63 |
64 | chain, updates = theano.scan(lambda v: gibbs_step(v)[1], outputs_info=[v],
65 | n_steps=k)
66 | v_sample = chain[-1]
67 |
68 | mean_v = gibbs_step(v_sample)[0]
69 | monitor = T.xlogx.xlogy0(v, mean_v) + T.xlogx.xlogy0(1 - v, 1 - mean_v)
70 | monitor = monitor.sum() / v.shape[0]
71 |
72 | def free_energy(v):
73 | return -(v * bv).sum() - T.log(1 + T.exp(T.dot(v, W) + bh)).sum()
74 | cost = (free_energy(v) - free_energy(v_sample)) / v.shape[0]
75 |
76 | return v_sample, cost, monitor, updates
77 |
78 |
79 | def shared_normal(num_rows, num_cols, scale=1):
80 | '''Initialize a matrix shared variable with normally distributed
81 | elements.'''
82 | return theano.shared(numpy.random.normal(
83 | scale=scale, size=(num_rows, num_cols)).astype(theano.config.floatX))
84 |
85 |
86 | def shared_zeros(*shape):
87 | '''Initialize a vector shared variable with zero elements.'''
88 | return theano.shared(numpy.zeros(shape, dtype=theano.config.floatX))
89 |
90 |
91 | def build_rnnrbm(n_visible, n_hidden, n_hidden_recurrent):
92 | '''Construct a symbolic RNN-RBM and initialize parameters.
93 |
94 | n_visible : integer
95 | Number of visible units.
96 | n_hidden : integer
97 | Number of hidden units of the conditional RBMs.
98 | n_hidden_recurrent : integer
99 | Number of hidden units of the RNN.
100 |
101 | Return a (v, v_sample, cost, monitor, params, updates_train, v_t,
102 | updates_generate) tuple:
103 |
104 | v : Theano matrix
105 | Symbolic variable holding an input sequence (used during training)
106 | v_sample : Theano matrix
107 | Symbolic variable holding the negative particles for CD log-likelihood
108 | gradient estimation (used during training)
109 | cost : Theano scalar
110 | Expression whose gradient (considering v_sample constant) corresponds to the
111 | LL gradient of the RNN-RBM (used during training)
112 | monitor : Theano scalar
113 | Frame-level pseudo-likelihood (useful for monitoring during training)
114 | params : tuple of Theano shared variables
115 | The parameters of the model to be optimized during training.
116 | updates_train : dictionary of Theano variable -> Theano variable
117 | Update object that should be passed to theano.function when compiling the
118 | training function.
119 | v_t : Theano matrix
120 | Symbolic variable holding a generated sequence (used during sampling)
121 | updates_generate : dictionary of Theano variable -> Theano variable
122 | Update object that should be passed to theano.function when compiling the
123 | generation function.'''
124 |
125 | W = shared_normal(n_visible, n_hidden, 0.01)
126 | bv = shared_zeros(n_visible)
127 | bh = shared_zeros(n_hidden)
128 | Wuh = shared_normal(n_hidden_recurrent, n_hidden, 0.0001)
129 | Wuv = shared_normal(n_hidden_recurrent, n_visible, 0.0001)
130 | Wvu = shared_normal(n_visible, n_hidden_recurrent, 0.0001)
131 | Wuu = shared_normal(n_hidden_recurrent, n_hidden_recurrent, 0.0001)
132 | bu = shared_zeros(n_hidden_recurrent)
133 |
134 | params = W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu # learned parameters as shared
135 | # variables
136 |
137 | v = T.matrix() # a training sequence
138 | u0 = T.zeros((n_hidden_recurrent,)) # initial value for the RNN hidden
139 | # units
140 |
141 | # If `v_t` is given, deterministic recurrence to compute the variable
142 | # biases bv_t, bh_t at each time step. If `v_t` is None, same recurrence
143 | # but with a separate Gibbs chain at each time step to sample (generate)
144 | # from the RNN-RBM. The resulting sample v_t is returned in order to be
145 | # passed down to the sequence history.
146 | def recurrence(v_t, u_tm1):
147 | bv_t = bv + T.dot(u_tm1, Wuv)
148 | bh_t = bh + T.dot(u_tm1, Wuh)
149 | generate = v_t is None
150 | if generate:
151 | v_t, _, _, updates = build_rbm(T.zeros((n_visible,)), W, bv_t,
152 | bh_t, k=25)
153 | u_t = T.tanh(bu + T.dot(v_t, Wvu) + T.dot(u_tm1, Wuu))
154 | return ([v_t, u_t], updates) if generate else [u_t, bv_t, bh_t]
155 |
156 | # For training, the deterministic recurrence is used to compute all the
157 | # {bv_t, bh_t, 1 <= t <= T} given v. Conditional RBMs can then be trained
158 | # in batches using those parameters.
159 | (u_t, bv_t, bh_t), updates_train = theano.scan(
160 | lambda v_t, u_tm1, *_: recurrence(v_t, u_tm1),
161 | sequences=v, outputs_info=[u0, None, None], non_sequences=params)
162 | v_sample, cost, monitor, updates_rbm = build_rbm(v, W, bv_t[:], bh_t[:],
163 | k=15)
164 | updates_train.update(updates_rbm)
165 |
166 | # symbolic loop for sequence generation
167 | (v_t, u_t), updates_generate = theano.scan(
168 | lambda u_tm1, *_: recurrence(None, u_tm1),
169 | outputs_info=[None, u0], non_sequences=params, n_steps=200)
170 |
171 | return (v, v_sample, cost, monitor, params, updates_train, v_t,
172 | updates_generate)
173 |
174 |
175 | class RnnRbm:
176 | '''Simple class to train an RNN-RBM from MIDI files and to generate sample
177 | sequences.'''
178 |
179 | def __init__(self, n_hidden=150, n_hidden_recurrent=100, lr=0.001,
180 | r=(21, 109), dt=0.3):
181 | '''Constructs and compiles Theano functions for training and sequence
182 | generation.
183 |
184 | n_hidden : integer
185 | Number of hidden units of the conditional RBMs.
186 | n_hidden_recurrent : integer
187 | Number of hidden units of the RNN.
188 | lr : float
189 | Learning rate
190 | r : (integer, integer) tuple
191 | Specifies the pitch range of the piano-roll in MIDI note numbers, including
192 | r[0] but not r[1], such that r[1]-r[0] is the number of visible units of the
193 | RBM at a given time step. The default (21, 109) corresponds to the full range
194 | of piano (88 notes).
195 | dt : float
196 | Sampling period when converting the MIDI files into piano-rolls, or
197 | equivalently the time difference between consecutive time steps.'''
198 |
199 | self.r = r
200 | self.dt = dt
201 | (v, v_sample, cost, monitor, params, updates_train, v_t,
202 | updates_generate) = build_rnnrbm(r[1] - r[0], n_hidden,
203 | n_hidden_recurrent)
204 |
205 | gradient = T.grad(cost, params, consider_constant=[v_sample])
206 | updates_train.update(((p, p - lr * g) for p, g in zip(params,
207 | gradient)))
208 | self.train_function = theano.function([v], monitor,
209 | updates=updates_train)
210 | self.generate_function = theano.function([], v_t,
211 | updates=updates_generate)
212 |
213 | def train(self, files, batch_size=100, num_epochs=200):
214 | '''Train the RNN-RBM via stochastic gradient descent (SGD) using MIDI
215 | files converted to piano-rolls.
216 |
217 | files : list of strings
218 | List of MIDI files that will be loaded as piano-rolls for training.
219 | batch_size : integer
220 | Training sequences will be split into subsequences of at most this size
221 | before applying the SGD updates.
222 | num_epochs : integer
223 | Number of epochs (pass over the training set) performed. The user can
224 | safely interrupt training with Ctrl+C at any time.'''
225 |
226 | assert len(files) > 0, 'Training set is empty!' \
227 | ' (did you download the data files?)'
228 | dataset = [midiread(f, self.r,
229 | self.dt).piano_roll.astype(theano.config.floatX)
230 | for f in files]
231 |
232 | try:
233 | for epoch in xrange(num_epochs):
234 | numpy.random.shuffle(dataset)
235 | costs = []
236 |
237 | for s, sequence in enumerate(dataset):
238 | for i in xrange(0, len(sequence), batch_size):
239 | cost = self.train_function(sequence[i:i + batch_size])
240 | costs.append(cost)
241 |
242 | print 'Epoch %i/%i' % (epoch + 1, num_epochs),
243 | print numpy.mean(costs)
244 | sys.stdout.flush()
245 |
246 | except KeyboardInterrupt:
247 | print 'Interrupted by user.'
248 |
249 | def generate(self, filename, show=True):
250 | '''Generate a sample sequence, plot the resulting piano-roll and save
251 | it as a MIDI file.
252 |
253 | filename : string
254 | A MIDI file will be created at this location.
255 | show : boolean
256 | If True, a piano-roll of the generated sequence will be shown.'''
257 |
258 | piano_roll = self.generate_function()
259 | midiwrite(filename, piano_roll, self.r, self.dt)
260 | if show:
261 | extent = (0, self.dt * len(piano_roll)) + self.r
262 | pylab.figure()
263 | pylab.imshow(piano_roll.T, origin='lower', aspect='auto',
264 | interpolation='nearest', cmap=pylab.cm.gray_r,
265 | extent=extent)
266 | pylab.xlabel('time (s)')
267 | pylab.ylabel('MIDI note number')
268 | pylab.title('generated piano-roll')
269 |
270 |
271 | def test_rnnrbm(batch_size=100, num_epochs=200):
272 | model = RnnRbm()
273 | re = os.path.join(os.path.split(os.path.dirname(__file__))[0],
274 | 'data', 'Nottingham', 'train', '*.mid')
275 | model.train(glob.glob(re),
276 | batch_size=batch_size, num_epochs=num_epochs)
277 | return model
278 |
279 | if __name__ == '__main__':
280 | model = test_rnnrbm()
281 | model.generate('sample1.mid')
282 | model.generate('sample2.mid')
283 | pylab.show()
284 |
--------------------------------------------------------------------------------
/TheanoDL/test.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | import numpy
4 | import theano
5 |
6 | import convolutional_mlp
7 | import dA
8 | import DBN
9 | import logistic_cg
10 | import logistic_sgd
11 | import mlp
12 | import rbm
13 | import rnnrbm
14 | import SdA
15 |
16 |
17 | def test_logistic_sgd():
18 | logistic_sgd.sgd_optimization_mnist(n_epochs=10)
19 |
20 |
21 | def test_logistic_cg():
22 | try:
23 | import scipy
24 | logistic_cg.cg_optimization_mnist(n_epochs=10)
25 | except ImportError:
26 | from nose.plugins.skip import SkipTest
27 | raise SkipTest(
28 | 'SciPy not available. Needed for the logistic_cg example.')
29 |
30 |
31 | def test_mlp():
32 | mlp.test_mlp(n_epochs=1)
33 |
34 |
35 | def test_convolutional_mlp():
36 | convolutional_mlp.evaluate_lenet5(n_epochs=1, nkerns=[5, 5])
37 |
38 |
39 | def test_dA():
40 | dA.test_dA(training_epochs=1, output_folder='tmp_dA_plots')
41 |
42 |
43 | def test_SdA():
44 | SdA.test_SdA(pretraining_epochs=1, training_epochs=1, batch_size=300)
45 |
46 |
47 | def test_dbn():
48 | DBN.test_DBN(pretraining_epochs=1, training_epochs=1, batch_size=300)
49 |
50 |
51 | def test_rbm():
52 | rbm.test_rbm(training_epochs=1, batch_size=300, n_chains=1, n_samples=1,
53 | n_hidden=20, output_folder='tmp_rbm_plots')
54 |
55 |
56 | def test_rnnrbm():
57 | rnnrbm.test_rnnrbm(num_epochs=1)
58 |
59 |
60 | def speed():
61 | """
62 | This fonction modify the configuration theano and don't restore it!
63 | """
64 |
65 | algo = ['logistic_sgd', 'logistic_cg', 'mlp', 'convolutional_mlp',
66 | 'dA', 'SdA', 'DBN', 'rbm', 'rnnrbm']
67 | to_exec = [True] * len(algo)
68 | # to_exec = [False] * len(algo)
69 | # to_exec[-1] = True
70 | do_float64 = True
71 | do_float32 = True
72 | do_gpu = True
73 |
74 | algo_executed = [s for idx, s in enumerate(algo) if to_exec[idx]]
75 | #Timming expected are from the buildbot that have an i7-920 @
76 | # 2.67GHz with hyperthread enabled for the cpu, 12G of ram. An GeForce GTX
77 | # 285 for the GPU. OS=Fedora 14, gcc=4.5.1, python/BLAS from EPD
78 | # 7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread.
79 |
80 | expected_times_64 = numpy.asarray([10.0, 22.5, 76.1, 73.7, 116.4,
81 | 346.9, 381.9, 558.1, 186.3])
82 | expected_times_32 = numpy.asarray([11.6, 29.6, 42.5, 66.5, 71,
83 | 191.2, 226.8, 432.8, 176.2])
84 |
85 | # Number with just 1 decimal are new value that are faster with
86 | # the Theano version 0.5rc2 Other number are older. They are not
87 | # updated, as we where faster in the past!
88 | # TODO: find why and fix this!
89 |
90 | # Here is the value for the buildbot on February 3th 2012.
91 | # sgd, cg mlp conv da
92 | # sda dbn rbm
93 | # gpu times[3.72957802, 9.94316864, 29.1772666, 9.13857198, 25.91144657,
94 | # 18.30802011, 53.38651466, 285.41386175]
95 | # expected [3.076634879, 7.555234910, 18.99226785, 9.58915591, 24.130070450,
96 | # 24.77524018, 92.66246653, 322.340329170]
97 | # sgd, cg mlp conv da
98 | # sda dbn rbm
99 | #expected/get [0.82492841, 0.75984178, 0.65092691, 1.04930573, 0.93125138
100 | # 1.35324519 1.7356905 1.12937868]
101 | expected_times_gpu = numpy.asarray([3.07663488, 7.55523491, 18.99226785,
102 | 9.6, 24.13007045,
103 | 20.4, 56, 302.6, 315.4])
104 | expected_times_64 = [s for idx, s in enumerate(expected_times_64)
105 | if to_exec[idx]]
106 | expected_times_32 = [s for idx, s in enumerate(expected_times_32)
107 | if to_exec[idx]]
108 | expected_times_gpu = [s for idx, s in enumerate(expected_times_gpu)
109 | if to_exec[idx]]
110 |
111 | def time_test(m, l, idx, f, **kwargs):
112 | if not to_exec[idx]:
113 | return
114 | print algo[idx]
115 | ts = m.call_time
116 | try:
117 | f(**kwargs)
118 | except Exception, e:
119 | print >> sys.stderr, 'test', algo[idx], 'FAILED', e
120 | l.append(numpy.nan)
121 | return
122 | te = m.call_time
123 | l.append(te - ts)
124 |
125 | def do_tests():
126 | m = theano.compile.mode.get_default_mode()
127 | l = []
128 | time_test(m, l, 0, logistic_sgd.sgd_optimization_mnist, n_epochs=30)
129 | time_test(m, l, 1, logistic_cg.cg_optimization_mnist, n_epochs=30)
130 | time_test(m, l, 2, mlp.test_mlp, n_epochs=5)
131 | time_test(m, l, 3, convolutional_mlp.evaluate_lenet5, n_epochs=5,
132 | nkerns=[5, 5])
133 | time_test(m, l, 4, dA.test_dA, training_epochs=2,
134 | output_folder='tmp_dA_plots')
135 | time_test(m, l, 5, SdA.test_SdA, pretraining_epochs=1,
136 | training_epochs=2, batch_size=300)
137 | time_test(m, l, 6, DBN.test_DBN, pretraining_epochs=1,
138 | training_epochs=2, batch_size=300)
139 | time_test(m, l, 7, rbm.test_rbm, training_epochs=1, batch_size=300,
140 | n_chains=1, n_samples=1, output_folder='tmp_rbm_plots')
141 | time_test(m, l, 8, rnnrbm.test_rnnrbm, num_epochs=1)
142 | return numpy.asarray(l)
143 |
144 | #test in float64 in FAST_RUN mode on the cpu
145 | import theano
146 | if do_float64:
147 | theano.config.floatX = 'float64'
148 | theano.config.mode = 'FAST_RUN'
149 | float64_times = do_tests()
150 | print >> sys.stderr, algo_executed
151 | print >> sys.stderr, 'float64 times', float64_times
152 | print >> sys.stderr, 'float64 expected', expected_times_64
153 | print >> sys.stderr, 'float64 % expected/get', (
154 | expected_times_64 / float64_times)
155 |
156 | #test in float32 in FAST_RUN mode on the cpu
157 | theano.config.floatX = 'float32'
158 | if do_float32:
159 | float32_times = do_tests()
160 | print >> sys.stderr, algo_executed
161 | print >> sys.stderr, 'float32 times', float32_times
162 | print >> sys.stderr, 'float32 expected', expected_times_32
163 | print >> sys.stderr, 'float32 % expected/get', (
164 | expected_times_32 / float32_times)
165 |
166 | if do_float64:
167 | print >> sys.stderr, 'float64/float32', (
168 | float64_times / float32_times)
169 | print >> sys.stderr
170 | print >> sys.stderr, 'Duplicate the timing to have everything in one place'
171 | print >> sys.stderr, algo_executed
172 | print >> sys.stderr, 'float64 times', float64_times
173 | print >> sys.stderr, 'float64 expected', expected_times_64
174 | print >> sys.stderr, 'float64 % expected/get', (
175 | expected_times_64 / float64_times)
176 | print >> sys.stderr, 'float32 times', float32_times
177 | print >> sys.stderr, 'float32 expected', expected_times_32
178 | print >> sys.stderr, 'float32 % expected/get', (
179 | expected_times_32 / float32_times)
180 |
181 | print >> sys.stderr, 'float64/float32', (
182 | float64_times / float32_times)
183 | print >> sys.stderr, 'expected float64/float32', (
184 | expected_times_64 / float32_times)
185 |
186 | #test in float32 in FAST_RUN mode on the gpu
187 | import theano.sandbox.cuda
188 | if do_gpu:
189 | theano.sandbox.cuda.use('gpu')
190 | gpu_times = do_tests()
191 | print >> sys.stderr, algo_executed
192 | print >> sys.stderr, 'gpu times', gpu_times
193 | print >> sys.stderr, 'gpu expected', expected_times_gpu
194 | print >> sys.stderr, 'gpu % expected/get', (
195 | expected_times_gpu / gpu_times)
196 |
197 | if do_float64:
198 | print >> sys.stderr, 'float64/gpu', float64_times / gpu_times
199 |
200 | if (do_float64 + do_float32 + do_gpu) > 1:
201 | print >> sys.stderr
202 | print >> sys.stderr, 'Duplicate the timing to have everything in one place'
203 | print >> sys.stderr, algo_executed
204 | if do_float64:
205 | print >> sys.stderr, 'float64 times', float64_times
206 | print >> sys.stderr, 'float64 expected', expected_times_64
207 | print >> sys.stderr, 'float64 % expected/get', (
208 | expected_times_64 / float64_times)
209 | if do_float32:
210 | print >> sys.stderr, 'float32 times', float32_times
211 | print >> sys.stderr, 'float32 expected', expected_times_32
212 | print >> sys.stderr, 'float32 % expected/get', (
213 | expected_times_32 / float32_times)
214 | if do_gpu:
215 | print >> sys.stderr, 'gpu times', gpu_times
216 | print >> sys.stderr, 'gpu expected', expected_times_gpu
217 | print >> sys.stderr, 'gpu % expected/get', (
218 | expected_times_gpu / gpu_times)
219 |
220 | if do_float64 and do_float32:
221 | print >> sys.stderr, 'float64/float32', (
222 | float64_times / float32_times)
223 | print >> sys.stderr, 'expected float64/float32', (
224 | expected_times_64 / float32_times)
225 | if do_float64 and do_gpu:
226 | print >> sys.stderr, 'float64/gpu', float64_times / gpu_times
227 | print >> sys.stderr, 'expected float64/gpu', (
228 | expected_times_64 / gpu_times)
229 | if do_float32 and do_gpu:
230 | print >> sys.stderr, 'float32/gpu', float32_times / gpu_times
231 | print >> sys.stderr, 'expected float32/gpu', (
232 | expected_times_32 / gpu_times)
233 |
234 | def compare(x, y):
235 | ratio = x / y
236 | # If there is more then 5% difference between the expected
237 | # time and the real time, we consider this an error.
238 | return sum((ratio < 0.95) + (ratio > 1.05))
239 |
240 | if do_float64:
241 | err = compare(expected_times_64, float64_times)
242 | print >> sys.stderr, 'speed_failure_float64=' + str(err)
243 | if do_float32:
244 | err = compare(expected_times_32, float32_times)
245 | print >> sys.stderr, 'speed_failure_float32=' + str(err)
246 | if do_gpu:
247 | err = compare(expected_times_gpu, gpu_times)
248 | print >> sys.stderr, 'speed_failure_gpu=' + str(err)
249 |
250 | assert not numpy.isnan(gpu_times).any()
251 |
--------------------------------------------------------------------------------
/TheanoDL/utils.py:
--------------------------------------------------------------------------------
1 | """ This file contains different utility functions that are not connected
2 | in anyway to the networks presented in the tutorials, but rather help in
3 | processing the outputs into a more understandable way.
4 |
5 | For example ``tile_raster_images`` helps in generating a easy to grasp
6 | image from a set of samples or weights.
7 | """
8 |
9 |
10 | import numpy
11 |
12 |
13 | def scale_to_unit_interval(ndar, eps=1e-8):
14 | """ Scales all values in the ndarray ndar to be between 0 and 1 """
15 | ndar = ndar.copy()
16 | ndar -= ndar.min()
17 | ndar *= 1.0 / (ndar.max() + eps)
18 | return ndar
19 |
20 |
21 | def zero_mean_unit_variance(Data):
22 | """ Scales all values in the ndarray ndar to be between 0 and 1 """
23 | Mean = numpy.mean(Data, axis=0)
24 | Data -= Mean
25 |
26 | Std = numpy.std(Data, axis = 0)
27 | index = (numpy.abs(Std<10**-5))
28 | Std[index] = 1
29 | Data /= Std
30 | return [Data, Mean, Std]
31 |
32 |
33 | def normalize(Data, Mean, Std):
34 | Data -= Mean
35 | Data /= Std
36 | return Data
37 |
38 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
39 | scale_rows_to_unit_interval=True,
40 | output_pixel_vals=True):
41 | """
42 | Transform an array with one flattened image per row, into an array in
43 | which images are reshaped and layed out like tiles on a floor.
44 |
45 | This function is useful for visualizing datasets whose rows are images,
46 | and also columns of matrices for transforming those rows
47 | (such as the first layer of a neural net).
48 |
49 | :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
50 | be 2-D ndarrays or None;
51 | :param X: a 2-D array in which every row is a flattened image.
52 |
53 | :type img_shape: tuple; (height, width)
54 | :param img_shape: the original shape of each image
55 |
56 | :type tile_shape: tuple; (rows, cols)
57 | :param tile_shape: the number of images to tile (rows, cols)
58 |
59 | :param output_pixel_vals: if output should be pixel values (i.e. int8
60 | values) or floats
61 |
62 | :param scale_rows_to_unit_interval: if the values need to be scaled before
63 | being plotted to [0,1] or not
64 |
65 |
66 | :returns: array suitable for viewing as an image.
67 | (See:`PIL.Image.fromarray`.)
68 | :rtype: a 2-d array with same dtype as X.
69 |
70 | """
71 |
72 | assert len(img_shape) == 2
73 | assert len(tile_shape) == 2
74 | assert len(tile_spacing) == 2
75 |
76 | # The expression below can be re-written in a more C style as
77 | # follows :
78 | #
79 | # out_shape = [0,0]
80 | # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] -
81 | # tile_spacing[0]
82 | # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] -
83 | # tile_spacing[1]
84 | out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
85 | in zip(img_shape, tile_shape, tile_spacing)]
86 |
87 | if isinstance(X, tuple):
88 | assert len(X) == 4
89 | # Create an output numpy ndarray to store the image
90 | if output_pixel_vals:
91 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4),
92 | dtype='uint8')
93 | else:
94 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4),
95 | dtype=X.dtype)
96 |
97 | #colors default to 0, alpha defaults to 1 (opaque)
98 | if output_pixel_vals:
99 | channel_defaults = [0, 0, 0, 255]
100 | else:
101 | channel_defaults = [0., 0., 0., 1.]
102 |
103 | for i in xrange(4):
104 | if X[i] is None:
105 | # if channel is None, fill it with zeros of the correct
106 | # dtype
107 | dt = out_array.dtype
108 | if output_pixel_vals:
109 | dt = 'uint8'
110 | out_array[:, :, i] = numpy.zeros(out_shape,
111 | dtype=dt) + channel_defaults[i]
112 | else:
113 | # use a recurrent call to compute the channel and store it
114 | # in the output
115 | out_array[:, :, i] = tile_raster_images(
116 | X[i], img_shape, tile_shape, tile_spacing,
117 | scale_rows_to_unit_interval, output_pixel_vals)
118 | return out_array
119 |
120 | else:
121 | # if we are dealing with only one channel
122 | H, W = img_shape
123 | Hs, Ws = tile_spacing
124 |
125 | # generate a matrix to store the output
126 | dt = X.dtype
127 | if output_pixel_vals:
128 | dt = 'uint8'
129 | out_array = numpy.zeros(out_shape, dtype=dt)
130 |
131 | for tile_row in xrange(tile_shape[0]):
132 | for tile_col in xrange(tile_shape[1]):
133 | if tile_row * tile_shape[1] + tile_col < X.shape[0]:
134 | this_x = X[tile_row * tile_shape[1] + tile_col]
135 | if scale_rows_to_unit_interval:
136 | # if we should scale values to be between 0 and 1
137 | # do this by calling the `scale_to_unit_interval`
138 | # function
139 | this_img = scale_to_unit_interval(
140 | this_x.reshape(img_shape))
141 | else:
142 | this_img = this_x.reshape(img_shape)
143 | # add the slice to the corresponding position in the
144 | # output array
145 | c = 1
146 | if output_pixel_vals:
147 | c = 255
148 | out_array[
149 | tile_row * (H + Hs): tile_row * (H + Hs) + H,
150 | tile_col * (W + Ws): tile_col * (W + Ws) + W
151 | ] = this_img * c
152 | return out_array
153 |
--------------------------------------------------------------------------------
/Transition_matrix.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/Transition_matrix.mat
--------------------------------------------------------------------------------
/cvpr_2014_diwu.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/cvpr_2014_diwu.pdf
--------------------------------------------------------------------------------
/dbn_2014-05-23-20-07-28.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/dbn_2014-05-23-20-07-28.npy
--------------------------------------------------------------------------------
/distance_median.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/distance_median.npy
--------------------------------------------------------------------------------
/template.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stevenwudi/CVPR_2014_code/7976acb440dbe4e8d060685371e3f0c5d35a4796/template.png
--------------------------------------------------------------------------------