├── A_hybrid_method_of_exponential_smoothing_and_recurrent_neural_networks_for_time_series_forecasting.pdf ├── ES_RNN_SlawekSmyl.pdf ├── LICENSE ├── R ├── merge.R ├── merge_PI.R └── readme.txt ├── README.md ├── c++ ├── ES_RNN.cc ├── ES_RNN_E.cc ├── ES_RNN_E_PI.cc ├── ES_RNN_PI.cc ├── linux_example_scripts │ ├── build_mkl │ ├── readme.txt │ └── run18 ├── readme.txt ├── slstm.cpp ├── slstm.h └── windows_VisualStudio │ ├── M4.sln │ ├── M41 │ ├── ES_RNN.cc │ ├── M41.vcxproj │ ├── slstm.cpp │ └── slstm.h │ ├── M42 │ ├── ES_RNN_PI.cc │ ├── M42.vcxproj │ ├── M42.vcxproj.filters │ └── slstm.h │ ├── M43 │ ├── ES_RNN_E.cc │ ├── M43.filters │ ├── M43.vcxproj │ └── slstm.h │ ├── M44 │ ├── ES_RNN_E_PI.cc │ ├── M44.filters │ ├── M44.vcxproj │ └── slstm.h │ ├── readme.txt │ └── x64 │ └── RelWithDebug │ ├── readme.txt │ ├── run61.cmd │ └── run61_e.cmd ├── readme.txt └── sql ├── createM72nn_SQLServer.sql ├── createM72nn_mysql.txt └── readme.txt /A_hybrid_method_of_exponential_smoothing_and_recurrent_neural_networks_for_time_series_forecasting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slaweks17/ES_RNN/36414dd68c7f15632d6f9008d2651b7331bfca56/A_hybrid_method_of_exponential_smoothing_and_recurrent_neural_networks_for_time_series_forecasting.pdf -------------------------------------------------------------------------------- /ES_RNN_SlawekSmyl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slaweks17/ES_RNN/36414dd68c7f15632d6f9008d2651b7331bfca56/ES_RNN_SlawekSmyl.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 slaweks17 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /R/merge.R: -------------------------------------------------------------------------------- 1 | # Merging outputs, per category, M4 competition, for point forecasts, so for ES_RNN and ES_RNN_E 2 | # Author: Slawek Smyl, Mar-May 2018 3 | 4 | 5 | #The c++ executables write to one (occasinally two, sorry :-), so in such case move files to one dir before continuing) directories. 6 | #(One logical run of several instances of the same program will produce a number files, e.g. 
outputs with different ibig value) 7 | #This script merges, averages values, and writes them down to the same directory - FOREC_DIR 8 | ############################################################################### 9 | 10 | #directory that should include all *-train.csv files, as well as M4-info.csv 11 | DATA_DIR="F:/progs/data/M4DataSet/" 12 | m4Info_df=read.csv(paste0(DATA_DIR,"M4-info.csv")) 13 | options(stringsAsFactors =FALSE) 14 | 15 | #directory with all the output files produced by the c++ code we want to merge 16 | FOREC_DIR='F:\\progs\\data\\M4\\Quarterly2018-05-31_09_30' #do not end with separator 17 | 18 | LBACK=1 #shoud be as in the c++ code, LBACK>0 means backtesting 19 | SP="Quarterly" 20 | #SP="Yearly" 21 | #SP="Daily" 22 | #SP="Hourly" 23 | 24 | #//----------------PARAMS ---------- comment/uncomment following 3 variables 25 | #for ES_RNN_E, so for all except Monthly and Quarterly runs: 26 | #NUM_OF_SEEDS=1 27 | #NUM_OF_CHUNKS=1 28 | #IBIGS= 29 | 30 | #for ES_RNN (do for Monthly and Quarterly): 31 | NUM_OF_CHUNKS=2 #same as NUM_OF_CHUNKS constant the the c++ cource code, changing it is not recommended. 32 | NUM_OF_SEEDS=3 #It is equal to the number of seeds in the startup script, (or number of teams of worker processes) 33 | # so number_of_concurrent_executables==number_of_lines_in_the_running script/NUM_OF_CHUNKS, and number_of_chunks 34 | #E.g if using following script for ES_RNN: 35 | # start 10 1 0 36 | # start 10 2 0 37 | # start 20 1 5 38 | # start 20 2 5 39 | # start 30 1 10 40 | # start 30 2 10 41 | # we have here three seeds: 10,20,30, and two chunks: 1,2. (The pairs of workes have IBIG offsets of 0,5,10) 42 | IBIGS=3 #number of complete runs by each executables, so if programs are not interrupted, this should be equal to the constant BIG_LOOP in the c++ code, by default 3. 
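# For example: with the startup script above (seeds 10,20,30; chunks 1,2) and IBIGS=3,
# this script expects NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS = 3*2*3 = 18 forecast files in FOREC_DIR,
# named as in the c++ pattern quoted below (the comment line starting with #VARIABLE),
# e.g. a hypothetical "Quarterly_10_1_0_LB1.csv" = seed 10, chunk 1, ibig 0, LBACK 1.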
43 | 44 | 45 | m4_df=read.csv(paste0(DATA_DIR,SP,"-train.csv")) 46 | 47 | sMAPE<-function(forec,actual) { 48 | mean(abs(forec-actual)/(abs(forec)+abs(actual)))*200 49 | } 50 | errorFunc=sMAPE 51 | 52 | 53 | spInfo_df=m4Info_df[m4Info_df$SP==SP,] 54 | ids=spInfo_df$M4id 55 | horizon=spInfo_df[1,"Horizon"] 56 | 57 | #VARIABLE + "_" + to_string(seedForChunks) + "_" + to_string(chunkNo) + "_" + to_string(ibigDb)+"_LB"+ to_string(LBACK)+ ".csv"; 58 | inputFiles=list.files(path = FOREC_DIR, pattern = paste0(SP,".*LB",LBACK), full.names = T) 59 | if (length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS) { 60 | stop("length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS") 61 | } 62 | 63 | 64 | comp_df=NULL 65 | fil=inputFiles[1] 66 | for (fil in inputFiles) { 67 | print(fil) 68 | c_df=read.csv(fil, header=F) 69 | comp_df=rbind(comp_df,c_df) 70 | } 71 | names(comp_df)[1]='id' 72 | 73 | forecSeries=sort(unique(comp_df$id)) 74 | if (length(forecSeries)!=length(ids) && LBACK==0) { 75 | stop(paste0("Expected number of cases:",length(ids)," but got:",length(forecSeries))) 76 | } 77 | 78 | SIZE_OF_CHUNK=1000 79 | out_df=NULL; ou_df=NULL 80 | fSeries=forecSeries[1] 81 | for (fSeries in forecSeries) { 82 | oneSeriesForecs_df=comp_df[comp_df$id==fSeries,] 83 | o1=colMeans(oneSeriesForecs_df[,2:ncol(oneSeriesForecs_df)]) 84 | o_df=data.frame(id=fSeries, as.list(o1), stringsAsFactors =F) 85 | ou_df=rbind(ou_df, o_df) 86 | if (nrow(ou_df)>=SIZE_OF_CHUNK) { 87 | out_df=rbind(out_df,ou_df) 88 | ou_df=NULL 89 | print(nrow(out_df)) 90 | } 91 | } 92 | out_df=rbind(out_df,ou_df) 93 | print(nrow(out_df)) 94 | out_df=out_df[order(as.integer(substring(out_df$id, 2))),] 95 | 96 | #FOREC_DIR="e:\\temp" 97 | outPath=paste0(FOREC_DIR,'\\',SP,"Forec.csv") 98 | write.csv(out_df,file=outPath,row.names = F) 99 | 100 | ################ Main work done, now just diagnostics calculations and plots 101 | 102 | #display a sample of forecasts and, if LBACK>0, actuals 103 | MAX_NUM_OF_POINTS_TO_SHOW=200 104 | for (i in 1:100) { 105 | irand=sample(1:length(forecSeries),1) 106 | fSeries=forecSeries[irand] 107 | forec=as.numeric(out_df[out_df$id==fSeries,2:ncol(out_df)]) 108 | actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) 109 | actual=actual[!is.na(actual)] 110 | if (length(actual)>MAX_NUM_OF_POINTS_TO_SHOW) { 111 | actual=actual[(length(actual)-MAX_NUM_OF_POINTS_TO_SHOW):length(actual)] 112 | } 113 | if (LBACK==0) { 114 | plot(c(actual,forec), col=c(rep(1,length(actual)),rep(2,length(forec))), main=fSeries) 115 | } else { 116 | ymin=min(actual,forec) 117 | ymax=max(actual,forec) 118 | plot(1:length(actual),actual, main=fSeries, ylim=c(ymin,ymax)) 119 | lines((length(actual)-length(forec)+1):length(actual), forec, col=2, type='p') 120 | } 121 | 122 | Sys.sleep(5) 123 | } 124 | 125 | 126 | #calc error metrics 127 | if (LBACK>0) { 128 | summErrors=0 129 | fSeries=forecSeries[1] 130 | i=1 131 | for (fSeries in forecSeries) { 132 | if (i%%1000==0) 133 | cat(".") 134 | forec=as.numeric(out_df[out_df$id==fSeries,2:ncol(out_df)]) 135 | actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) 136 | actual=actual[!is.na(actual)] 137 | actual=actual[(length(actual)-LBACK*horizon+1):(length(actual)-(LBACK-1)*horizon)] 138 | summErrors=summErrors+errorFunc(forec,actual) 139 | i=i+1 140 | } 141 | print(".") 142 | print(paste0("avg error:",round(summErrors/length(forecSeries),2))) 143 | } 144 | -------------------------------------------------------------------------------- /R/merge_PI.R: 
-------------------------------------------------------------------------------- 1 | # Merging outputs, per category, M4 competition, for Prediction Intervals , so for ES_RNN_PI and ES_RNN_E_PI 2 | # Author: Slawek Smyl, Mar-May 2018 3 | 4 | 5 | #The c++ executables write to one (occasinally two, sorry :-), so in such case move files to one dir before continuing) directories. 6 | #(One logical run of several instances of the same program will produce a number files, e.g. outputs with different ibig value) 7 | #This script merges, averages values, and writes them down to the same directory - FOREC_DIR 8 | ############################################################################### 9 | 10 | #directory that should include all *-train.csv files, as well as M4-info.csv 11 | DATA_DIR="F:/progs/data/M4DataSet/" 12 | m4Info_df=read.csv(paste0(DATA_DIR,"M4-info.csv")) 13 | options(stringsAsFactors =FALSE) 14 | memory.limit(10000) 15 | 16 | #directory with all the output files produced by the c++ code we want to merge 17 | FOREC_DIR='F:\\progs\\data\\M4\\Hourlygood' #do not end with separator 18 | 19 | LBACK=1 #shoud be as in the c++ code, LBACK>0 means backtesting 20 | #SP="Quarterly" 21 | #SP="Yearly" 22 | #SP="Daily" 23 | SP="Hourly" 24 | m4_df=read.csv(paste0(DATA_DIR,SP,"-train.csv")) 25 | 26 | 27 | #//----------------PARAMS ---------- comment/uncomment following 3 variables 28 | #for ES_RNN_E_PI, so for all except Monthly and Quarterly runs: 29 | NUM_OF_SEEDS=1 30 | NUM_OF_CHUNKS=1 31 | #IBIGS=/2 32 | IBIGS=6 33 | 34 | #for ES_RNN_PI (do for Monthly and Quarterly): 35 | #NUM_OF_CHUNKS=2 #same as NUM_OF_CHUNKS constant the the c++ cource code, changing it is not recommended. 36 | #NUM_OF_SEEDS=3 #It is equal to the number of seeds in the startup script, (or number of teams of worker processes) 37 | # so number_of_concurrent_executables==number_of_lines_in_the_running script/NUM_OF_CHUNKS, and number_of_chunks 38 | #E.g if using following script for ES_RNN: 39 | # start 10 1 0 40 | # start 10 2 0 41 | # start 20 1 5 42 | # start 20 2 5 43 | # start 30 1 10 44 | # start 30 2 10 45 | # we have here three seeds: 10,20,30, and two chunks: 1,2. (The pairs of workes have IBIG offsets of 0,5,10) 46 | #IBIGS=3 #number of complete runs by each executables, so if programs are not interrupted, this should be equal to the constant BIG_LOOP in the c++ code, by default 3. 
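# The MSIS function below computes the M4 Mean Scaled Interval Score for the central
# (1-ALPHA) prediction interval [forecL, forecH]:
#   MSIS = [ sum(forecH - forecL)
#            + (2/ALPHA) * sum(max(0, forecL - actual))
#            + (2/ALPHA) * sum(max(0, actual - forecH)) ] / horizon / avgAbsDiff
# where avgAbsDiff is the in-sample mean absolute seasonal difference mean(|y_t - y_{t-seasonality}|):
# interval width plus penalties for actuals falling outside the interval, scaled MASE-style.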
47 | 48 | ALPHA = 0.05; 49 | ALPHA_MULTIP = 2 / ALPHA; 50 | 51 | MSIS<-function(forecL,forecH,actual) { 52 | sumDiffs=0 53 | for (i in 1:(length(actual)-seasonality)) { 54 | sumDiffs=sumDiffs+abs(actual[i+seasonality]-actual[i]) 55 | } 56 | avgAbsDiff=sumDiffs/(length(actual)-seasonality) 57 | 58 | actual=actual[(length(actual)-LBACK*horizon+1):(length(actual)-(LBACK-1)*horizon)] 59 | 60 | msis=sum(forecH-forecL)+sum(pmax(0,forecL-actual))*ALPHA_MULTIP+sum(pmax(0,actual-forecH))*ALPHA_MULTIP 61 | msis/horizon/avgAbsDiff 62 | } 63 | errorFunc=MSIS 64 | 65 | spInfo_df=m4Info_df[m4Info_df$SP==SP,] 66 | ids=spInfo_df$M4id 67 | horizon=spInfo_df[1,"Horizon"] 68 | seasonality=spInfo_df[1,"Frequency"] 69 | 70 | 71 | #lower 72 | #VARIABLE + "_" + to_string(seedForChunks) + "_" + to_string(chunkNo) + "_" + to_string(ibigDb)+"_LB"+ to_string(LBACK)+ ".csv"; 73 | inputFiles=list.files(path = FOREC_DIR, pattern = paste0(SP,".*LLB",LBACK), full.names = T) 74 | if (length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS) { 75 | stop("length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS") 76 | } 77 | 78 | comp_df=NULL 79 | fil=inputFiles[1] 80 | for (fil in inputFiles) { 81 | print(fil) 82 | c_df=read.csv(fil, header=F) 83 | comp_df=rbind(comp_df,c_df) 84 | } 85 | names(comp_df)[1]='id' 86 | 87 | forecSeries=sort(unique(comp_df$id)) 88 | if (length(forecSeries)!=length(ids) && LBACK==0) { 89 | stop(paste0("Expected number of cases:",length(ids)," but got:",length(forecSeries))) 90 | } 91 | 92 | SIZE_OF_CHUNK=1000 93 | out_df=NULL; ou_df=NULL 94 | fSeries=forecSeries[1] 95 | for (fSeries in forecSeries) { 96 | oneSeriesForecs_df=comp_df[comp_df$id==fSeries,] 97 | o1=colMeans(oneSeriesForecs_df[,2:ncol(oneSeriesForecs_df)]) 98 | o_df=data.frame(id=fSeries, as.list(o1), stringsAsFactors =F) 99 | ou_df=rbind(ou_df, o_df) 100 | if (nrow(ou_df)>=SIZE_OF_CHUNK) { 101 | out_df=rbind(out_df,ou_df) 102 | ou_df=NULL 103 | print(nrow(out_df)) 104 | } 105 | } 106 | out_df=rbind(out_df,ou_df) 107 | print(nrow(out_df)) 108 | out_df=out_df[order(as.integer(substring(out_df$id, 2))),] 109 | 110 | outPath=paste0(FOREC_DIR,'\\',SP,"ForecL.csv") 111 | write.csv(out_df,file=outPath,row.names = F) 112 | 113 | lower_df=out_df 114 | 115 | ##################################### 116 | #higher 117 | inputFiles=list.files(path = FOREC_DIR, pattern = paste0(SP,".*HLB",LBACK), full.names = T) 118 | if (length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS) { 119 | stop("length(inputFiles)!=NUM_OF_SEEDS*NUM_OF_CHUNKS*IBIGS") 120 | } 121 | 122 | comp_df=NULL 123 | fil=inputFiles[1] 124 | for (fil in inputFiles) { 125 | print(fil) 126 | c_df=read.csv(fil, header=F) 127 | comp_df=rbind(comp_df,c_df) 128 | } 129 | names(comp_df)[1]='id' 130 | 131 | forecSeries=sort(unique(comp_df$id)) 132 | if (length(forecSeries)!=length(ids) && LBACK==0) { 133 | print(paste0("Warning. 
Expected number of cases:",length(ids)," but got:",length(forecSeries))) 134 | } 135 | 136 | SIZE_OF_CHUNK=1000 137 | out_df=NULL; ou_df=NULL 138 | fSeries=forecSeries[1] 139 | for (fSeries in forecSeries) { 140 | oneSeriesForecs_df=comp_df[comp_df$id==fSeries,] 141 | o1=colMeans(oneSeriesForecs_df[,2:ncol(oneSeriesForecs_df)]) 142 | o_df=data.frame(id=fSeries, as.list(o1), stringsAsFactors =F) 143 | ou_df=rbind(ou_df, o_df) 144 | if (nrow(ou_df)>=SIZE_OF_CHUNK) { 145 | out_df=rbind(out_df,ou_df) 146 | ou_df=NULL 147 | print(nrow(out_df)) 148 | } 149 | } 150 | out_df=rbind(out_df,ou_df) 151 | print(nrow(out_df)) 152 | out_df=out_df[order(as.integer(substring(out_df$id, 2))),] 153 | 154 | outPath=paste0(FOREC_DIR,'\\',SP,"ForecH.csv") 155 | write.csv(out_df,file=outPath,row.names = F) 156 | 157 | higher_df=out_df 158 | 159 | 160 | ################ Main work done, now just diagnostics calculations and plots 161 | 162 | #display a sample of forecasts and, if LBACK>0, actuals 163 | MAX_NUM_OF_POINTS_TO_SHOW=200 164 | i=1 165 | for (i in 1:100) { 166 | irand=sample(1:length(forecSeries),1) 167 | fSeries=forecSeries[irand] 168 | forecL=as.numeric(lower_df[lower_df$id==fSeries,2:ncol(lower_df)]) 169 | forecH=as.numeric(higher_df[higher_df$id==fSeries,2:ncol(higher_df)]) 170 | actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) 171 | actual=actual[!is.na(actual)] 172 | if (length(actual)>MAX_NUM_OF_POINTS_TO_SHOW) { 173 | actual=actual[(length(actual)-MAX_NUM_OF_POINTS_TO_SHOW):length(actual)] 174 | } 175 | if (LBACK==0) { 176 | plot(c(actual,forecH), col=c(rep(1,length(actual)),rep(2,length(forecH))), main=fSeries) 177 | lines(c(actual,forecL), col=c(rep(1,length(actual)),rep(3,length(forecL))), type='p') 178 | } else { 179 | ymin=min(actual,forecL) 180 | ymax=max(actual,forecH) 181 | plot(1:length(actual),actual, main=fSeries, ylim=c(ymin,ymax)) 182 | lines((length(actual)-length(forecH)+1):length(actual), forecH, col=2, type='p') 183 | lines((length(actual)-length(forecL)+1):length(actual), forecL, col=3, type='p') 184 | } 185 | 186 | Sys.sleep(5) 187 | } 188 | 189 | 190 | 191 | #calc error metric: MSIS 192 | if (LBACK>0) { 193 | summErrors=0 194 | fSeries=forecSeries[1] 195 | i=1 196 | for (fSeries in forecSeries) { 197 | if (i%%1000==0) 198 | cat(".") 199 | forecL=as.numeric(lower_df[lower_df$id==fSeries,2:ncol(lower_df)]) 200 | forecH=as.numeric(higher_df[higher_df$id==fSeries,2:ncol(higher_df)]) 201 | actual=as.numeric(m4_df[m4_df$V1==fSeries,2:ncol(m4_df)]) 202 | actual=actual[!is.na(actual)] 203 | summErrors=summErrors+errorFunc(forecL, forecH, actual) 204 | i=i+1 205 | } 206 | print(".") 207 | print(paste0("avg error:",round(summErrors/length(forecSeries),2))) 208 | } 209 | 210 | 211 | -------------------------------------------------------------------------------- /R/readme.txt: -------------------------------------------------------------------------------- 1 | When the c++ workers run, they output results (forecasts) to a directory or two. 2 | (Sorry occasionally two directories are filled, in such case first "manually" put all the output files to a single dir) 3 | These scripts merge them into one file and save it, show a sample of graphs, and if this is backtesting run (LBACK>0), calculate some accuracy metrics. 4 | 5 | Both scripts needs to be updated with your input, output dirs, and other params, see inside, there are a lot of comments there. 6 | 7 | merge.R is meant to be used for point forecst runs, so for ES_RNN and ES_RNN_E programs. 
8 | merge_PI.R - for Prediction Interval runs, so for ES_RNN_PI and ES_RNN_E_PI programs.
9 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ES_RNN
2 | The repository contains the current, slightly updated version of ES_RNN - a hybrid Exponential Smoothing/Recurrent NN method that won the M4 Forecasting Competition
3 | 
--------------------------------------------------------------------------------
/c++/linux_example_scripts/build_mkl:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | c++ -DEIGEN_FAST_MATH -fPIC -funroll-loops -fno-finite-math-only -Wall -Wno-missing-braces -std=c++11 -Ofast -g -march=native -O2 -g -DNDEBUG -I/home/uber/progs/dynet -I/home/uber/progs/eigen -I/home/uber/progs/dynet/buildMKL $1.cc slstm.cpp -o $1 -lodbc -rdynamic /home/uber/progs/dynet/buildMKL/dynet/libdynet.so -lpthread -lrt -Wl,-rpath,/home/uber/progs/dynet/buildMKL/dynet
3 | 
4 | 
--------------------------------------------------------------------------------
/c++/linux_example_scripts/readme.txt:
--------------------------------------------------------------------------------
1 | build_mkl builds a specified program, linking it with the MKL-compiled version of Dynet.
2 | usage, e.g.:
3 | ./build_mkl ES_RNN
4 | (no extension).
5 | ____You need to modify it to point to your location of the Dynet library.____
6 | Also, remove -lodbc if you do not use it, and especially if you have not installed it :-)
7 | 
8 | run18 is a script that runs 9 pairs of workers, to be used with ES_RNN and ES_RNN_PI.
9 | It assumes it runs on a nice 18-core machine :-), and in such a case the BIG_LOOP constant in the .cc files should probably be = 1, as there is no big need for more than 9 runs for ensembling.
10 | usage, e.g.:
11 | ./run18 ES_RNN
12 | 
13 | 
14 | 
--------------------------------------------------------------------------------
/c++/linux_example_scripts/run18:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | rm ./nohup.out
3 | nohup nice -n 10 ./$1 9 1 &
4 | nohup nice -n 10 ./$1 9 2 &
5 | nohup nice -n 10 ./$1 10 1 5 &
6 | nohup nice -n 10 ./$1 10 2 5 &
7 | nohup nice -n 10 ./$1 11 1 10 &
8 | nohup nice -n 10 ./$1 11 2 10 &
9 | nohup nice -n 10 ./$1 12 1 15 &
10 | nohup nice -n 10 ./$1 12 2 15 &
11 | nohup nice -n 10 ./$1 13 1 20 &
12 | nohup nice -n 10 ./$1 13 2 20 &
13 | nohup nice -n 10 ./$1 14 1 25 &
14 | nohup nice -n 10 ./$1 14 2 25 &
15 | nohup nice -n 10 ./$1 15 1 30 &
16 | nohup nice -n 10 ./$1 15 2 30 &
17 | nohup nice -n 10 ./$1 16 1 35 &
18 | nohup nice -n 10 ./$1 16 2 35 &
19 | nohup nice -n 10 ./$1 17 1 40 &
20 | nohup nice -n 10 ./$1 17 2 40 &
21 | 
--------------------------------------------------------------------------------
/c++/readme.txt:
--------------------------------------------------------------------------------
1 | The programs require Dynet (https://github.com/clab/dynet) installed, compiled for C++.
2 | I have also been using Intel MKL, downloadable freely, and built Dynet to use MKL.
3 | In my early testing CPU performance was better than GPU, so I did not use GPU builds of Dynet.
4 | There are 4 projects, each containing one .cc file and slstm.*.
5 | The programs can be run on Windows, Linux, and Mac.
6 | See inside the *.cc files - there are more details. You need to set up some params.
7 | 
8 | I provide example scripts for Linux, and a VS 2015 solution for Windows.
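The dilated LSTM builders shared by all four programs are implemented in slstm.cpp / slstm.h (next). For orientation only, below is a minimal sketch, under illustrative assumptions (the dilations, dimensions, dummy data and placeholder loss are made up), of how a DilatedLSTMBuilder might be driven from a Dynet program; the real training loops, with per-series exponential smoothing parameters and the actual losses, live in the ES_RNN*.cc files.

// Minimal usage sketch of DilatedLSTMBuilder (illustrative assumptions only; not the ES_RNN code).
#include <vector>
#include "dynet/dynet.h"
#include "dynet/expr.h"
#include "dynet/training.h"
#include "slstm.h"
using namespace std;
using namespace dynet;

int main(int argc, char** argv) {
  dynet::initialize(argc, argv);
  ParameterCollection model;
  AdamTrainer trainer(model);

  const unsigned INPUT_DIM = 8, HIDDEN_DIM = 16;
  // One entry per layer; layer i reuses the hidden/cell state from dilations[i] steps back.
  DilatedLSTMBuilder rnn({1, 3, 9}, INPUT_DIM, HIDDEN_DIM, model);

  vector<vector<float>> series(20, vector<float>(INPUT_DIM, 0.5f)); // dummy input windows

  ComputationGraph cg;
  rnn.new_graph(cg);         // bind the builder's parameters to this graph
  rnn.start_new_sequence();  // clear the h and c histories
  Expression last_h;
  for (auto& step : series)
    last_h = rnn.add_input(input(cg, Dim({INPUT_DIM}), step));

  Expression loss = squared_norm(last_h); // placeholder loss, just to close the loop
  cg.forward(loss);
  cg.backward(loss);
  trainer.update();
  return 0;
}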
-------------------------------------------------------------------------------- /c++/slstm.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | My implementation of dilated LSTMs, based on Dynet LSTM builders 3 | - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) 4 | - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 5 | - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 6 | * 7 | Slawek Smyl, Mar-May 2018 8 | */ 9 | 10 | #include "slstm.h" 11 | #include "dynet/lstm.h" 12 | #include "dynet/param-init.h" 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #if defined DEBUG 20 | #define _DEBUG 21 | #endif 22 | 23 | using namespace std; 24 | 25 | namespace dynet { 26 | 27 | // ResidualDilatedLSTMBuilder based on Vanilla LSTM 28 | enum { _X2I, _H2I, _BI, _X2F, _H2F, _BF, _X2O, _H2O, _BO, _X2G, _H2G, _BG }; 29 | enum { LN_GH, LN_BH, LN_GX, LN_BX, LN_GC, LN_BC }; 30 | 31 | ResidualDilatedLSTMBuilder::ResidualDilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), ln_lstm(false), forget_bias(1.f), dropout_masks_valid(false) { } 32 | 33 | ResidualDilatedLSTMBuilder::ResidualDilatedLSTMBuilder(vector dilations, 34 | unsigned input_dim, 35 | unsigned hidden_dim, 36 | ParameterCollection& model, 37 | bool ln_lstm, float forget_bias) : dilations(dilations), layers(unsigned(dilations.size())), 38 | input_dim(input_dim), hid(hidden_dim), ln_lstm(ln_lstm), forget_bias(forget_bias), dropout_masks_valid(false) { 39 | unsigned layer_input_dim = input_dim; 40 | local_model = model.add_subcollection("ResidualDilated-lstm-builder"); 41 | for (unsigned i = 0; i < layers; ++i) { 42 | // i 43 | Parameter p_x2i = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); 44 | Parameter p_h2i = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); 45 | //Parameter p_c2i = model.add_parameters({hidden_dim, hidden_dim}); 46 | Parameter p_bi = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); 47 | 48 | layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next 49 | 50 | vector ps = { p_x2i, p_h2i, /*p_c2i,*/ p_bi }; 51 | params.push_back(ps); 52 | 53 | if (ln_lstm) { 54 | Parameter p_gh = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(1.f)); 55 | Parameter p_bh = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); 56 | Parameter p_gx = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(1.f)); 57 | Parameter p_bx = model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); 58 | Parameter p_gc = model.add_parameters({ hidden_dim }, ParameterInitConst(1.f)); 59 | Parameter p_bc = model.add_parameters({ hidden_dim }, ParameterInitConst(0.f)); 60 | vector ln_ps = { p_gh, p_bh, p_gx, p_bx, p_gc, p_bc }; 61 | ln_params.push_back(ln_ps); 62 | } 63 | } // layers 64 | dropout_rate = 0.f; 65 | dropout_rate_h = 0.f; 66 | } 67 | 68 | void ResidualDilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { 69 | param_vars.clear(); 70 | if (ln_lstm)ln_param_vars.clear(); 71 | for (unsigned i = 0; i < layers; ++i) { 72 | auto& p = params[i]; 73 | vector vars; 74 | for (unsigned j = 0; j < p.size(); ++j) { vars.push_back(update ? 
parameter(cg, p[j]) : const_parameter(cg, p[j])); } 75 | param_vars.push_back(vars); 76 | if (ln_lstm) { 77 | auto& ln_p = ln_params[i]; 78 | vector ln_vars; 79 | for (unsigned j = 0; j < ln_p.size(); ++j) { ln_vars.push_back(update ? parameter(cg, ln_p[j]) : const_parameter(cg, ln_p[j])); } 80 | ln_param_vars.push_back(ln_vars); 81 | } 82 | } 83 | 84 | _cg = &cg; 85 | } 86 | // layout: 0..layers = c 87 | // layers+1..2*layers = h 88 | void ResidualDilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { 89 | h.clear(); 90 | c.clear(); 91 | 92 | if (hinit.size() > 0) { 93 | DYNET_ARG_CHECK(layers * 2 == hinit.size(), 94 | "ResidualDilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " 95 | "(hidden state, and cell for each layer). However, for " << layers << " layers, " << 96 | hinit.size() << " expressions were passed in"); 97 | h0.resize(layers); 98 | c0.resize(layers); 99 | for (unsigned i = 0; i < layers; ++i) { 100 | c0[i] = hinit[i]; 101 | h0[i] = hinit[i + layers]; 102 | } 103 | has_initial_state = true; 104 | } 105 | else { 106 | has_initial_state = false; 107 | } 108 | 109 | dropout_masks_valid = false; 110 | } 111 | 112 | void ResidualDilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { 113 | masks.clear(); 114 | for (unsigned i = 0; i < layers; ++i) { 115 | std::vector masks_i; 116 | unsigned idim = (i == 0) ? input_dim : hid; 117 | if (dropout_rate > 0.f || dropout_rate_h > 0.f) { 118 | float retention_rate = 1.f - dropout_rate; 119 | float retention_rate_h = 1.f - dropout_rate_h; 120 | float scale = 1.f / retention_rate; 121 | float scale_h = 1.f / retention_rate_h; 122 | // in 123 | masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); 124 | // h 125 | masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); 126 | masks.push_back(masks_i); 127 | } 128 | } 129 | dropout_masks_valid = true; 130 | } 131 | 132 | ParameterCollection & ResidualDilatedLSTMBuilder::get_parameter_collection() { 133 | return local_model; 134 | } 135 | 136 | // TODO - Make this correct 137 | // Copied c from the previous step (otherwise c.size()< h.size()) 138 | // Also is creating a new step something we want? 139 | // wouldn't overwriting the current one be better? 
140 | Expression ResidualDilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { 141 | DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, 142 | "ResidualDilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << 143 | h_new.size() << " inputs for " << layers << " layers"); 144 | const unsigned t = h.size(); 145 | h.push_back(vector(layers)); 146 | c.push_back(vector(layers)); 147 | for (unsigned i = 0; i < layers; ++i) { 148 | Expression h_i = h_new[i]; 149 | Expression c_i = c[t - 1][i]; 150 | h[t][i] = h_i; 151 | c[t][i] = c_i; 152 | } 153 | return h[t].back(); 154 | } 155 | // Current implementation : s_new is either {new_c[0],...,new_c[n]} 156 | // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} 157 | Expression ResidualDilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { 158 | DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, 159 | "ResidualDilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); 160 | bool only_c = s_new.size() == layers; 161 | const unsigned t = c.size(); 162 | h.push_back(vector(layers)); 163 | c.push_back(vector(layers)); 164 | for (unsigned i = 0; i < layers; ++i) { 165 | Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers]; 166 | Expression c_i = s_new[i]; 167 | h[t][i] = h_i; 168 | c[t][i] = c_i; 169 | } 170 | return h[t].back(); 171 | } 172 | 173 | Expression ResidualDilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { 174 | h.push_back(vector(layers)); 175 | c.push_back(vector(layers)); 176 | vector& ht = h.back(); 177 | vector& ct = c.back(); 178 | Expression in = x; 179 | if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); 180 | for (unsigned i = 0; i < layers; ++i) { 181 | int dilation_offset = dilations[i] - 1; 182 | const vector& vars = param_vars[i]; 183 | 184 | Expression i_h_tm1, i_c_tm1; 185 | bool has_prev_state = (prev >= 0 || has_initial_state); 186 | if (prev < dilation_offset) { 187 | if (has_initial_state) { 188 | // intial value for h and c at timestep 0 in layer i 189 | // defaults to zero matrix input if not set in add_parameter_edges 190 | i_h_tm1 = h0[i]; 191 | i_c_tm1 = c0[i]; 192 | } 193 | else { 194 | i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); 195 | i_c_tm1 = i_h_tm1; 196 | } 197 | } 198 | else { 199 | i_h_tm1 = h[prev - dilation_offset][i]; 200 | i_c_tm1 = c[prev - dilation_offset][i]; 201 | } 202 | // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) 203 | if (dropout_rate > 0.f) { 204 | in = cmult(in, masks[i][0]); 205 | } 206 | if (has_prev_state && dropout_rate_h > 0.f) 207 | i_h_tm1 = cmult(i_h_tm1, masks[i][1]); 208 | // input 209 | Expression tmp; 210 | Expression i_ait; 211 | Expression i_aft; 212 | Expression i_aot; 213 | Expression i_agt; 214 | if (ln_lstm) { 215 | const vector& ln_vars = ln_param_vars[i]; 216 | if (has_prev_state) 217 | tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]) + layer_norm(vars[_H2I] * i_h_tm1, ln_vars[LN_GH], ln_vars[LN_BH]); 218 | else 219 | tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]); 220 | } 221 | else { 222 | if (has_prev_state) 223 | tmp = affine_transform({ vars[_BI], vars[_X2I], in, vars[_H2I], i_h_tm1 }); 224 | else 225 | tmp = affine_transform({ vars[_BI], vars[_X2I], in }); 226 | } 227 | i_ait = pick_range(tmp, 0, hid); 228 | i_aft = 
pick_range(tmp, hid, hid * 2); 229 | i_aot = pick_range(tmp, hid * 2, hid * 3); 230 | i_agt = pick_range(tmp, hid * 3, hid * 4); 231 | Expression i_it = logistic(i_ait); 232 | if (forget_bias != 0.0) 233 | tmp = logistic(i_aft + forget_bias); 234 | else 235 | tmp = logistic(i_aft); 236 | 237 | Expression i_ft = tmp; 238 | Expression i_ot = logistic(i_aot); 239 | Expression i_gt = tanh(i_agt); 240 | 241 | ct[i] = has_prev_state ? (cmult(i_ft, i_c_tm1) + cmult(i_it, i_gt)) : cmult(i_it, i_gt); 242 | if (ln_lstm) { 243 | const vector& ln_vars = ln_param_vars[i]; 244 | if (i==0) 245 | in = ht[i] = cmult(i_ot, tanh(layer_norm(ct[i], ln_vars[LN_GC], ln_vars[LN_BC]))); 246 | else 247 | in = ht[i] = cmult(i_ot, in+tanh(layer_norm(ct[i], ln_vars[LN_GC], ln_vars[LN_BC]))); 248 | } 249 | else { 250 | if (i==0) 251 | in = ht[i] = cmult(i_ot, tanh(ct[i])); 252 | else 253 | in = ht[i] = cmult(i_ot, in+tanh(ct[i])); 254 | } 255 | } 256 | return ht.back(); 257 | } 258 | 259 | void ResidualDilatedLSTMBuilder::copy(const RNNBuilder & rnn) { 260 | const ResidualDilatedLSTMBuilder & rnn_lstm = (const ResidualDilatedLSTMBuilder&)rnn; 261 | DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), 262 | "Attempt to copy ResidualDilatedLSTMBuilder with different number of parameters " 263 | "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); 264 | for (size_t i = 0; i < params.size(); ++i) 265 | for (size_t j = 0; j < params[i].size(); ++j) 266 | params[i][j] = rnn_lstm.params[i][j]; 267 | for (size_t i = 0; i < ln_params.size(); ++i) 268 | for (size_t j = 0; j < ln_params[i].size(); ++j) 269 | ln_params[i][j] = rnn_lstm.ln_params[i][j]; 270 | } 271 | 272 | void ResidualDilatedLSTMBuilder::set_dropout(float d) { 273 | DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, 274 | "dropout rate must be a probability (>=0 and <=1)"); 275 | dropout_rate = d; 276 | dropout_rate_h = d; 277 | } 278 | 279 | void ResidualDilatedLSTMBuilder::set_dropout(float d, float d_h) { 280 | DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, 281 | "dropout rate must be a probability (>=0 and <=1)"); 282 | dropout_rate = d; 283 | dropout_rate_h = d_h; 284 | } 285 | 286 | void ResidualDilatedLSTMBuilder::disable_dropout() { 287 | dropout_rate = 0.f; 288 | dropout_rate_h = 0.f; 289 | } 290 | 291 | 292 | 293 | 294 | //enum { _X2I, _H2I, _BI, _X2F, _H2F, _BF, _X2O, _H2O, _BO, _X2G, _H2G, _BG }; 295 | enum { _X2I_, _H2I_, _BI_, _XA1, _HA1, _SA1, _BA1, _A2, _B2 }; 296 | 297 | 298 | //*************************** 299 | 300 | 301 | 302 | AttentiveDilatedLSTMBuilder::AttentiveDilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), weightnoise_std(0), dropout_masks_valid(false) { } 303 | 304 | AttentiveDilatedLSTMBuilder::AttentiveDilatedLSTMBuilder(vector max_dilations, 305 | unsigned input_dim, 306 | unsigned hidden_dim, 307 | unsigned attention_dim, 308 | ParameterCollection& model) 309 | : max_dilations(max_dilations), layers(unsigned(max_dilations.size())), 310 | input_dim(input_dim), hid(hidden_dim), attention_dim(attention_dim), weightnoise_std(0), dropout_masks_valid(false) { 311 | unsigned layer_input_dim = input_dim; 312 | local_model = model.add_subcollection("compact-vanilla-lstm-builder"); 313 | for (unsigned i = 0; i < layers; ++i) { 314 | // i 315 | Parameter p_Wx = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); 316 | Parameter p_Wh = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); 317 | Parameter p_b = local_model.add_parameters({ hidden_dim * 4 
}, ParameterInitConst(0.f)); 318 | 319 | Parameter p_Wxa1 = local_model.add_parameters({ attention_dim, layer_input_dim }); 320 | Parameter p_Wha1 = local_model.add_parameters({ attention_dim, hidden_dim }); 321 | Parameter p_Wsa1 = local_model.add_parameters({ attention_dim, hidden_dim }); 322 | Parameter p_ba1 = local_model.add_parameters({ attention_dim }, ParameterInitConst(0.f)); 323 | 324 | Parameter p_Wa2 = local_model.add_parameters({ max_dilations[i], attention_dim }); 325 | Parameter p_ba2 = local_model.add_parameters({ max_dilations[i] }, ParameterInitConst(0.f)); 326 | 327 | layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next 328 | 329 | vector ps = { p_Wx, p_Wh, p_b, p_Wxa1, p_Wha1, p_Wsa1, p_ba1, p_Wa2, p_ba2 }; 330 | params.push_back(ps); 331 | 332 | } // layers 333 | dropout_rate = 0.f; 334 | dropout_rate_h = 0.f; 335 | } 336 | 337 | void AttentiveDilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { 338 | param_vars.clear(); 339 | for (unsigned i = 0; i < layers; ++i) { 340 | auto& p = params[i]; 341 | vector vars; 342 | for (unsigned j = 0; j < p.size(); ++j) { 343 | vars.push_back(update ? parameter(cg, p[j]) : const_parameter(cg, p[j])); 344 | } 345 | param_vars.push_back(vars); 346 | } 347 | 348 | _cg = &cg; 349 | } 350 | // layout: 0..layers = c 351 | // layers+1..2*layers = h 352 | void AttentiveDilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { 353 | h.clear(); 354 | c.clear(); 355 | 356 | if (hinit.size() > 0) { 357 | DYNET_ARG_CHECK(layers * 2 == hinit.size(), 358 | "AttentiveDilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " 359 | "(hidden state, and cell for each layer). However, for " << layers << " layers, " << 360 | hinit.size() << " expressions were passed in"); 361 | h0.resize(layers); 362 | c0.resize(layers); 363 | for (unsigned i = 0; i < layers; ++i) { 364 | c0[i] = hinit[i]; 365 | h0[i] = hinit[i + layers]; 366 | } 367 | has_initial_state = true; 368 | } 369 | else { 370 | has_initial_state = false; 371 | } 372 | 373 | dropout_masks_valid = false; 374 | } 375 | 376 | void AttentiveDilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { 377 | masks.clear(); 378 | for (unsigned i = 0; i < layers; ++i) { 379 | std::vector masks_i; 380 | unsigned idim = (i == 0) ? input_dim : hid; 381 | if (dropout_rate > 0.f || dropout_rate_h > 0.f) { 382 | float retention_rate = 1.f - dropout_rate; 383 | float retention_rate_h = 1.f - dropout_rate_h; 384 | float scale = 1.f / retention_rate; 385 | float scale_h = 1.f / retention_rate_h; 386 | // in 387 | masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); 388 | // h 389 | masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); 390 | masks.push_back(masks_i); 391 | } 392 | } 393 | dropout_masks_valid = true; 394 | } 395 | 396 | ParameterCollection & AttentiveDilatedLSTMBuilder::get_parameter_collection() { 397 | return local_model; 398 | } 399 | 400 | // TODO - Make this correct 401 | // Copied c from the previous step (otherwise c.size()< h.size()) 402 | // Also is creating a new step something we want? 403 | // wouldn't overwriting the current one be better? 
404 | Expression AttentiveDilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { 405 | DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, 406 | "AttentiveDilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << 407 | h_new.size() << " inputs for " << layers << " layers"); 408 | const unsigned t = unsigned(h.size()); 409 | h.push_back(vector(layers)); 410 | c.push_back(vector(layers)); 411 | for (unsigned i = 0; i < layers; ++i) { 412 | Expression h_i = h_new[i]; 413 | Expression c_i = c[t - 1][i]; 414 | h[t][i] = h_i; 415 | c[t][i] = c_i; 416 | } 417 | return h[t].back(); 418 | } 419 | // Current implementation : s_new is either {new_c[0],...,new_c[n]} 420 | // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} 421 | Expression AttentiveDilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { 422 | DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, 423 | "AttentiveDilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); 424 | bool only_c = s_new.size() == layers; 425 | const unsigned t = unsigned(c.size()); 426 | h.push_back(vector(layers)); 427 | c.push_back(vector(layers)); 428 | for (unsigned i = 0; i < layers; ++i) { 429 | Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers]; 430 | Expression c_i = s_new[i]; 431 | h[t][i] = h_i; 432 | c[t][i] = c_i; 433 | } 434 | return h[t].back(); 435 | } 436 | 437 | Expression AttentiveDilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { 438 | h.push_back(vector(layers)); 439 | c.push_back(vector(layers)); 440 | vector& ht = h.back(); 441 | vector& ct = c.back(); 442 | Expression in = x; 443 | if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); 444 | for (unsigned i = 0; i < layers; ++i) { 445 | int dilation_offset= max_dilations[i]-1; 446 | const vector& vars = param_vars[i]; 447 | Expression i_h_tm1, i_c_tm1; 448 | if (prev < dilation_offset) { 449 | if (has_initial_state) { 450 | // initial value for h and c at timestep 0 in layer i 451 | // defaults to zero matrix input if not set in add_parameter_edges 452 | i_h_tm1 = h0[i]; 453 | i_c_tm1 = c0[i]; 454 | } 455 | else { 456 | i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); 457 | i_c_tm1 = i_h_tm1; 458 | } 459 | } 460 | else { 461 | if (dilation_offset>0) { 462 | //enum { _X2I, _H2I, _BI, _XA1, _HA1, _SA1, _BA1, _A2, _B2 }; 463 | Expression weights_ex=vars[_XA1]*in+ vars[_HA1]*h[prev][i]+ vars[_SA1]*c[prev][i]+ vars[_BA1]; 464 | weights_ex=tanh(weights_ex); 465 | weights_ex=vars[_A2]* weights_ex+ vars[_B2]; 466 | weights_ex =softmax(weights_ex); 467 | #if defined _DEBUG 468 | vector weights=as_vector(weights_ex.value()); 469 | #endif 470 | 471 | unsigned indx=0; 472 | Expression w_ex = pick(weights_ex, indx); 473 | Expression avg_h= cmult(h[prev][i], w_ex); 474 | for (indx=1; indx <= dilation_offset; indx++) {//dilation_offset==max_dilations[i]-1, so together with indx==0, we cover max_dilations[i] steps 475 | w_ex = pick(weights_ex, indx); 476 | avg_h = avg_h+cmult(h[prev- indx][i], w_ex); 477 | } 478 | i_h_tm1 = avg_h; 479 | } else { 480 | i_h_tm1 = h[prev- dilation_offset][i]; 481 | } 482 | i_c_tm1 = c[prev- dilation_offset][i]; 483 | } 484 | if (dropout_rate > 0.f || dropout_rate_h > 0.f) { 485 | // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) 486 | Expression gates_t = vanilla_lstm_gates_dropout({ in }, i_h_tm1, 
vars[_X2I], vars[_H2I], vars[_BI], masks[i][0], masks[i][1], weightnoise_std); 487 | ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); 488 | in = ht[i] = vanilla_lstm_h(ct[i], gates_t); 489 | } 490 | else { 491 | Expression gates_t = vanilla_lstm_gates({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], weightnoise_std); 492 | ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); 493 | in = ht[i] = vanilla_lstm_h(ct[i], gates_t); 494 | } 495 | } 496 | return ht.back(); 497 | } 498 | 499 | void AttentiveDilatedLSTMBuilder::copy(const RNNBuilder & rnn) { 500 | const AttentiveDilatedLSTMBuilder & rnn_lstm = (const AttentiveDilatedLSTMBuilder&)rnn; 501 | DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), 502 | "Attempt to copy AttentiveDilatedLSTMBuilder with different number of parameters " 503 | "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); 504 | for (size_t i = 0; i < params.size(); ++i) 505 | for (size_t j = 0; j < params[i].size(); ++j) 506 | params[i][j] = rnn_lstm.params[i][j]; 507 | } 508 | 509 | void AttentiveDilatedLSTMBuilder::set_dropout(float d) { 510 | DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, 511 | "dropout rate must be a probability (>=0 and <=1)"); 512 | dropout_rate = d; 513 | dropout_rate_h = d; 514 | } 515 | 516 | void AttentiveDilatedLSTMBuilder::set_dropout(float d, float d_h) { 517 | DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, 518 | "dropout rate must be a probability (>=0 and <=1)"); 519 | dropout_rate = d; 520 | dropout_rate_h = d_h; 521 | } 522 | 523 | void AttentiveDilatedLSTMBuilder::disable_dropout() { 524 | dropout_rate = 0.f; 525 | dropout_rate_h = 0.f; 526 | } 527 | void AttentiveDilatedLSTMBuilder::set_weightnoise(float std) { 528 | DYNET_ARG_CHECK(std >= 0.f, "weight noise must have standard deviation >=0"); 529 | weightnoise_std = std; 530 | } 531 | 532 | //*/ 533 | 534 | DilatedLSTMBuilder::DilatedLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), weightnoise_std(0), dropout_masks_valid(false) { } 535 | 536 | DilatedLSTMBuilder::DilatedLSTMBuilder(vector dilations, 537 | unsigned input_dim, 538 | unsigned hidden_dim, 539 | ParameterCollection& model) 540 | : dilations(dilations), layers(unsigned(dilations.size())), 541 | input_dim(input_dim), hid(hidden_dim), weightnoise_std(0), dropout_masks_valid(false) { 542 | unsigned layer_input_dim = input_dim; 543 | local_model = model.add_subcollection("compact-vanilla-lstm-builder"); 544 | for (unsigned i = 0; i < layers; ++i) { 545 | // i 546 | Parameter p_Wx = local_model.add_parameters({ hidden_dim * 4, layer_input_dim }); 547 | Parameter p_Wh = local_model.add_parameters({ hidden_dim * 4, hidden_dim }); 548 | Parameter p_b = local_model.add_parameters({ hidden_dim * 4 }, ParameterInitConst(0.f)); 549 | 550 | layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next 551 | 552 | vector ps = { p_Wx, p_Wh, p_b }; 553 | params.push_back(ps); 554 | 555 | } // layers 556 | dropout_rate = 0.f; 557 | dropout_rate_h = 0.f; 558 | } 559 | 560 | void DilatedLSTMBuilder::new_graph_impl(ComputationGraph& cg, bool update) { 561 | param_vars.clear(); 562 | for (unsigned i = 0; i < layers; ++i) { 563 | auto& p = params[i]; 564 | vector vars; 565 | for (unsigned j = 0; j < p.size(); ++j) { vars.push_back(update ? 
parameter(cg, p[j]) : const_parameter(cg, p[j])); } 566 | param_vars.push_back(vars); 567 | } 568 | 569 | _cg = &cg; 570 | } 571 | // layout: 0..layers = c 572 | // layers+1..2*layers = h 573 | void DilatedLSTMBuilder::start_new_sequence_impl(const vector& hinit) { 574 | h.clear(); 575 | c.clear(); 576 | 577 | if (hinit.size() > 0) { 578 | DYNET_ARG_CHECK(layers * 2 == hinit.size(), 579 | "DilatedLSTMBuilder must be initialized with 2 times as many expressions as layers " 580 | "(hidden state, and cell for each layer). However, for " << layers << " layers, " << 581 | hinit.size() << " expressions were passed in"); 582 | h0.resize(layers); 583 | c0.resize(layers); 584 | for (unsigned i = 0; i < layers; ++i) { 585 | c0[i] = hinit[i]; 586 | h0[i] = hinit[i + layers]; 587 | } 588 | has_initial_state = true; 589 | } else { 590 | has_initial_state = false; 591 | } 592 | 593 | dropout_masks_valid = false; 594 | } 595 | 596 | void DilatedLSTMBuilder::set_dropout_masks(unsigned batch_size) { 597 | masks.clear(); 598 | for (unsigned i = 0; i < layers; ++i) { 599 | std::vector masks_i; 600 | unsigned idim = (i == 0) ? input_dim : hid; 601 | if (dropout_rate > 0.f || dropout_rate_h > 0.f) { 602 | float retention_rate = 1.f - dropout_rate; 603 | float retention_rate_h = 1.f - dropout_rate_h; 604 | float scale = 1.f / retention_rate; 605 | float scale_h = 1.f / retention_rate_h; 606 | // in 607 | masks_i.push_back(random_bernoulli(*_cg, Dim({ idim }, batch_size), retention_rate, scale)); 608 | // h 609 | masks_i.push_back(random_bernoulli(*_cg, Dim({ hid }, batch_size), retention_rate_h, scale_h)); 610 | masks.push_back(masks_i); 611 | } 612 | } 613 | dropout_masks_valid = true; 614 | } 615 | 616 | ParameterCollection & DilatedLSTMBuilder::get_parameter_collection() { 617 | return local_model; 618 | } 619 | 620 | // TODO - Make this correct 621 | // Copied c from the previous step (otherwise c.size()< h.size()) 622 | // Also is creating a new step something we want? 623 | // wouldn't overwriting the current one be better? 624 | Expression DilatedLSTMBuilder::set_h_impl(int prev, const vector& h_new) { 625 | DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, 626 | "DilatedLSTMBuilder::set_h expects as many inputs as layers, but got " << 627 | h_new.size() << " inputs for " << layers << " layers"); 628 | const unsigned t = unsigned(h.size()); 629 | h.push_back(vector(layers)); 630 | c.push_back(vector(layers)); 631 | for (unsigned i = 0; i < layers; ++i) { 632 | Expression h_i = h_new[i]; 633 | Expression c_i = c[t - 1][i]; 634 | h[t][i] = h_i; 635 | c[t][i] = c_i; 636 | } 637 | return h[t].back(); 638 | } 639 | // Current implementation : s_new is either {new_c[0],...,new_c[n]} 640 | // or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} 641 | Expression DilatedLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { 642 | DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, 643 | "DilatedLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); 644 | bool only_c = s_new.size() == layers; 645 | const unsigned t = unsigned(c.size()); 646 | h.push_back(vector(layers)); 647 | c.push_back(vector(layers)); 648 | for (unsigned i = 0; i < layers; ++i) { 649 | Expression h_i = only_c ? 
h[t - 1][i] : s_new[i + layers]; 650 | Expression c_i = s_new[i]; 651 | h[t][i] = h_i; 652 | c[t][i] = c_i; 653 | } 654 | return h[t].back(); 655 | } 656 | 657 | Expression DilatedLSTMBuilder::add_input_impl(int prev, const Expression& x) { 658 | h.push_back(vector(layers)); 659 | c.push_back(vector(layers)); 660 | vector& ht = h.back(); 661 | vector& ct = c.back(); 662 | Expression in = x; 663 | if ((dropout_rate > 0.f || dropout_rate_h > 0.f) && !dropout_masks_valid) set_dropout_masks(x.dim().bd); 664 | for (unsigned i = 0; i < layers; ++i) { 665 | int dilation_offset = dilations[i] - 1; 666 | const vector& vars = param_vars[i]; 667 | Expression i_h_tm1, i_c_tm1; 668 | if (prev < dilation_offset) { 669 | if (has_initial_state) { 670 | // initial value for h and c at timestep 0 in layer i 671 | // defaults to zero matrix input if not set in add_parameter_edges 672 | i_h_tm1 = h0[i]; 673 | i_c_tm1 = c0[i]; 674 | } else { 675 | i_h_tm1 = zeros(*_cg, Dim({ vars[_BI].dim()[0] / 4 }, x.dim().bd)); 676 | i_c_tm1 = i_h_tm1; 677 | } 678 | } else { // t > 0 679 | i_h_tm1 = h[prev - dilation_offset][i]; 680 | i_c_tm1 = c[prev - dilation_offset][i]; 681 | } 682 | if (dropout_rate > 0.f || dropout_rate_h > 0.f) { 683 | // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) 684 | Expression gates_t = vanilla_lstm_gates_dropout({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], masks[i][0], masks[i][1], weightnoise_std); 685 | ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); 686 | in = ht[i] = vanilla_lstm_h(ct[i], gates_t); 687 | } else { 688 | Expression gates_t = vanilla_lstm_gates({ in }, i_h_tm1, vars[_X2I], vars[_H2I], vars[_BI], weightnoise_std); 689 | ct[i] = vanilla_lstm_c(i_c_tm1, gates_t); 690 | in = ht[i] = vanilla_lstm_h(ct[i], gates_t); 691 | } 692 | } 693 | return ht.back(); 694 | } 695 | 696 | void DilatedLSTMBuilder::copy(const RNNBuilder & rnn) { 697 | const DilatedLSTMBuilder & rnn_lstm = (const DilatedLSTMBuilder&)rnn; 698 | DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), 699 | "Attempt to copy DilatedLSTMBuilder with different number of parameters " 700 | "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); 701 | for (size_t i = 0; i < params.size(); ++i) 702 | for (size_t j = 0; j < params[i].size(); ++j) 703 | params[i][j] = rnn_lstm.params[i][j]; 704 | } 705 | 706 | void DilatedLSTMBuilder::set_dropout(float d) { 707 | DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, 708 | "dropout rate must be a probability (>=0 and <=1)"); 709 | dropout_rate = d; 710 | dropout_rate_h = d; 711 | } 712 | 713 | void DilatedLSTMBuilder::set_dropout(float d, float d_h) { 714 | DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, 715 | "dropout rate must be a probability (>=0 and <=1)"); 716 | dropout_rate = d; 717 | dropout_rate_h = d_h; 718 | } 719 | 720 | void DilatedLSTMBuilder::disable_dropout() { 721 | dropout_rate = 0.f; 722 | dropout_rate_h = 0.f; 723 | } 724 | void DilatedLSTMBuilder::set_weightnoise(float std) { 725 | DYNET_ARG_CHECK(std >= 0.f, "weight noise must have standard deviation >=0"); 726 | weightnoise_std = std; 727 | } 728 | 729 | } // namespace dynet 730 | -------------------------------------------------------------------------------- /c++/slstm.h: -------------------------------------------------------------------------------- 1 | /** 2 | * file slstm.h 3 | * header for my implementation of dilated LSTMs, based on Dynet LSTM builders 4 | - DilatedLSTMBuilder - standard Dilated LSTM 
(https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) 5 | - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 6 | - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 7 | * 8 | Slawek Smyl, Mar-May 2018 9 | */ 10 | 11 | #ifndef DYNET_SLSTMS_H_ 12 | #define DYNET_SLSTMS_H_ 13 | 14 | #include "dynet/dynet.h" 15 | #include "dynet/rnn.h" 16 | #include "dynet/expr.h" 17 | 18 | using namespace std; 19 | 20 | namespace dynet { 21 | 22 | //basd on VanillaLSTMBuilder 23 | struct ResidualDilatedLSTMBuilder : public RNNBuilder { 24 | /** 25 | * @brief Default Constructor 26 | */ 27 | ResidualDilatedLSTMBuilder(); 28 | /** 29 | * \brief Constructor for the ResidualDilatedLSTMBuilder 30 | * 31 | * \param dilations Vector of dilations 32 | * \param input_dim Dimention of the input \f$x_t\f$ 33 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 34 | * \param model ParameterCollection holding the parameters 35 | * \param ln_lstm Whether to use layer normalization 36 | * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) 37 | */ 38 | explicit ResidualDilatedLSTMBuilder(vector dilations, 39 | unsigned input_dim, 40 | unsigned hidden_dim, 41 | ParameterCollection& model, 42 | bool ln_lstm = false, 43 | float forget_bias = 1.f); 44 | 45 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 46 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 47 | std::vector final_s() const override { 48 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 49 | for (auto my_h : final_h()) ret.push_back(my_h); 50 | return ret; 51 | } 52 | unsigned num_h0_components() const override { return 2 * layers; } 53 | 54 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 55 | std::vector get_s(RNNPointer i) const override { 56 | std::vector ret = (i == -1 ? c0 : c[i]); 57 | for (auto my_h : get_h(i)) ret.push_back(my_h); 58 | return ret; 59 | } 60 | 61 | void copy(const RNNBuilder & params) override; 62 | 63 | /** 64 | * \brief Set the dropout rates to a unique value 65 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 66 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 67 | */ 68 | void set_dropout(float d); 69 | /** 70 | * \brief Set the dropout rates 71 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 72 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
73 | * The dynamics of the cell are then modified to : 74 | * 75 | * \f$ 76 | * \begin{split} 77 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 78 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 79 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 80 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 81 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 82 | h_t & = \tanh(c_t)\circ o_t\\ 83 | \end{split} 84 | * \f$ 85 | * 86 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 87 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 88 | * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ 89 | */ 90 | void set_dropout(float d, float d_r); 91 | /** 92 | * \brief Set all dropout rates to 0 93 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 94 | * 95 | */ 96 | void disable_dropout(); 97 | /** 98 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 99 | * \details If this function is not called on batched input, the same mask will be applied across 100 | * all batch elements. Use this to apply different masks to each batch element 101 | * 102 | * \param batch_size Batch size 103 | */ 104 | void set_dropout_masks(unsigned batch_size = 1); 105 | /** 106 | * \brief Get parameters in ResidualDilatedLSTMBuilder 107 | * \return list of points to ParameterStorage objects 108 | */ 109 | ParameterCollection & get_parameter_collection() override; 110 | protected: 111 | void new_graph_impl(ComputationGraph& cg, bool update) override; 112 | void start_new_sequence_impl(const std::vector& h0) override; 113 | Expression add_input_impl(int prev, const Expression& x) override; 114 | Expression set_h_impl(int prev, const std::vector& h_new) override; 115 | Expression set_s_impl(int prev, const std::vector& s_new) override; 116 | 117 | public: 118 | ParameterCollection local_model; 119 | // first index is layer, then ... 120 | std::vector> params; 121 | // first index is layer, then ... 122 | std::vector> ln_params; 123 | 124 | // first index is layer, then ... 125 | std::vector> param_vars; 126 | // first index is layer, then ... 127 | std::vector> ln_param_vars; 128 | 129 | // first index is layer, then ... 
130 | std::vector> masks; 131 | 132 | // first index is time, second is layer 133 | std::vector> h, c; 134 | 135 | // initial values of h and c at each layer 136 | // - both default to zero matrix input 137 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 138 | std::vector h0; 139 | std::vector c0; 140 | unsigned layers; 141 | unsigned input_dim, hid; 142 | float dropout_rate_h; 143 | bool ln_lstm; 144 | float forget_bias; 145 | bool dropout_masks_valid; 146 | vector dilations; //one int per layer 147 | 148 | private: 149 | ComputationGraph* _cg; // Pointer to current cg 150 | 151 | }; 152 | 153 | 154 | struct DilatedLSTMBuilder : public RNNBuilder { 155 | /** 156 | * @brief Default Constructor 157 | */ 158 | DilatedLSTMBuilder(); 159 | /** 160 | * \brief Constructor for the DilatedLSTMBuilder 161 | * 162 | * \param dilations Vector of dilations 163 | * \param input_dim Dimention of the input \f$x_t\f$ 164 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 165 | * \param model ParameterCollection holding the parameters 166 | */ 167 | explicit DilatedLSTMBuilder(vector dilations, 168 | unsigned input_dim, 169 | unsigned hidden_dim, 170 | ParameterCollection& model); 171 | 172 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 173 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 174 | std::vector final_s() const override { 175 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 176 | for (auto my_h : final_h()) ret.push_back(my_h); 177 | return ret; 178 | } 179 | unsigned num_h0_components() const override { return 2 * layers; } 180 | 181 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 182 | std::vector get_s(RNNPointer i) const override { 183 | std::vector ret = (i == -1 ? c0 : c[i]); 184 | for (auto my_h : get_h(i)) ret.push_back(my_h); 185 | return ret; 186 | } 187 | 188 | void copy(const RNNBuilder & params) override; 189 | 190 | /** 191 | * \brief Set the dropout rates to a unique value 192 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 193 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 194 | */ 195 | void set_dropout(float d); 196 | /** 197 | * \brief Set the dropout rates 198 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 199 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
200 | * The dynamics of the cell are then modified to : 201 | * 202 | * \f$ 203 | * \begin{split} 204 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 205 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 206 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 207 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 208 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 209 | h_t & = \tanh(c_t)\circ o_t\\ 210 | \end{split} 211 | * \f$ 212 | * 213 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 214 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 215 | */ 216 | void set_dropout(float d, float d_r); 217 | /** 218 | * \brief Set all dropout rates to 0 219 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 220 | * 221 | */ 222 | void disable_dropout(); 223 | /** 224 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 225 | * \details If this function is not called on batched input, the same mask will be applied across 226 | * all batch elements. Use this to apply different masks to each batch element 227 | * 228 | * \param batch_size Batch size 229 | */ 230 | void set_dropout_masks(unsigned batch_size = 1); 231 | 232 | void set_weightnoise(float std); 233 | ParameterCollection & get_parameter_collection() override; 234 | protected: 235 | void new_graph_impl(ComputationGraph& cg, bool update) override; 236 | void start_new_sequence_impl(const std::vector& h0) override; 237 | Expression add_input_impl(int prev, const Expression& x) override; 238 | Expression set_h_impl(int prev, const std::vector& h_new) override; 239 | Expression set_s_impl(int prev, const std::vector& s_new) override; 240 | 241 | public: 242 | ParameterCollection local_model; 243 | // first index is layer, then ... 244 | std::vector> params; 245 | 246 | // first index is layer, then ... 247 | std::vector> param_vars; 248 | 249 | // first index is layer, then ... 
250 | std::vector> masks; 251 | 252 | // first index is time, second is layer 253 | std::vector> h, c; 254 | 255 | // initial values of h and c at each layer 256 | // - both default to zero matrix input 257 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 258 | std::vector h0; 259 | std::vector c0; 260 | unsigned layers; 261 | unsigned input_dim, hid; 262 | float dropout_rate_h; 263 | float weightnoise_std; 264 | vector dilations; //one int per layer 265 | 266 | bool dropout_masks_valid; 267 | private: 268 | ComputationGraph* _cg; // Pointer to current cg 269 | 270 | }; 271 | 272 | 273 | struct AttentiveDilatedLSTMBuilder : public RNNBuilder { 274 | /** 275 | * @brief Default Constructor 276 | */ 277 | AttentiveDilatedLSTMBuilder(); 278 | /** 279 | * \brief Constructor for the AttentiveDilatedLSTMBuilder 280 | * 281 | * \param max_dilations Vector, maximum dilations (per layer) 282 | * \param input_dim Dimention of the input \f$x_t\f$ 283 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 284 | * \param model ParameterCollection holding the parameters 285 | */ 286 | explicit AttentiveDilatedLSTMBuilder(vector max_dilations, 287 | unsigned input_dim, 288 | unsigned hidden_dim, 289 | unsigned attention_dim, 290 | ParameterCollection& model); 291 | 292 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 293 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 294 | std::vector final_s() const override { 295 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 296 | for (auto my_h : final_h()) ret.push_back(my_h); 297 | return ret; 298 | } 299 | unsigned num_h0_components() const override { return 2 * layers; } 300 | 301 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 302 | std::vector get_s(RNNPointer i) const override { 303 | std::vector ret = (i == -1 ? c0 : c[i]); 304 | for (auto my_h : get_h(i)) ret.push_back(my_h); 305 | return ret; 306 | } 307 | 308 | void copy(const RNNBuilder & params) override; 309 | 310 | /** 311 | * \brief Set the dropout rates to a unique value 312 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 313 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 314 | */ 315 | void set_dropout(float d); 316 | /** 317 | * \brief Set the dropout rates 318 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 319 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
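 * (As a concrete illustration of the scaling, assuming a rate \f$d_x = 0.25\f$: each element of
 * \f$\mathbf{z_x}\f$ is 1 with probability \f$1-d_x = 0.75\f$ and 0 otherwise, and the surviving
 * inputs are multiplied by \f$\frac 1 {1-d_x} = \frac 4 3\f$, so the expected value of
 * \f$\frac 1 {1-d_x}\mathbf{z_x} \circ x_t\f$ is exactly \f$x_t\f$. The same drawn masks are then
 * reused at every time step of the sequence.)
 *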
320 | * The dynamics of the cell are then modified to : 321 | * 322 | * \f$ 323 | * \begin{split} 324 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 325 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 326 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 327 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 328 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 329 | h_t & = \tanh(c_t)\circ o_t\\ 330 | \end{split} 331 | * \f$ 332 | * 333 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 334 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 335 | */ 336 | void set_dropout(float d, float d_r); 337 | /** 338 | * \brief Set all dropout rates to 0 339 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 340 | * 341 | */ 342 | void disable_dropout(); 343 | /** 344 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 345 | * \details If this function is not called on batched input, the same mask will be applied across 346 | * all batch elements. Use this to apply different masks to each batch element 347 | * 348 | * \param batch_size Batch size 349 | */ 350 | void set_dropout_masks(unsigned batch_size = 1); 351 | 352 | void set_weightnoise(float std); 353 | ParameterCollection & get_parameter_collection() override; 354 | protected: 355 | void new_graph_impl(ComputationGraph& cg, bool update) override; 356 | void start_new_sequence_impl(const std::vector& h0) override; 357 | Expression add_input_impl(int prev, const Expression& x) override; 358 | Expression set_h_impl(int prev, const std::vector& h_new) override; 359 | Expression set_s_impl(int prev, const std::vector& s_new) override; 360 | 361 | public: 362 | ParameterCollection local_model; 363 | // first index is layer, then ... 364 | std::vector> params; 365 | 366 | // first index is layer, then ... 367 | std::vector> param_vars; 368 | 369 | // first index is layer, then ... 
370 | std::vector> masks; 371 | 372 | // first index is time, second is layer 373 | std::vector> h, c; 374 | 375 | // initial values of h and c at each layer 376 | // - both default to zero matrix input 377 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 378 | std::vector h0; 379 | std::vector c0; 380 | unsigned layers; 381 | unsigned input_dim, hid; 382 | unsigned attention_dim; 383 | float dropout_rate_h; 384 | float weightnoise_std; 385 | vector max_dilations; //one int per layer 386 | 387 | bool dropout_masks_valid; 388 | private: 389 | ComputationGraph* _cg; // Pointer to current cg 390 | 391 | }; 392 | } // namespace dynet 393 | 394 | #endif 395 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M4.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.25420.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M41", "M41\M41.vcxproj", "{928301A0-F01A-48F6-A499-851B3CE8BD4E}" 7 | EndProject 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M42", "M42\M42.vcxproj", "{A16B5466-E680-43F6-A884-A4A01EB78E50}" 9 | EndProject 10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M43", "M43\M43.vcxproj", "{BE951571-3F3A-4048-BAA3-0C05F38CFF42}" 11 | EndProject 12 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "M44", "M44\M44.vcxproj", "{7A192E0C-8F58-4D65-998E-3A7010AB5F87}" 13 | EndProject 14 | Global 15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 16 | Debug|x64 = Debug|x64 17 | Debug|x86 = Debug|x86 18 | RelWithDebug|x64 = RelWithDebug|x64 19 | RelWithDebug|x86 = RelWithDebug|x86 20 | EndGlobalSection 21 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 22 | {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x64.ActiveCfg = Debug|x64 23 | {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x64.Build.0 = Debug|x64 24 | {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x86.ActiveCfg = Debug|Win32 25 | {928301A0-F01A-48F6-A499-851B3CE8BD4E}.Debug|x86.Build.0 = Debug|Win32 26 | {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 27 | {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 28 | {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 29 | {928301A0-F01A-48F6-A499-851B3CE8BD4E}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 30 | {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x64.ActiveCfg = Debug|x64 31 | {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x64.Build.0 = Debug|x64 32 | {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x86.ActiveCfg = Debug|Win32 33 | {A16B5466-E680-43F6-A884-A4A01EB78E50}.Debug|x86.Build.0 = Debug|Win32 34 | {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 35 | {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 36 | {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 37 | {A16B5466-E680-43F6-A884-A4A01EB78E50}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 38 | {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x64.ActiveCfg = Debug|x64 39 | {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x64.Build.0 = Debug|x64 40 | {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x86.ActiveCfg = Debug|Win32 41 | {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.Debug|x86.Build.0 = Debug|Win32 42 | 
{BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 43 | {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 44 | {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 45 | {BE951571-3F3A-4048-BAA3-0C05F38CFF42}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 46 | {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x64.ActiveCfg = Debug|x64 47 | {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x64.Build.0 = Debug|x64 48 | {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x86.ActiveCfg = Debug|Win32 49 | {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.Debug|x86.Build.0 = Debug|Win32 50 | {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x64.ActiveCfg = RelWithDebug|x64 51 | {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x64.Build.0 = RelWithDebug|x64 52 | {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x86.ActiveCfg = RelWithDebug|Win32 53 | {7A192E0C-8F58-4D65-998E-3A7010AB5F87}.RelWithDebug|x86.Build.0 = RelWithDebug|Win32 54 | EndGlobalSection 55 | GlobalSection(SolutionProperties) = preSolution 56 | HideSolutionNode = FALSE 57 | EndGlobalSection 58 | EndGlobal 59 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M41/M41.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | RelWithDebug 22 | Win32 23 | 24 | 25 | RelWithDebug 26 | x64 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | {928301A0-F01A-48F6-A499-851B3CE8BD4E} 38 | Win32Proj 39 | M41 40 | 8.1 41 | 42 | 43 | 44 | Application 45 | true 46 | v140 47 | Unicode 48 | 49 | 50 | Application 51 | true 52 | v140 53 | Unicode 54 | 55 | 56 | Application 57 | false 58 | v140 59 | true 60 | Unicode 61 | 62 | 63 | Application 64 | true 65 | v140 66 | Unicode 67 | Sequential 68 | 69 | 70 | Application 71 | true 72 | v140 73 | Unicode 74 | Sequential 75 | 76 | 77 | Application 78 | false 79 | v140 80 | true 81 | Unicode 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | true 109 | 110 | 111 | true 112 | 113 | 114 | true 115 | 116 | 117 | true 118 | 119 | 120 | false 121 | 122 | 123 | false 124 | 125 | 126 | 127 | 128 | 129 | Level3 130 | Disabled 131 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 132 | 133 | 134 | Console 135 | true 136 | 137 | 138 | 139 | 140 | 141 | 142 | Level3 143 | Disabled 144 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 145 | 146 | 147 | Console 148 | true 149 | 150 | 151 | 152 | 153 | NotUsing 154 | Level1 155 | Disabled 156 | WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 157 | E:\progs2\dynet;E:\progs\Eigen; 158 | 159 | 160 | Console 161 | true 162 | E:\progs2\dynet\buildMKL\dynet\Debug 163 | dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 164 | 165 | 166 | 167 | 168 | NotUsing 169 | Level1 170 | MaxSpeed 171 | WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) 172 | E:\progs2\dynet;E:\progs\Eigen; 173 | AnySuitable 174 | true 175 | Speed 176 | AdvancedVectorExtensions 177 | Default 178 | MultiThreadedDLL 179 | ProgramDatabase 180 | true 181 | false 182 | 183 | 184 | 
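<!-- The include and library directories referenced in this project (E:\progs2\dynet, E:\progs\Eigen,
     E:\progs2\dynet\buildMKL\dynet\Debug and ...\RelWithDebInfo) are local to the author's machine and
     have to be repointed at the local dynet, Eigen and MKL-enabled dynet builds before compiling. -->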
Console 185 | true 186 | E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo 187 | dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 188 | 189 | 190 | 191 | 192 | Level3 193 | 194 | 195 | MaxSpeed 196 | true 197 | true 198 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 199 | 200 | 201 | Console 202 | true 203 | true 204 | true 205 | 206 | 207 | 208 | 209 | Level3 210 | 211 | 212 | MaxSpeed 213 | true 214 | true 215 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 216 | 217 | 218 | Console 219 | true 220 | true 221 | true 222 | 223 | 224 | 225 | 226 | 227 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M41/slstm.h: -------------------------------------------------------------------------------- 1 | /** 2 | * file slstm.h 3 | * header for my implementation of dilated LSTMs, based on Dynet LSTM builders 4 | - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) 5 | - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 6 | - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 7 | * 8 | Slawek Smyl, Mar-May 2018 9 | */ 10 | 11 | #ifndef DYNET_SLSTMS_H_ 12 | #define DYNET_SLSTMS_H_ 13 | 14 | #include "dynet/dynet.h" 15 | #include "dynet/rnn.h" 16 | #include "dynet/expr.h" 17 | 18 | using namespace std; 19 | 20 | namespace dynet { 21 | 22 | //basd on VanillaLSTMBuilder 23 | struct ResidualDilatedLSTMBuilder : public RNNBuilder { 24 | /** 25 | * @brief Default Constructor 26 | */ 27 | ResidualDilatedLSTMBuilder(); 28 | /** 29 | * \brief Constructor for the ResidualDilatedLSTMBuilder 30 | * 31 | * \param dilations Vector of dilations 32 | * \param input_dim Dimention of the input \f$x_t\f$ 33 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 34 | * \param model ParameterCollection holding the parameters 35 | * \param ln_lstm Whether to use layer normalization 36 | * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) 37 | */ 38 | explicit ResidualDilatedLSTMBuilder(vector dilations, 39 | unsigned input_dim, 40 | unsigned hidden_dim, 41 | ParameterCollection& model, 42 | bool ln_lstm = false, 43 | float forget_bias = 1.f); 44 | 45 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 46 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 47 | std::vector final_s() const override { 48 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 49 | for (auto my_h : final_h()) ret.push_back(my_h); 50 | return ret; 51 | } 52 | unsigned num_h0_components() const override { return 2 * layers; } 53 | 54 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 55 | std::vector get_s(RNNPointer i) const override { 56 | std::vector ret = (i == -1 ? c0 : c[i]); 57 | for (auto my_h : get_h(i)) ret.push_back(my_h); 58 | return ret; 59 | } 60 | 61 | void copy(const RNNBuilder & params) override; 62 | 63 | /** 64 | * \brief Set the dropout rates to a unique value 65 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 
66 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 67 | */ 68 | void set_dropout(float d); 69 | /** 70 | * \brief Set the dropout rates 71 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 72 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 73 | * The dynamics of the cell are then modified to : 74 | * 75 | * \f$ 76 | * \begin{split} 77 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 78 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 79 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 80 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 81 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 82 | h_t & = \tanh(c_t)\circ o_t\\ 83 | \end{split} 84 | * \f$ 85 | * 86 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 87 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 88 | * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ 89 | */ 90 | void set_dropout(float d, float d_r); 91 | /** 92 | * \brief Set all dropout rates to 0 93 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 94 | * 95 | */ 96 | void disable_dropout(); 97 | /** 98 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 99 | * \details If this function is not called on batched input, the same mask will be applied across 100 | * all batch elements. Use this to apply different masks to each batch element 101 | * 102 | * \param batch_size Batch size 103 | */ 104 | void set_dropout_masks(unsigned batch_size = 1); 105 | /** 106 | * \brief Get parameters in ResidualDilatedLSTMBuilder 107 | * \return list of points to ParameterStorage objects 108 | */ 109 | ParameterCollection & get_parameter_collection() override; 110 | protected: 111 | void new_graph_impl(ComputationGraph& cg, bool update) override; 112 | void start_new_sequence_impl(const std::vector& h0) override; 113 | Expression add_input_impl(int prev, const Expression& x) override; 114 | Expression set_h_impl(int prev, const std::vector& h_new) override; 115 | Expression set_s_impl(int prev, const std::vector& s_new) override; 116 | 117 | public: 118 | ParameterCollection local_model; 119 | // first index is layer, then ... 120 | std::vector> params; 121 | // first index is layer, then ... 122 | std::vector> ln_params; 123 | 124 | // first index is layer, then ... 125 | std::vector> param_vars; 126 | // first index is layer, then ... 127 | std::vector> ln_param_vars; 128 | 129 | // first index is layer, then ... 
130 | std::vector> masks; 131 | 132 | // first index is time, second is layer 133 | std::vector> h, c; 134 | 135 | // initial values of h and c at each layer 136 | // - both default to zero matrix input 137 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 138 | std::vector h0; 139 | std::vector c0; 140 | unsigned layers; 141 | unsigned input_dim, hid; 142 | float dropout_rate_h; 143 | bool ln_lstm; 144 | float forget_bias; 145 | bool dropout_masks_valid; 146 | vector dilations; //one int per layer 147 | 148 | private: 149 | ComputationGraph* _cg; // Pointer to current cg 150 | 151 | }; 152 | 153 | 154 | struct DilatedLSTMBuilder : public RNNBuilder { 155 | /** 156 | * @brief Default Constructor 157 | */ 158 | DilatedLSTMBuilder(); 159 | /** 160 | * \brief Constructor for the DilatedLSTMBuilder 161 | * 162 | * \param dilations Vector of dilations 163 | * \param input_dim Dimention of the input \f$x_t\f$ 164 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 165 | * \param model ParameterCollection holding the parameters 166 | */ 167 | explicit DilatedLSTMBuilder(vector dilations, 168 | unsigned input_dim, 169 | unsigned hidden_dim, 170 | ParameterCollection& model); 171 | 172 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 173 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 174 | std::vector final_s() const override { 175 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 176 | for (auto my_h : final_h()) ret.push_back(my_h); 177 | return ret; 178 | } 179 | unsigned num_h0_components() const override { return 2 * layers; } 180 | 181 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 182 | std::vector get_s(RNNPointer i) const override { 183 | std::vector ret = (i == -1 ? c0 : c[i]); 184 | for (auto my_h : get_h(i)) ret.push_back(my_h); 185 | return ret; 186 | } 187 | 188 | void copy(const RNNBuilder & params) override; 189 | 190 | /** 191 | * \brief Set the dropout rates to a unique value 192 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 193 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 194 | */ 195 | void set_dropout(float d); 196 | /** 197 | * \brief Set the dropout rates 198 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 199 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
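 * (On naming: in the declaration below the second argument is called `d_r`; it is the recurrent
 * dropout rate applied to the hidden state, written \f$d_h\f$ in the equations that follow and
 * stored in the member `dropout_rate_h`.)
 *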
200 | * The dynamics of the cell are then modified to : 201 | * 202 | * \f$ 203 | * \begin{split} 204 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 205 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 206 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 207 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 208 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 209 | h_t & = \tanh(c_t)\circ o_t\\ 210 | \end{split} 211 | * \f$ 212 | * 213 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 214 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 215 | */ 216 | void set_dropout(float d, float d_r); 217 | /** 218 | * \brief Set all dropout rates to 0 219 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 220 | * 221 | */ 222 | void disable_dropout(); 223 | /** 224 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 225 | * \details If this function is not called on batched input, the same mask will be applied across 226 | * all batch elements. Use this to apply different masks to each batch element 227 | * 228 | * \param batch_size Batch size 229 | */ 230 | void set_dropout_masks(unsigned batch_size = 1); 231 | 232 | void set_weightnoise(float std); 233 | ParameterCollection & get_parameter_collection() override; 234 | protected: 235 | void new_graph_impl(ComputationGraph& cg, bool update) override; 236 | void start_new_sequence_impl(const std::vector& h0) override; 237 | Expression add_input_impl(int prev, const Expression& x) override; 238 | Expression set_h_impl(int prev, const std::vector& h_new) override; 239 | Expression set_s_impl(int prev, const std::vector& s_new) override; 240 | 241 | public: 242 | ParameterCollection local_model; 243 | // first index is layer, then ... 244 | std::vector> params; 245 | 246 | // first index is layer, then ... 247 | std::vector> param_vars; 248 | 249 | // first index is layer, then ... 
250 | std::vector> masks; 251 | 252 | // first index is time, second is layer 253 | std::vector> h, c; 254 | 255 | // initial values of h and c at each layer 256 | // - both default to zero matrix input 257 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 258 | std::vector h0; 259 | std::vector c0; 260 | unsigned layers; 261 | unsigned input_dim, hid; 262 | float dropout_rate_h; 263 | float weightnoise_std; 264 | vector dilations; //one int per layer 265 | 266 | bool dropout_masks_valid; 267 | private: 268 | ComputationGraph* _cg; // Pointer to current cg 269 | 270 | }; 271 | 272 | 273 | struct AttentiveDilatedLSTMBuilder : public RNNBuilder { 274 | /** 275 | * @brief Default Constructor 276 | */ 277 | AttentiveDilatedLSTMBuilder(); 278 | /** 279 | * \brief Constructor for the AttentiveDilatedLSTMBuilder 280 | * 281 | * \param max_dilations Vector, maximum dilations (per layer) 282 | * \param input_dim Dimention of the input \f$x_t\f$ 283 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 284 | * \param model ParameterCollection holding the parameters 285 | */ 286 | explicit AttentiveDilatedLSTMBuilder(vector max_dilations, 287 | unsigned input_dim, 288 | unsigned hidden_dim, 289 | unsigned attention_dim, 290 | ParameterCollection& model); 291 | 292 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 293 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 294 | std::vector final_s() const override { 295 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 296 | for (auto my_h : final_h()) ret.push_back(my_h); 297 | return ret; 298 | } 299 | unsigned num_h0_components() const override { return 2 * layers; } 300 | 301 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 302 | std::vector get_s(RNNPointer i) const override { 303 | std::vector ret = (i == -1 ? c0 : c[i]); 304 | for (auto my_h : get_h(i)) ret.push_back(my_h); 305 | return ret; 306 | } 307 | 308 | void copy(const RNNBuilder & params) override; 309 | 310 | /** 311 | * \brief Set the dropout rates to a unique value 312 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 313 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 314 | */ 315 | void set_dropout(float d); 316 | /** 317 | * \brief Set the dropout rates 318 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 319 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
320 | * The dynamics of the cell are then modified to : 321 | * 322 | * \f$ 323 | * \begin{split} 324 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 325 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 326 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 327 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 328 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 329 | h_t & = \tanh(c_t)\circ o_t\\ 330 | \end{split} 331 | * \f$ 332 | * 333 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 334 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 335 | */ 336 | void set_dropout(float d, float d_r); 337 | /** 338 | * \brief Set all dropout rates to 0 339 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 340 | * 341 | */ 342 | void disable_dropout(); 343 | /** 344 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 345 | * \details If this function is not called on batched input, the same mask will be applied across 346 | * all batch elements. Use this to apply different masks to each batch element 347 | * 348 | * \param batch_size Batch size 349 | */ 350 | void set_dropout_masks(unsigned batch_size = 1); 351 | 352 | void set_weightnoise(float std); 353 | ParameterCollection & get_parameter_collection() override; 354 | protected: 355 | void new_graph_impl(ComputationGraph& cg, bool update) override; 356 | void start_new_sequence_impl(const std::vector& h0) override; 357 | Expression add_input_impl(int prev, const Expression& x) override; 358 | Expression set_h_impl(int prev, const std::vector& h_new) override; 359 | Expression set_s_impl(int prev, const std::vector& s_new) override; 360 | 361 | public: 362 | ParameterCollection local_model; 363 | // first index is layer, then ... 364 | std::vector> params; 365 | 366 | // first index is layer, then ... 367 | std::vector> param_vars; 368 | 369 | // first index is layer, then ... 
370 | std::vector> masks; 371 | 372 | // first index is time, second is layer 373 | std::vector> h, c; 374 | 375 | // initial values of h and c at each layer 376 | // - both default to zero matrix input 377 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 378 | std::vector h0; 379 | std::vector c0; 380 | unsigned layers; 381 | unsigned input_dim, hid; 382 | unsigned attention_dim; 383 | float dropout_rate_h; 384 | float weightnoise_std; 385 | vector max_dilations; //one int per layer 386 | 387 | bool dropout_masks_valid; 388 | private: 389 | ComputationGraph* _cg; // Pointer to current cg 390 | 391 | }; 392 | } // namespace dynet 393 | 394 | #endif 395 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M42/M42.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | RelWithDebug 22 | Win32 23 | 24 | 25 | RelWithDebug 26 | x64 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | {A16B5466-E680-43F6-A884-A4A01EB78E50} 38 | Win32Proj 39 | M42 40 | 8.1 41 | 42 | 43 | 44 | Application 45 | true 46 | v140 47 | Unicode 48 | 49 | 50 | Application 51 | true 52 | v140 53 | Unicode 54 | 55 | 56 | Application 57 | false 58 | v140 59 | true 60 | Unicode 61 | 62 | 63 | Application 64 | true 65 | v140 66 | Unicode 67 | Sequential 68 | 69 | 70 | Application 71 | true 72 | v140 73 | Unicode 74 | Sequential 75 | 76 | 77 | Application 78 | false 79 | v140 80 | true 81 | Unicode 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | true 109 | 110 | 111 | true 112 | 113 | 114 | true 115 | 116 | 117 | true 118 | 119 | 120 | false 121 | 122 | 123 | false 124 | 125 | 126 | 127 | 128 | 129 | Level3 130 | Disabled 131 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 132 | 133 | 134 | Console 135 | true 136 | 137 | 138 | 139 | 140 | 141 | 142 | Level3 143 | Disabled 144 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 145 | 146 | 147 | Console 148 | true 149 | 150 | 151 | 152 | 153 | NotUsing 154 | Level1 155 | Disabled 156 | WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 157 | E:\progs2\dynet;E:\progs\Eigen; 158 | 159 | 160 | Console 161 | true 162 | E:\progs2\dynet\buildMKL\dynet\Debug 163 | dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 164 | 165 | 166 | 167 | 168 | NotUsing 169 | Level1 170 | MaxSpeed 171 | WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) 172 | E:\progs2\dynet;E:\progs\Eigen; 173 | AnySuitable 174 | true 175 | Speed 176 | AdvancedVectorExtensions 177 | Default 178 | MultiThreadedDLL 179 | ProgramDatabase 180 | true 181 | false 182 | 183 | 184 | Console 185 | true 186 | E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo 187 | dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 188 | 189 | 190 | 191 | 192 | Level3 193 | 194 | 195 | MaxSpeed 196 | true 197 | true 198 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 199 | 200 | 201 | Console 202 | true 203 | true 204 | true 205 | 206 | 207 | 208 | 209 | 
Level3 210 | 211 | 212 | MaxSpeed 213 | true 214 | true 215 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 216 | 217 | 218 | Console 219 | true 220 | true 221 | true 222 | 223 | 224 | 225 | 226 | 227 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M42/M42.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M42/slstm.h: -------------------------------------------------------------------------------- 1 | /** 2 | * file slstm.h 3 | * header for my implementation of dilated LSTMs, based on Dynet LSTM builders 4 | - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) 5 | - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 6 | - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 7 | * 8 | Slawek Smyl, Mar-May 2018 9 | */ 10 | 11 | #ifndef DYNET_SLSTMS_H_ 12 | #define DYNET_SLSTMS_H_ 13 | 14 | #include "dynet/dynet.h" 15 | #include "dynet/rnn.h" 16 | #include "dynet/expr.h" 17 | 18 | using namespace std; 19 | 20 | namespace dynet { 21 | 22 | //basd on VanillaLSTMBuilder 23 | struct ResidualDilatedLSTMBuilder : public RNNBuilder { 24 | /** 25 | * @brief Default Constructor 26 | */ 27 | ResidualDilatedLSTMBuilder(); 28 | /** 29 | * \brief Constructor for the ResidualDilatedLSTMBuilder 30 | * 31 | * \param dilations Vector of dilations 32 | * \param input_dim Dimention of the input \f$x_t\f$ 33 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 34 | * \param model ParameterCollection holding the parameters 35 | * \param ln_lstm Whether to use layer normalization 36 | * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) 37 | */ 38 | explicit ResidualDilatedLSTMBuilder(vector dilations, 39 | unsigned input_dim, 40 | unsigned hidden_dim, 41 | ParameterCollection& model, 42 | bool ln_lstm = false, 43 | float forget_bias = 1.f); 44 | 45 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 46 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 47 | std::vector final_s() const override { 48 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 49 | for (auto my_h : final_h()) ret.push_back(my_h); 50 | return ret; 51 | } 52 | unsigned num_h0_components() const override { return 2 * layers; } 53 | 54 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 55 | std::vector get_s(RNNPointer i) const override { 56 | std::vector ret = (i == -1 ? 
c0 : c[i]); 57 | for (auto my_h : get_h(i)) ret.push_back(my_h); 58 | return ret; 59 | } 60 | 61 | void copy(const RNNBuilder & params) override; 62 | 63 | /** 64 | * \brief Set the dropout rates to a unique value 65 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 66 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 67 | */ 68 | void set_dropout(float d); 69 | /** 70 | * \brief Set the dropout rates 71 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 72 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 73 | * The dynamics of the cell are then modified to : 74 | * 75 | * \f$ 76 | * \begin{split} 77 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 78 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 79 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 80 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 81 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 82 | h_t & = \tanh(c_t)\circ o_t\\ 83 | \end{split} 84 | * \f$ 85 | * 86 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 87 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 88 | * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ 89 | */ 90 | void set_dropout(float d, float d_r); 91 | /** 92 | * \brief Set all dropout rates to 0 93 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 94 | * 95 | */ 96 | void disable_dropout(); 97 | /** 98 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 99 | * \details If this function is not called on batched input, the same mask will be applied across 100 | * all batch elements. Use this to apply different masks to each batch element 101 | * 102 | * \param batch_size Batch size 103 | */ 104 | void set_dropout_masks(unsigned batch_size = 1); 105 | /** 106 | * \brief Get parameters in ResidualDilatedLSTMBuilder 107 | * \return list of points to ParameterStorage objects 108 | */ 109 | ParameterCollection & get_parameter_collection() override; 110 | protected: 111 | void new_graph_impl(ComputationGraph& cg, bool update) override; 112 | void start_new_sequence_impl(const std::vector& h0) override; 113 | Expression add_input_impl(int prev, const Expression& x) override; 114 | Expression set_h_impl(int prev, const std::vector& h_new) override; 115 | Expression set_s_impl(int prev, const std::vector& s_new) override; 116 | 117 | public: 118 | ParameterCollection local_model; 119 | // first index is layer, then ... 120 | std::vector> params; 121 | // first index is layer, then ... 122 | std::vector> ln_params; 123 | 124 | // first index is layer, then ... 125 | std::vector> param_vars; 126 | // first index is layer, then ... 127 | std::vector> ln_param_vars; 128 | 129 | // first index is layer, then ... 
130 | std::vector> masks; 131 | 132 | // first index is time, second is layer 133 | std::vector> h, c; 134 | 135 | // initial values of h and c at each layer 136 | // - both default to zero matrix input 137 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 138 | std::vector h0; 139 | std::vector c0; 140 | unsigned layers; 141 | unsigned input_dim, hid; 142 | float dropout_rate_h; 143 | bool ln_lstm; 144 | float forget_bias; 145 | bool dropout_masks_valid; 146 | vector dilations; //one int per layer 147 | 148 | private: 149 | ComputationGraph* _cg; // Pointer to current cg 150 | 151 | }; 152 | 153 | 154 | struct DilatedLSTMBuilder : public RNNBuilder { 155 | /** 156 | * @brief Default Constructor 157 | */ 158 | DilatedLSTMBuilder(); 159 | /** 160 | * \brief Constructor for the DilatedLSTMBuilder 161 | * 162 | * \param dilations Vector of dilations 163 | * \param input_dim Dimention of the input \f$x_t\f$ 164 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 165 | * \param model ParameterCollection holding the parameters 166 | */ 167 | explicit DilatedLSTMBuilder(vector dilations, 168 | unsigned input_dim, 169 | unsigned hidden_dim, 170 | ParameterCollection& model); 171 | 172 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 173 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 174 | std::vector final_s() const override { 175 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 176 | for (auto my_h : final_h()) ret.push_back(my_h); 177 | return ret; 178 | } 179 | unsigned num_h0_components() const override { return 2 * layers; } 180 | 181 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 182 | std::vector get_s(RNNPointer i) const override { 183 | std::vector ret = (i == -1 ? c0 : c[i]); 184 | for (auto my_h : get_h(i)) ret.push_back(my_h); 185 | return ret; 186 | } 187 | 188 | void copy(const RNNBuilder & params) override; 189 | 190 | /** 191 | * \brief Set the dropout rates to a unique value 192 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 193 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 194 | */ 195 | void set_dropout(float d); 196 | /** 197 | * \brief Set the dropout rates 198 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 199 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
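 * (These rates are only meaningful during training; for validation or forecasting one would
 * typically call `disable_dropout()` -- equivalently `set_dropout(0.f)` -- before unrolling.)
 *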
200 | * The dynamics of the cell are then modified to : 201 | * 202 | * \f$ 203 | * \begin{split} 204 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 205 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 206 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 207 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 208 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 209 | h_t & = \tanh(c_t)\circ o_t\\ 210 | \end{split} 211 | * \f$ 212 | * 213 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 214 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 215 | */ 216 | void set_dropout(float d, float d_r); 217 | /** 218 | * \brief Set all dropout rates to 0 219 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 220 | * 221 | */ 222 | void disable_dropout(); 223 | /** 224 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 225 | * \details If this function is not called on batched input, the same mask will be applied across 226 | * all batch elements. Use this to apply different masks to each batch element 227 | * 228 | * \param batch_size Batch size 229 | */ 230 | void set_dropout_masks(unsigned batch_size = 1); 231 | 232 | void set_weightnoise(float std); 233 | ParameterCollection & get_parameter_collection() override; 234 | protected: 235 | void new_graph_impl(ComputationGraph& cg, bool update) override; 236 | void start_new_sequence_impl(const std::vector& h0) override; 237 | Expression add_input_impl(int prev, const Expression& x) override; 238 | Expression set_h_impl(int prev, const std::vector& h_new) override; 239 | Expression set_s_impl(int prev, const std::vector& s_new) override; 240 | 241 | public: 242 | ParameterCollection local_model; 243 | // first index is layer, then ... 244 | std::vector> params; 245 | 246 | // first index is layer, then ... 247 | std::vector> param_vars; 248 | 249 | // first index is layer, then ... 
250 | std::vector> masks; 251 | 252 | // first index is time, second is layer 253 | std::vector> h, c; 254 | 255 | // initial values of h and c at each layer 256 | // - both default to zero matrix input 257 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 258 | std::vector h0; 259 | std::vector c0; 260 | unsigned layers; 261 | unsigned input_dim, hid; 262 | float dropout_rate_h; 263 | float weightnoise_std; 264 | vector dilations; //one int per layer 265 | 266 | bool dropout_masks_valid; 267 | private: 268 | ComputationGraph* _cg; // Pointer to current cg 269 | 270 | }; 271 | 272 | 273 | struct AttentiveDilatedLSTMBuilder : public RNNBuilder { 274 | /** 275 | * @brief Default Constructor 276 | */ 277 | AttentiveDilatedLSTMBuilder(); 278 | /** 279 | * \brief Constructor for the AttentiveDilatedLSTMBuilder 280 | * 281 | * \param max_dilations Vector, maximum dilations (per layer) 282 | * \param input_dim Dimention of the input \f$x_t\f$ 283 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 284 | * \param model ParameterCollection holding the parameters 285 | */ 286 | explicit AttentiveDilatedLSTMBuilder(vector max_dilations, 287 | unsigned input_dim, 288 | unsigned hidden_dim, 289 | unsigned attention_dim, 290 | ParameterCollection& model); 291 | 292 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 293 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 294 | std::vector final_s() const override { 295 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 296 | for (auto my_h : final_h()) ret.push_back(my_h); 297 | return ret; 298 | } 299 | unsigned num_h0_components() const override { return 2 * layers; } 300 | 301 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 302 | std::vector get_s(RNNPointer i) const override { 303 | std::vector ret = (i == -1 ? c0 : c[i]); 304 | for (auto my_h : get_h(i)) ret.push_back(my_h); 305 | return ret; 306 | } 307 | 308 | void copy(const RNNBuilder & params) override; 309 | 310 | /** 311 | * \brief Set the dropout rates to a unique value 312 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 313 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 314 | */ 315 | void set_dropout(float d); 316 | /** 317 | * \brief Set the dropout rates 318 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 319 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
320 | * The dynamics of the cell are then modified to : 321 | * 322 | * \f$ 323 | * \begin{split} 324 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 325 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 326 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 327 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 328 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 329 | h_t & = \tanh(c_t)\circ o_t\\ 330 | \end{split} 331 | * \f$ 332 | * 333 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 334 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 335 | */ 336 | void set_dropout(float d, float d_r); 337 | /** 338 | * \brief Set all dropout rates to 0 339 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 340 | * 341 | */ 342 | void disable_dropout(); 343 | /** 344 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 345 | * \details If this function is not called on batched input, the same mask will be applied across 346 | * all batch elements. Use this to apply different masks to each batch element 347 | * 348 | * \param batch_size Batch size 349 | */ 350 | void set_dropout_masks(unsigned batch_size = 1); 351 | 352 | void set_weightnoise(float std); 353 | ParameterCollection & get_parameter_collection() override; 354 | protected: 355 | void new_graph_impl(ComputationGraph& cg, bool update) override; 356 | void start_new_sequence_impl(const std::vector& h0) override; 357 | Expression add_input_impl(int prev, const Expression& x) override; 358 | Expression set_h_impl(int prev, const std::vector& h_new) override; 359 | Expression set_s_impl(int prev, const std::vector& s_new) override; 360 | 361 | public: 362 | ParameterCollection local_model; 363 | // first index is layer, then ... 364 | std::vector> params; 365 | 366 | // first index is layer, then ... 367 | std::vector> param_vars; 368 | 369 | // first index is layer, then ... 
370 | std::vector> masks; 371 | 372 | // first index is time, second is layer 373 | std::vector> h, c; 374 | 375 | // initial values of h and c at each layer 376 | // - both default to zero matrix input 377 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 378 | std::vector h0; 379 | std::vector c0; 380 | unsigned layers; 381 | unsigned input_dim, hid; 382 | unsigned attention_dim; 383 | float dropout_rate_h; 384 | float weightnoise_std; 385 | vector max_dilations; //one int per layer 386 | 387 | bool dropout_masks_valid; 388 | private: 389 | ComputationGraph* _cg; // Pointer to current cg 390 | 391 | }; 392 | } // namespace dynet 393 | 394 | #endif 395 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M43/M43.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | Source Files 23 | 24 | 25 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M43/M43.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | RelWithDebug 22 | Win32 23 | 24 | 25 | RelWithDebug 26 | x64 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | {BE951571-3F3A-4048-BAA3-0C05F38CFF42} 38 | Win32Proj 39 | M43 40 | 8.1 41 | 42 | 43 | 44 | Application 45 | true 46 | v140 47 | Unicode 48 | 49 | 50 | Application 51 | true 52 | v140 53 | Unicode 54 | 55 | 56 | Application 57 | false 58 | v140 59 | true 60 | Unicode 61 | 62 | 63 | Application 64 | true 65 | v140 66 | Unicode 67 | Sequential 68 | 69 | 70 | Application 71 | true 72 | v140 73 | Unicode 74 | Sequential 75 | 76 | 77 | Application 78 | false 79 | v140 80 | true 81 | Unicode 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | true 109 | 110 | 111 | true 112 | 113 | 114 | true 115 | 116 | 117 | true 118 | 119 | 120 | false 121 | 122 | 123 | false 124 | 125 | 126 | 127 | 128 | 129 | Level3 130 | Disabled 131 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 132 | 133 | 134 | Console 135 | true 136 | 137 | 138 | 139 | 140 | 141 | 142 | Level3 143 | Disabled 144 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 145 | 146 | 147 | Console 148 | true 149 | 150 | 151 | 152 | 153 | NotUsing 154 | Level1 155 | Disabled 156 | WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 157 | E:\progs2\dynet;E:\progs\Eigen; 158 | 159 | 160 | Console 161 | true 162 | E:\progs2\dynet\buildMKL\dynet\Debug 163 | dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 164 | 165 | 166 | 167 | 168 | NotUsing 169 | Level1 170 | MaxSpeed 171 | WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) 172 | E:\progs2\dynet;E:\progs\Eigen; 173 | AnySuitable 174 | true 175 | Speed 176 | 
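<!-- "AdvancedVectorExtensions" corresponds to the MSVC /arch:AVX code-generation option, so this
     optimized configuration assumes an AVX-capable CPU; change or remove it on older hardware. -->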
AdvancedVectorExtensions 177 | Default 178 | MultiThreadedDLL 179 | ProgramDatabase 180 | true 181 | false 182 | 183 | 184 | Console 185 | true 186 | E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo 187 | dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 188 | 189 | 190 | 191 | 192 | Level3 193 | 194 | 195 | MaxSpeed 196 | true 197 | true 198 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 199 | 200 | 201 | Console 202 | true 203 | true 204 | true 205 | 206 | 207 | 208 | 209 | Level3 210 | 211 | 212 | MaxSpeed 213 | true 214 | true 215 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 216 | 217 | 218 | Console 219 | true 220 | true 221 | true 222 | 223 | 224 | 225 | 226 | 227 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M43/slstm.h: -------------------------------------------------------------------------------- 1 | /** 2 | * file slstm.h 3 | * header for my implementation of dilated LSTMs, based on Dynet LSTM builders 4 | - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) 5 | - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 6 | - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 7 | * 8 | Slawek Smyl, Mar-May 2018 9 | */ 10 | 11 | #ifndef DYNET_SLSTMS_H_ 12 | #define DYNET_SLSTMS_H_ 13 | 14 | #include "dynet/dynet.h" 15 | #include "dynet/rnn.h" 16 | #include "dynet/expr.h" 17 | 18 | using namespace std; 19 | 20 | namespace dynet { 21 | 22 | //basd on VanillaLSTMBuilder 23 | struct ResidualDilatedLSTMBuilder : public RNNBuilder { 24 | /** 25 | * @brief Default Constructor 26 | */ 27 | ResidualDilatedLSTMBuilder(); 28 | /** 29 | * \brief Constructor for the ResidualDilatedLSTMBuilder 30 | * 31 | * \param dilations Vector of dilations 32 | * \param input_dim Dimention of the input \f$x_t\f$ 33 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 34 | * \param model ParameterCollection holding the parameters 35 | * \param ln_lstm Whether to use layer normalization 36 | * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) 37 | */ 38 | explicit ResidualDilatedLSTMBuilder(vector dilations, 39 | unsigned input_dim, 40 | unsigned hidden_dim, 41 | ParameterCollection& model, 42 | bool ln_lstm = false, 43 | float forget_bias = 1.f); 44 | 45 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 46 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 47 | std::vector final_s() const override { 48 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 49 | for (auto my_h : final_h()) ret.push_back(my_h); 50 | return ret; 51 | } 52 | unsigned num_h0_components() const override { return 2 * layers; } 53 | 54 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 55 | std::vector get_s(RNNPointer i) const override { 56 | std::vector ret = (i == -1 ? 
c0 : c[i]); 57 | for (auto my_h : get_h(i)) ret.push_back(my_h); 58 | return ret; 59 | } 60 | 61 | void copy(const RNNBuilder & params) override; 62 | 63 | /** 64 | * \brief Set the dropout rates to a unique value 65 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 66 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 67 | */ 68 | void set_dropout(float d); 69 | /** 70 | * \brief Set the dropout rates 71 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 72 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 73 | * The dynamics of the cell are then modified to : 74 | * 75 | * \f$ 76 | * \begin{split} 77 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 78 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 79 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 80 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 81 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 82 | h_t & = \tanh(c_t)\circ o_t\\ 83 | \end{split} 84 | * \f$ 85 | * 86 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 87 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 88 | * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ 89 | */ 90 | void set_dropout(float d, float d_r); 91 | /** 92 | * \brief Set all dropout rates to 0 93 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 94 | * 95 | */ 96 | void disable_dropout(); 97 | /** 98 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 99 | * \details If this function is not called on batched input, the same mask will be applied across 100 | * all batch elements. Use this to apply different masks to each batch element 101 | * 102 | * \param batch_size Batch size 103 | */ 104 | void set_dropout_masks(unsigned batch_size = 1); 105 | /** 106 | * \brief Get parameters in ResidualDilatedLSTMBuilder 107 | * \return list of points to ParameterStorage objects 108 | */ 109 | ParameterCollection & get_parameter_collection() override; 110 | protected: 111 | void new_graph_impl(ComputationGraph& cg, bool update) override; 112 | void start_new_sequence_impl(const std::vector& h0) override; 113 | Expression add_input_impl(int prev, const Expression& x) override; 114 | Expression set_h_impl(int prev, const std::vector& h_new) override; 115 | Expression set_s_impl(int prev, const std::vector& s_new) override; 116 | 117 | public: 118 | ParameterCollection local_model; 119 | // first index is layer, then ... 120 | std::vector> params; 121 | // first index is layer, then ... 122 | std::vector> ln_params; 123 | 124 | // first index is layer, then ... 125 | std::vector> param_vars; 126 | // first index is layer, then ... 127 | std::vector> ln_param_vars; 128 | 129 | // first index is layer, then ... 
130 | std::vector> masks; 131 | 132 | // first index is time, second is layer 133 | std::vector> h, c; 134 | 135 | // initial values of h and c at each layer 136 | // - both default to zero matrix input 137 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 138 | std::vector h0; 139 | std::vector c0; 140 | unsigned layers; 141 | unsigned input_dim, hid; 142 | float dropout_rate_h; 143 | bool ln_lstm; 144 | float forget_bias; 145 | bool dropout_masks_valid; 146 | vector dilations; //one int per layer 147 | 148 | private: 149 | ComputationGraph* _cg; // Pointer to current cg 150 | 151 | }; 152 | 153 | 154 | struct DilatedLSTMBuilder : public RNNBuilder { 155 | /** 156 | * @brief Default Constructor 157 | */ 158 | DilatedLSTMBuilder(); 159 | /** 160 | * \brief Constructor for the DilatedLSTMBuilder 161 | * 162 | * \param dilations Vector of dilations 163 | * \param input_dim Dimention of the input \f$x_t\f$ 164 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 165 | * \param model ParameterCollection holding the parameters 166 | */ 167 | explicit DilatedLSTMBuilder(vector dilations, 168 | unsigned input_dim, 169 | unsigned hidden_dim, 170 | ParameterCollection& model); 171 | 172 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 173 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 174 | std::vector final_s() const override { 175 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 176 | for (auto my_h : final_h()) ret.push_back(my_h); 177 | return ret; 178 | } 179 | unsigned num_h0_components() const override { return 2 * layers; } 180 | 181 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 182 | std::vector get_s(RNNPointer i) const override { 183 | std::vector ret = (i == -1 ? c0 : c[i]); 184 | for (auto my_h : get_h(i)) ret.push_back(my_h); 185 | return ret; 186 | } 187 | 188 | void copy(const RNNBuilder & params) override; 189 | 190 | /** 191 | * \brief Set the dropout rates to a unique value 192 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 193 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 194 | */ 195 | void set_dropout(float d); 196 | /** 197 | * \brief Set the dropout rates 198 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 199 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
200 | * The dynamics of the cell are then modified to : 201 | * 202 | * \f$ 203 | * \begin{split} 204 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 205 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 206 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 207 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 208 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 209 | h_t & = \tanh(c_t)\circ o_t\\ 210 | \end{split} 211 | * \f$ 212 | * 213 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 214 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 215 | */ 216 | void set_dropout(float d, float d_r); 217 | /** 218 | * \brief Set all dropout rates to 0 219 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 220 | * 221 | */ 222 | void disable_dropout(); 223 | /** 224 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 225 | * \details If this function is not called on batched input, the same mask will be applied across 226 | * all batch elements. Use this to apply different masks to each batch element 227 | * 228 | * \param batch_size Batch size 229 | */ 230 | void set_dropout_masks(unsigned batch_size = 1); 231 | 232 | void set_weightnoise(float std); 233 | ParameterCollection & get_parameter_collection() override; 234 | protected: 235 | void new_graph_impl(ComputationGraph& cg, bool update) override; 236 | void start_new_sequence_impl(const std::vector& h0) override; 237 | Expression add_input_impl(int prev, const Expression& x) override; 238 | Expression set_h_impl(int prev, const std::vector& h_new) override; 239 | Expression set_s_impl(int prev, const std::vector& s_new) override; 240 | 241 | public: 242 | ParameterCollection local_model; 243 | // first index is layer, then ... 244 | std::vector> params; 245 | 246 | // first index is layer, then ... 247 | std::vector> param_vars; 248 | 249 | // first index is layer, then ... 
250 | std::vector> masks; 251 | 252 | // first index is time, second is layer 253 | std::vector> h, c; 254 | 255 | // initial values of h and c at each layer 256 | // - both default to zero matrix input 257 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 258 | std::vector h0; 259 | std::vector c0; 260 | unsigned layers; 261 | unsigned input_dim, hid; 262 | float dropout_rate_h; 263 | float weightnoise_std; 264 | vector dilations; //one int per layer 265 | 266 | bool dropout_masks_valid; 267 | private: 268 | ComputationGraph* _cg; // Pointer to current cg 269 | 270 | }; 271 | 272 | 273 | struct AttentiveDilatedLSTMBuilder : public RNNBuilder { 274 | /** 275 | * @brief Default Constructor 276 | */ 277 | AttentiveDilatedLSTMBuilder(); 278 | /** 279 | * \brief Constructor for the AttentiveDilatedLSTMBuilder 280 | * 281 | * \param max_dilations Vector, maximum dilations (per layer) 282 | * \param input_dim Dimention of the input \f$x_t\f$ 283 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 284 | * \param model ParameterCollection holding the parameters 285 | */ 286 | explicit AttentiveDilatedLSTMBuilder(vector max_dilations, 287 | unsigned input_dim, 288 | unsigned hidden_dim, 289 | unsigned attention_dim, 290 | ParameterCollection& model); 291 | 292 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 293 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 294 | std::vector final_s() const override { 295 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 296 | for (auto my_h : final_h()) ret.push_back(my_h); 297 | return ret; 298 | } 299 | unsigned num_h0_components() const override { return 2 * layers; } 300 | 301 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 302 | std::vector get_s(RNNPointer i) const override { 303 | std::vector ret = (i == -1 ? c0 : c[i]); 304 | for (auto my_h : get_h(i)) ret.push_back(my_h); 305 | return ret; 306 | } 307 | 308 | void copy(const RNNBuilder & params) override; 309 | 310 | /** 311 | * \brief Set the dropout rates to a unique value 312 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 313 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 314 | */ 315 | void set_dropout(float d); 316 | /** 317 | * \brief Set the dropout rates 318 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 319 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
320 | * The dynamics of the cell are then modified to : 321 | * 322 | * \f$ 323 | * \begin{split} 324 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 325 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 326 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 327 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 328 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 329 | h_t & = \tanh(c_t)\circ o_t\\ 330 | \end{split} 331 | * \f$ 332 | * 333 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 334 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 335 | */ 336 | void set_dropout(float d, float d_r); 337 | /** 338 | * \brief Set all dropout rates to 0 339 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 340 | * 341 | */ 342 | void disable_dropout(); 343 | /** 344 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 345 | * \details If this function is not called on batched input, the same mask will be applied across 346 | * all batch elements. Use this to apply different masks to each batch element 347 | * 348 | * \param batch_size Batch size 349 | */ 350 | void set_dropout_masks(unsigned batch_size = 1); 351 | 352 | void set_weightnoise(float std); 353 | ParameterCollection & get_parameter_collection() override; 354 | protected: 355 | void new_graph_impl(ComputationGraph& cg, bool update) override; 356 | void start_new_sequence_impl(const std::vector& h0) override; 357 | Expression add_input_impl(int prev, const Expression& x) override; 358 | Expression set_h_impl(int prev, const std::vector& h_new) override; 359 | Expression set_s_impl(int prev, const std::vector& s_new) override; 360 | 361 | public: 362 | ParameterCollection local_model; 363 | // first index is layer, then ... 364 | std::vector> params; 365 | 366 | // first index is layer, then ... 367 | std::vector> param_vars; 368 | 369 | // first index is layer, then ... 
370 | std::vector> masks; 371 | 372 | // first index is time, second is layer 373 | std::vector> h, c; 374 | 375 | // initial values of h and c at each layer 376 | // - both default to zero matrix input 377 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 378 | std::vector h0; 379 | std::vector c0; 380 | unsigned layers; 381 | unsigned input_dim, hid; 382 | unsigned attention_dim; 383 | float dropout_rate_h; 384 | float weightnoise_std; 385 | vector max_dilations; //one int per layer 386 | 387 | bool dropout_masks_valid; 388 | private: 389 | ComputationGraph* _cg; // Pointer to current cg 390 | 391 | }; 392 | } // namespace dynet 393 | 394 | #endif 395 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M44/M44.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | Source Files 23 | 24 | 25 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M44/M44.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | RelWithDebug 22 | Win32 23 | 24 | 25 | RelWithDebug 26 | x64 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | {7A192E0C-8F58-4D65-998E-3A7010AB5F87} 38 | Win32Proj 39 | M44 40 | 8.1 41 | 42 | 43 | 44 | Application 45 | true 46 | v140 47 | Unicode 48 | 49 | 50 | Application 51 | true 52 | v140 53 | Unicode 54 | 55 | 56 | Application 57 | false 58 | v140 59 | true 60 | Unicode 61 | 62 | 63 | Application 64 | true 65 | v140 66 | Unicode 67 | Sequential 68 | 69 | 70 | Application 71 | true 72 | v140 73 | Unicode 74 | Sequential 75 | 76 | 77 | Application 78 | false 79 | v140 80 | true 81 | Unicode 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | true 109 | 110 | 111 | true 112 | 113 | 114 | true 115 | 116 | 117 | true 118 | 119 | 120 | false 121 | 122 | 123 | false 124 | 125 | 126 | 127 | 128 | 129 | Level3 130 | Disabled 131 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 132 | 133 | 134 | Console 135 | true 136 | 137 | 138 | 139 | 140 | 141 | 142 | Level3 143 | Disabled 144 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 145 | 146 | 147 | Console 148 | true 149 | 150 | 151 | 152 | 153 | NotUsing 154 | Level1 155 | Disabled 156 | WIN32;_WINDOWS;EIGEN_USE_MKL_ALL;EIGEN_FAST_MATH;NOMINMAX;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 157 | E:\progs2\dynet;E:\progs\Eigen; 158 | 159 | 160 | Console 161 | true 162 | E:\progs2\dynet\buildMKL\dynet\Debug 163 | dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 164 | 165 | 166 | 167 | 168 | NotUsing 169 | Level1 170 | MaxSpeed 171 | WIN32;_WINDOWS;EIGEN_FAST_MATH;EIGEN_USE_MKL_ALL;NOMINMAX;NDEBUG_;CONSOLE;%(PreprocessorDefinitions) 172 | E:\progs2\dynet;E:\progs\Eigen; 173 | AnySuitable 174 | true 175 | Speed 176 | 
AdvancedVectorExtensions 177 | Default 178 | MultiThreadedDLL 179 | ProgramDatabase 180 | true 181 | false 182 | 183 | 184 | Console 185 | true 186 | E:\progs2\dynet\buildMKL\dynet\RelWithDebInfo 187 | dynet.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 188 | 189 | 190 | 191 | 192 | Level3 193 | 194 | 195 | MaxSpeed 196 | true 197 | true 198 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 199 | 200 | 201 | Console 202 | true 203 | true 204 | true 205 | 206 | 207 | 208 | 209 | Level3 210 | 211 | 212 | MaxSpeed 213 | true 214 | true 215 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 216 | 217 | 218 | Console 219 | true 220 | true 221 | true 222 | 223 | 224 | 225 | 226 | 227 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/M44/slstm.h: -------------------------------------------------------------------------------- 1 | /** 2 | * file slstm.h 3 | * header for my implementation of dilated LSTMs, based on Dynet LSTM builders 4 | - DilatedLSTMBuilder - standard Dilated LSTM (https://papers.nips.cc/paper/6613-dilated-recurrent-neural-networks.pdf) 5 | - ResidualDilatedLSTMBuilder - Dilated LSTM with special Residual shortcuts, after https://arxiv.org/abs/1701.03360 6 | - AttentiveDilatedLSTMBuilder - Dilated LSTM with Attention mechanism, as in the second stage of https://arxiv.org/abs/1704.02971 7 | * 8 | Slawek Smyl, Mar-May 2018 9 | */ 10 | 11 | #ifndef DYNET_SLSTMS_H_ 12 | #define DYNET_SLSTMS_H_ 13 | 14 | #include "dynet/dynet.h" 15 | #include "dynet/rnn.h" 16 | #include "dynet/expr.h" 17 | 18 | using namespace std; 19 | 20 | namespace dynet { 21 | 22 | //basd on VanillaLSTMBuilder 23 | struct ResidualDilatedLSTMBuilder : public RNNBuilder { 24 | /** 25 | * @brief Default Constructor 26 | */ 27 | ResidualDilatedLSTMBuilder(); 28 | /** 29 | * \brief Constructor for the ResidualDilatedLSTMBuilder 30 | * 31 | * \param dilations Vector of dilations 32 | * \param input_dim Dimention of the input \f$x_t\f$ 33 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 34 | * \param model ParameterCollection holding the parameters 35 | * \param ln_lstm Whether to use layer normalization 36 | * \param forget_bias value(float) to use as bias for the forget gate(default = 1.0) 37 | */ 38 | explicit ResidualDilatedLSTMBuilder(vector dilations, 39 | unsigned input_dim, 40 | unsigned hidden_dim, 41 | ParameterCollection& model, 42 | bool ln_lstm = false, 43 | float forget_bias = 1.f); 44 | 45 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 46 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 47 | std::vector final_s() const override { 48 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 49 | for (auto my_h : final_h()) ret.push_back(my_h); 50 | return ret; 51 | } 52 | unsigned num_h0_components() const override { return 2 * layers; } 53 | 54 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 55 | std::vector get_s(RNNPointer i) const override { 56 | std::vector ret = (i == -1 ? 
c0 : c[i]); 57 | for (auto my_h : get_h(i)) ret.push_back(my_h); 58 | return ret; 59 | } 60 | 61 | void copy(const RNNBuilder & params) override; 62 | 63 | /** 64 | * \brief Set the dropout rates to a unique value 65 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 66 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 67 | */ 68 | void set_dropout(float d); 69 | /** 70 | * \brief Set the dropout rates 71 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 72 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 73 | * The dynamics of the cell are then modified to : 74 | * 75 | * \f$ 76 | * \begin{split} 77 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 78 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 79 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 80 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 81 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 82 | h_t & = \tanh(c_t)\circ o_t\\ 83 | \end{split} 84 | * \f$ 85 | * 86 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 87 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 88 | * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ 89 | */ 90 | void set_dropout(float d, float d_r); 91 | /** 92 | * \brief Set all dropout rates to 0 93 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 94 | * 95 | */ 96 | void disable_dropout(); 97 | /** 98 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 99 | * \details If this function is not called on batched input, the same mask will be applied across 100 | * all batch elements. Use this to apply different masks to each batch element 101 | * 102 | * \param batch_size Batch size 103 | */ 104 | void set_dropout_masks(unsigned batch_size = 1); 105 | /** 106 | * \brief Get parameters in ResidualDilatedLSTMBuilder 107 | * \return list of points to ParameterStorage objects 108 | */ 109 | ParameterCollection & get_parameter_collection() override; 110 | protected: 111 | void new_graph_impl(ComputationGraph& cg, bool update) override; 112 | void start_new_sequence_impl(const std::vector& h0) override; 113 | Expression add_input_impl(int prev, const Expression& x) override; 114 | Expression set_h_impl(int prev, const std::vector& h_new) override; 115 | Expression set_s_impl(int prev, const std::vector& s_new) override; 116 | 117 | public: 118 | ParameterCollection local_model; 119 | // first index is layer, then ... 120 | std::vector> params; 121 | // first index is layer, then ... 122 | std::vector> ln_params; 123 | 124 | // first index is layer, then ... 125 | std::vector> param_vars; 126 | // first index is layer, then ... 127 | std::vector> ln_param_vars; 128 | 129 | // first index is layer, then ... 
130 | std::vector> masks; 131 | 132 | // first index is time, second is layer 133 | std::vector> h, c; 134 | 135 | // initial values of h and c at each layer 136 | // - both default to zero matrix input 137 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 138 | std::vector h0; 139 | std::vector c0; 140 | unsigned layers; 141 | unsigned input_dim, hid; 142 | float dropout_rate_h; 143 | bool ln_lstm; 144 | float forget_bias; 145 | bool dropout_masks_valid; 146 | vector dilations; //one int per layer 147 | 148 | private: 149 | ComputationGraph* _cg; // Pointer to current cg 150 | 151 | }; 152 | 153 | 154 | struct DilatedLSTMBuilder : public RNNBuilder { 155 | /** 156 | * @brief Default Constructor 157 | */ 158 | DilatedLSTMBuilder(); 159 | /** 160 | * \brief Constructor for the DilatedLSTMBuilder 161 | * 162 | * \param dilations Vector of dilations 163 | * \param input_dim Dimention of the input \f$x_t\f$ 164 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 165 | * \param model ParameterCollection holding the parameters 166 | */ 167 | explicit DilatedLSTMBuilder(vector dilations, 168 | unsigned input_dim, 169 | unsigned hidden_dim, 170 | ParameterCollection& model); 171 | 172 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 173 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 174 | std::vector final_s() const override { 175 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 176 | for (auto my_h : final_h()) ret.push_back(my_h); 177 | return ret; 178 | } 179 | unsigned num_h0_components() const override { return 2 * layers; } 180 | 181 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 182 | std::vector get_s(RNNPointer i) const override { 183 | std::vector ret = (i == -1 ? c0 : c[i]); 184 | for (auto my_h : get_h(i)) ret.push_back(my_h); 185 | return ret; 186 | } 187 | 188 | void copy(const RNNBuilder & params) override; 189 | 190 | /** 191 | * \brief Set the dropout rates to a unique value 192 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 193 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 194 | */ 195 | void set_dropout(float d); 196 | /** 197 | * \brief Set the dropout rates 198 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 199 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
200 | * The dynamics of the cell are then modified to : 201 | * 202 | * \f$ 203 | * \begin{split} 204 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 205 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 206 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 207 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 208 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 209 | h_t & = \tanh(c_t)\circ o_t\\ 210 | \end{split} 211 | * \f$ 212 | * 213 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 214 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 215 | */ 216 | void set_dropout(float d, float d_r); 217 | /** 218 | * \brief Set all dropout rates to 0 219 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 220 | * 221 | */ 222 | void disable_dropout(); 223 | /** 224 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 225 | * \details If this function is not called on batched input, the same mask will be applied across 226 | * all batch elements. Use this to apply different masks to each batch element 227 | * 228 | * \param batch_size Batch size 229 | */ 230 | void set_dropout_masks(unsigned batch_size = 1); 231 | 232 | void set_weightnoise(float std); 233 | ParameterCollection & get_parameter_collection() override; 234 | protected: 235 | void new_graph_impl(ComputationGraph& cg, bool update) override; 236 | void start_new_sequence_impl(const std::vector& h0) override; 237 | Expression add_input_impl(int prev, const Expression& x) override; 238 | Expression set_h_impl(int prev, const std::vector& h_new) override; 239 | Expression set_s_impl(int prev, const std::vector& s_new) override; 240 | 241 | public: 242 | ParameterCollection local_model; 243 | // first index is layer, then ... 244 | std::vector> params; 245 | 246 | // first index is layer, then ... 247 | std::vector> param_vars; 248 | 249 | // first index is layer, then ... 
250 | std::vector> masks; 251 | 252 | // first index is time, second is layer 253 | std::vector> h, c; 254 | 255 | // initial values of h and c at each layer 256 | // - both default to zero matrix input 257 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 258 | std::vector h0; 259 | std::vector c0; 260 | unsigned layers; 261 | unsigned input_dim, hid; 262 | float dropout_rate_h; 263 | float weightnoise_std; 264 | vector dilations; //one int per layer 265 | 266 | bool dropout_masks_valid; 267 | private: 268 | ComputationGraph* _cg; // Pointer to current cg 269 | 270 | }; 271 | 272 | 273 | struct AttentiveDilatedLSTMBuilder : public RNNBuilder { 274 | /** 275 | * @brief Default Constructor 276 | */ 277 | AttentiveDilatedLSTMBuilder(); 278 | /** 279 | * \brief Constructor for the AttentiveDilatedLSTMBuilder 280 | * 281 | * \param max_dilations Vector, maximum dilations (per layer) 282 | * \param input_dim Dimention of the input \f$x_t\f$ 283 | * \param hidden_dim Dimention of the hidden states \f$h_t\f$ and \f$c_t\f$ 284 | * \param model ParameterCollection holding the parameters 285 | */ 286 | explicit AttentiveDilatedLSTMBuilder(vector max_dilations, 287 | unsigned input_dim, 288 | unsigned hidden_dim, 289 | unsigned attention_dim, 290 | ParameterCollection& model); 291 | 292 | Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } 293 | std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } 294 | std::vector final_s() const override { 295 | std::vector ret = (c.size() == 0 ? c0 : c.back()); 296 | for (auto my_h : final_h()) ret.push_back(my_h); 297 | return ret; 298 | } 299 | unsigned num_h0_components() const override { return 2 * layers; } 300 | 301 | std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } 302 | std::vector get_s(RNNPointer i) const override { 303 | std::vector ret = (i == -1 ? c0 : c[i]); 304 | for (auto my_h : get_h(i)) ret.push_back(my_h); 305 | return ret; 306 | } 307 | 308 | void copy(const RNNBuilder & params) override; 309 | 310 | /** 311 | * \brief Set the dropout rates to a unique value 312 | * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. 313 | * \param d Dropout rate to be applied on all of \f$x,h\f$ 314 | */ 315 | void set_dropout(float d); 316 | /** 317 | * \brief Set the dropout rates 318 | * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) 319 | * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
320 | * The dynamics of the cell are then modified to : 321 | * 322 | * \f$ 323 | * \begin{split} 324 | i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ 325 | f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f)\\ 326 | o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ 327 | \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ 328 | c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ 329 | h_t & = \tanh(c_t)\circ o_t\\ 330 | \end{split} 331 | * \f$ 332 | * 333 | * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation 334 | * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ 335 | */ 336 | void set_dropout(float d, float d_r); 337 | /** 338 | * \brief Set all dropout rates to 0 339 | * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` 340 | * 341 | */ 342 | void disable_dropout(); 343 | /** 344 | * \brief Set dropout masks at the beginning of a sequence for a specific batch size 345 | * \details If this function is not called on batched input, the same mask will be applied across 346 | * all batch elements. Use this to apply different masks to each batch element 347 | * 348 | * \param batch_size Batch size 349 | */ 350 | void set_dropout_masks(unsigned batch_size = 1); 351 | 352 | void set_weightnoise(float std); 353 | ParameterCollection & get_parameter_collection() override; 354 | protected: 355 | void new_graph_impl(ComputationGraph& cg, bool update) override; 356 | void start_new_sequence_impl(const std::vector<Expression>& h0) override; 357 | Expression add_input_impl(int prev, const Expression& x) override; 358 | Expression set_h_impl(int prev, const std::vector<Expression>& h_new) override; 359 | Expression set_s_impl(int prev, const std::vector<Expression>& s_new) override; 360 | 361 | public: 362 | ParameterCollection local_model; 363 | // first index is layer, then ... 364 | std::vector<std::vector<Parameter>> params; 365 | 366 | // first index is layer, then ... 367 | std::vector<std::vector<Expression>> param_vars; 368 | 369 | // first index is layer, then ... 370 | std::vector<std::vector<Expression>> masks; 371 | 372 | // first index is time, second is layer 373 | std::vector<std::vector<Expression>> h, c; 374 | 375 | // initial values of h and c at each layer 376 | // - both default to zero matrix input 377 | bool has_initial_state; // if this is false, treat h0 and c0 as 0 378 | std::vector<Expression> h0; 379 | std::vector<Expression> c0; 380 | unsigned layers; 381 | unsigned input_dim, hid; 382 | unsigned attention_dim; 383 | float dropout_rate_h; 384 | float weightnoise_std; 385 | vector<unsigned> max_dilations; //one int per layer 386 | 387 | bool dropout_masks_valid; 388 | private: 389 | ComputationGraph* _cg; // Pointer to current cg 390 | 391 | }; 392 | } // namespace dynet 393 | 394 | #endif 395 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/readme.txt: -------------------------------------------------------------------------------- 1 | This is a Visual Studio 15 solution, with 4 projects, one for each .cc file. 2 | Two targets are defined: Debug and RelWithDebug, which is Release with debug info; the latter is the one I normally used. 3 | You will need to update the include and link paths to point to your installation of Dynet.
4 | In the x64\RelWithDebug directory you will find two example scripts for running the executables 5 | in conjunction with one program started interactively inside VS. -------------------------------------------------------------------------------- /c++/windows_VisualStudio/x64/RelWithDebug/readme.txt: -------------------------------------------------------------------------------- 1 | These are example run scripts. They are meant to be run on a 6-core computer and assume that the program 2 | M41.exe has been started interactively in Visual Studio, so they add 5 processes. 3 | run61.cmd should be run for ES_RNN and ES_RNN_PI, so for the Monthly and Quarterly series, 4 | although for Monthly you probably want to use a computer with more cores, unless you are fine with waiting a week or so :-) 5 | run61_e.cmd is for ES_RNN_E and ES_RNN_E_PI, so for all other cases. -------------------------------------------------------------------------------- /c++/windows_VisualStudio/x64/RelWithDebug/run61.cmd: -------------------------------------------------------------------------------- 1 | start M41 10 2 2 | start M41 11 1 5 3 | start M41 11 2 5 4 | start M41 12 1 10 5 | start M41 12 2 10 6 | -------------------------------------------------------------------------------- /c++/windows_VisualStudio/x64/RelWithDebug/run61_e.cmd: -------------------------------------------------------------------------------- 1 | start M41 5 2 | start M41 10 3 | start M41 15 4 | start M41 20 5 | start M41 25 6 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | ES-RNN programs, related scripts, and docs. 2 | M4 Forecasting Competition, 2018 3 | Slawek Smyl, Uber. 4 | 5 | The programs are in C++ and use Dynet - a Dynamic Graph NN system (https://github.com/clab/dynet) 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /sql/createM72nn_SQLServer.sql: -------------------------------------------------------------------------------- 1 | USE [slawek] 2 | GO 3 | 4 | /****** Object: Table [dbo].[M72nn] Script Date: 6/2/2018 9:37:26 AM ******/ 5 | SET ANSI_NULLS ON 6 | GO 7 | 8 | SET QUOTED_IDENTIFIER ON 9 | GO 10 | 11 | SET ANSI_PADDING ON 12 | GO 13 | 14 | CREATE TABLE [dbo].[M72nn]( 15 | [run] [varchar](164) NOT NULL, 16 | [LBack] [smallint] NOT NULL, 17 | [iBig] [smallint] NOT NULL, 18 | [series] [varchar](20) NOT NULL, 19 | [epoch] [smallint] NOT NULL, 20 | [actual1] [real] NULL, 21 | [forec1] [real] NULL, 22 | [actual2] [real] NULL, 23 | [forec2] [real] NULL, 24 | [actual3] [real] NULL, 25 | [forec3] [real] NULL, 26 | [actual4] [real] NULL, 27 | [forec4] [real] NULL, 28 | [actual5] [real] NULL, 29 | [forec5] [real] NULL, 30 | [actual6] [real] NULL, 31 | [forec6] [real] NULL, 32 | [actual7] [real] NULL, 33 | [forec7] [real] NULL, 34 | [actual8] [real] NULL, 35 | [forec8] [real] NULL, 36 | [actual9] [real] NULL, 37 | [forec9] [real] NULL, 38 | [actual10] [real] NULL, 39 | [forec10] [real] NULL, 40 | [actual11] [real] NULL, 41 | [forec11] [real] NULL, 42 | [actual12] [real] NULL, 43 | [forec12] [real] NULL, 44 | [actual13] [real] NULL, 45 | [forec13] [real] NULL, 46 | [actual14] [real] NULL, 47 | [forec14] [real] NULL, 48 | [actual15] [real] NULL, 49 | [forec15] [real] NULL, 50 | [actual16] [real] NULL, 51 | [forec16] [real] NULL, 52 | [actual17] [real] NULL, 53 | [forec17] [real] NULL, 54 | [actual18] [real] NULL, 55 | [forec18] [real] NULL, 56 | [actual19] [real] NULL, 57 |
[forec19] [real] NULL, 58 | [actual20] [real] NULL, 59 | [forec20] [real] NULL, 60 | [actual21] [real] NULL, 61 | [forec21] [real] NULL, 62 | [actual22] [real] NULL, 63 | [forec22] [real] NULL, 64 | [actual23] [real] NULL, 65 | [forec23] [real] NULL, 66 | [actual24] [real] NULL, 67 | [forec24] [real] NULL, 68 | [actual25] [real] NULL, 69 | [forec25] [real] NULL, 70 | [actual26] [real] NULL, 71 | [forec26] [real] NULL, 72 | [actual27] [real] NULL, 73 | [forec27] [real] NULL, 74 | [actual28] [real] NULL, 75 | [forec28] [real] NULL, 76 | [actual29] [real] NULL, 77 | [forec29] [real] NULL, 78 | [actual30] [real] NULL, 79 | [forec30] [real] NULL, 80 | [actual31] [real] NULL, 81 | [forec31] [real] NULL, 82 | [actual32] [real] NULL, 83 | [forec32] [real] NULL, 84 | [actual33] [real] NULL, 85 | [forec33] [real] NULL, 86 | [actual34] [real] NULL, 87 | [forec34] [real] NULL, 88 | [actual35] [real] NULL, 89 | [forec35] [real] NULL, 90 | [actual36] [real] NULL, 91 | [forec36] [real] NULL, 92 | [actual37] [real] NULL, 93 | [forec37] [real] NULL, 94 | [actual38] [real] NULL, 95 | [forec38] [real] NULL, 96 | [actual39] [real] NULL, 97 | [forec39] [real] NULL, 98 | [actual40] [real] NULL, 99 | [forec40] [real] NULL, 100 | [actual41] [real] NULL, 101 | [forec41] [real] NULL, 102 | [actual42] [real] NULL, 103 | [forec42] [real] NULL, 104 | [actual43] [real] NULL, 105 | [forec43] [real] NULL, 106 | [actual44] [real] NULL, 107 | [forec44] [real] NULL, 108 | [actual45] [real] NULL, 109 | [forec45] [real] NULL, 110 | [actual46] [real] NULL, 111 | [forec46] [real] NULL, 112 | [actual47] [real] NULL, 113 | [forec47] [real] NULL, 114 | [actual48] [real] NULL, 115 | [forec48] [real] NULL, 116 | [trainingError] [real] NULL, 117 | [variable] [varchar](20) NOT NULL, 118 | [n] [smallint] NOT NULL, 119 | [dateTimeOfPrediction] [datetime] NOT NULL, 120 | CONSTRAINT [M72nn_pk] PRIMARY KEY CLUSTERED 121 | ( 122 | [run] ASC, 123 | [LBack] ASC, 124 | [iBig] ASC, 125 | [series] ASC, 126 | [epoch] ASC 127 | )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 128 | ) ON [PRIMARY] 129 | 130 | GO 131 | 132 | SET ANSI_PADDING OFF 133 | GO 134 | 135 | 136 | -------------------------------------------------------------------------------- /sql/createM72nn_mysql.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE M72nn( 2 | run varchar(160) NOT NULL, 3 | LBack smallint NOT NULL, 4 | iBig smallint NOT NULL, 5 | series varchar(20) NOT NULL, 6 | epoch smallint NOT NULL, 7 | actual1 float NULL, 8 | forec1 float NULL, 9 | actual2 float NULL, 10 | forec2 float NULL, 11 | actual3 float NULL, 12 | forec3 float NULL, 13 | actual4 float NULL, 14 | forec4 float NULL, 15 | actual5 float NULL, 16 | forec5 float NULL, 17 | actual6 float NULL, 18 | forec6 float NULL, 19 | actual7 float NULL, 20 | forec7 float NULL, 21 | actual8 float NULL, 22 | forec8 float NULL, 23 | actual9 float NULL, 24 | forec9 float NULL, 25 | actual10 float NULL, 26 | forec10 float NULL, 27 | actual11 float NULL, 28 | forec11 float NULL, 29 | actual12 float NULL, 30 | forec12 float NULL, 31 | actual13 float NULL, 32 | forec13 float NULL, 33 | actual14 float NULL, 34 | forec14 float NULL, 35 | actual15 float NULL, 36 | forec15 float NULL, 37 | actual16 float NULL, 38 | forec16 float NULL, 39 | actual17 float NULL, 40 | forec17 float NULL, 41 | actual18 float NULL, 42 | forec18 float NULL, 43 | trainingError float NULL, 44 | variable varchar(20) 
NOT NULL, 45 | n smallint NOT NULL, 46 | dateTimeOfPrediction datetime NOT NULL, 47 | CONSTRAINT M72nn_pk PRIMARY KEY CLUSTERED 48 | ( 49 | run ASC, 50 | LBack ASC, 51 | iBig ASC, 52 | series ASC, 53 | epoch ASC)); 54 | 55 | -------------------------------------------------------------------------------- /sql/readme.txt: -------------------------------------------------------------------------------- 1 | I provide just two example table-creation scripts, one for SQL Server and one for MySQL. 2 | The MySQL table is limited to an output vector of 18 values, so it would not work for the Hourly runs (whose horizon is 48). 3 | Anyway, starting to use the database is a large investment of time: apart from installation, you also need to create auxiliary tables with MASE, and a lot of queries. 4 | I do not have time to do all of it here, and I suspect there will be little interest in ODBC, so this is all you get :-) 5 | --------------------------------------------------------------------------------
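Example usage of the dilated LSTM builders (illustrative sketch, not part of the repository). The snippet below shows how the DilatedLSTMBuilder declared in slstm.h might be driven from a small stand-alone Dynet program. The dilations {1, 3, 9}, the dimensions, the sequence length, and the input values are made-up examples, and the element type of the dilation vector (unsigned) is inferred, since the header listings above lost their template arguments; the real network assembly and training loops live in the accompanying .cc programs.

// Illustrative only - a minimal Dynet driver for DilatedLSTMBuilder from slstm.h.
// Assumptions: the dilation vector holds unsigned values; dimensions, sequence
// length, and input values are dummies, not the settings used in the M4 runs.
#include "dynet/dynet.h"
#include "dynet/expr.h"
#include "slstm.h"
#include <vector>

using namespace dynet;

int main(int argc, char** argv) {
  dynet::initialize(argc, argv);                 // standard Dynet start-up

  ParameterCollection model;
  const unsigned input_dim = 4, hidden_dim = 8;
  std::vector<unsigned> dilations = {1, 3, 9};   // one dilation per stacked layer

  DilatedLSTMBuilder rnn(dilations, input_dim, hidden_dim, model);

  ComputationGraph cg;
  rnn.new_graph(cg);                             // attach the builder's parameters to this graph
  rnn.start_new_sequence();                      // h0 and c0 default to zero

  std::vector<float> x_vals(input_dim, 0.5f);    // dummy values for one time step
  Expression h_t;
  for (int t = 0; t < 12; ++t) {
    Expression x_t = input(cg, {input_dim}, x_vals);
    h_t = rnn.add_input(x_t);                    // top-layer hidden state at time t
  }
  // In the real programs h_t would feed further layers and a loss; here we just evaluate it.
  std::vector<float> h_out = as_vector(cg.forward(h_t));
  return 0;
}

The ResidualDilatedLSTMBuilder and AttentiveDilatedLSTMBuilder declared in the same header are driven the same way; only their constructors differ (optional layer normalization and forget-gate bias for the former, an additional attention dimension for the latter).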