├── .github
    └── README.md
├── .gitignore
├── .gitmodules
├── dataset
    └── train.sh
├── lib
    └── kann-master
    │   ├── .gitignore
    │   ├── LICENSE.txt
    │   ├── Makefile
    │   ├── README.md
    │   ├── dna
    │       └── README.md
    │   ├── doc
    │       ├── 01user.md
    │       ├── 02dev.md
    │       ├── 11math.tex
    │       ├── README.md
    │       └── images
    │       │   ├── autodiff.png
    │       │   ├── matmul1.png
    │       │   ├── matmul2.png
    │       │   ├── mlp.png
    │       │   ├── rnn-unroll.png
    │       │   └── rnn.png
    │   ├── examples
    │       ├── README.md
    │       ├── ae.c
    │       ├── inspect.c
    │       ├── keras
    │       │   ├── mlp.py
    │       │   └── rnn-bit.py
    │       ├── mlp.c
    │       ├── mnist-cnn.c
    │       ├── rnn-bit.c
    │       ├── tensorflow
    │       │   └── mlp.py
    │       ├── textgen.c
    │       ├── tiny-dnn
    │       │   ├── Makefile
    │       │   └── mlp.cpp
    │       └── vae.c
    │   ├── kann.c
    │   ├── kann.h
    │   ├── kann_extra
    │       ├── kann_data.c
    │       ├── kann_data.h
    │       └── kseq.h
    │   ├── kautodiff.c
    │   └── kautodiff.h
├── models
    ├── cnn.model
    ├── mlp.model
    └── rnn.model
└── src
    └── brain
        ├── build.sh
        ├── cnn_train.c
        ├── dataset.c
        ├── dataset.h
        ├── guess.c
        ├── mlp_train.c
        ├── norm.c
        ├── norm.h
        └── rnn_train.c


/.github/README.md:
--------------------------------------------------------------------------------
 1 | # C Keyword Spotting
 2 | No C++, no dependency hell. Suitable for embedded devices.
 3 | 
 4 | ### Demo
 5 | Default models pretrained on 0-9 words: zero one two three four five six seven eight nine.
 6 | 
 7 |     ~$ arecord -f S16_LE -c1 -r16000 -d1 test.wav
 8 |     ~$ aplay test.wav
 9 |     ~$ dataset/dataset/google_speech_commands/src/features/build.sh
10 |     ~$ src/brain/build.sh
11 |     ~$ alias fe=dataset/dataset/google_speech_commands/bin/fe
12 |     ~$ fe test.wav | bin/guess models/mlp.model
13 |     ~$ fe test.wav | bin/guess models/cnn.model
14 |     ~$ fe test.wav | bin/guess models/rnn.model
15 | 
16 | ### Training
17 | See [google speech commands dataset](https://github.com/42io/dataset/tree/master/google_speech_commands#custom-words) for available words.
18 | 
19 |     ~$ apt install gcc lrzip wget
20 |     ~$ wget https://github.com/42io/dataset/releases/download/v1.0/0-9up.lrz -O /tmp/0-9up.lrz
21 |     ~$ lrunzip /tmp/0-9up.lrz -o /tmp/0-9up.data # md5 87fc2460c7b6cd3dcca6807e9de78833
22 |     ~$ dataset/train.sh /tmp/0-9up.data 49 13 12 # inputs height, inputs width, outputs
23 | 
24 | It takes some time, be patient. Finally, you'll see the confusion matrix.
25 | 
26 |     MLP confusion matrix...
27 |     zero  | 0.91 0.00 0.03 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.05 0.00 | 603
28 |     one   | 0.00 0.92 0.00 0.01 0.00 0.01 0.00 0.00 0.00 0.02 0.05 0.00 | 575
29 |     two   | 0.01 0.00 0.89 0.01 0.01 0.00 0.00 0.01 0.00 0.00 0.05 0.01 | 564
30 |     three | 0.00 0.00 0.01 0.92 0.00 0.01 0.00 0.01 0.02 0.00 0.03 0.01 | 548
31 |     four  | 0.00 0.01 0.01 0.00 0.89 0.00 0.00 0.00 0.00 0.00 0.07 0.00 | 605
32 |     five  | 0.00 0.01 0.00 0.01 0.00 0.84 0.00 0.01 0.01 0.02 0.08 0.00 | 607
33 |     six   | 0.00 0.00 0.00 0.00 0.00 0.00 0.97 0.00 0.00 0.00 0.01 0.01 | 462
34 |     seven | 0.01 0.00 0.01 0.01 0.00 0.00 0.01 0.93 0.00 0.00 0.03 0.00 | 574
35 |     eight | 0.00 0.00 0.01 0.03 0.00 0.00 0.01 0.00 0.91 0.00 0.03 0.01 | 547
36 |     nine  | 0.00 0.03 0.00 0.01 0.00 0.01 0.00 0.00 0.00 0.86 0.08 0.01 | 596
37 |     #unk# | 0.01 0.03 0.02 0.04 0.03 0.03 0.01 0.02 0.02 0.03 0.76 0.01 | 730
38 |     #pub# | 0.00 0.00 0.00 0.00 0.01 0.00 0.01 0.01 0.00 0.00 0.01 0.95 | 730
39 |     MLP guessed wrong 773...
40 | 
41 |     CNN confusion matrix...
42 |     zero  | 0.97 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.03 0.00 | 603
43 |     one   | 0.00 0.93 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.06 0.00 | 575
44 |     two   | 0.01 0.00 0.95 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.02 0.00 | 564
45 |     three | 0.00 0.00 0.01 0.94 0.00 0.00 0.00 0.00 0.01 0.00 0.03 0.00 | 548
46 |     four  | 0.00 0.00 0.00 0.00 0.94 0.00 0.00 0.00 0.00 0.00 0.05 0.00 | 605
47 |     five  | 0.00 0.00 0.00 0.00 0.00 0.95 0.00 0.00 0.00 0.00 0.04 0.00 | 607
48 |     six   | 0.00 0.00 0.00 0.00 0.00 0.00 0.99 0.00 0.00 0.00 0.00 0.00 | 462
49 |     seven | 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.98 0.00 0.00 0.01 0.00 | 574
50 |     eight | 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.97 0.00 0.01 0.00 | 547
51 |     nine  | 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.94 0.05 0.00 | 596
52 |     #unk# | 0.00 0.01 0.00 0.01 0.01 0.00 0.00 0.00 0.00 0.00 0.95 0.01 | 730
53 |     #pub# | 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.99 | 730
54 |     CNN guessed wrong 291...
55 | 
56 |     RNN confusion matrix...
57 |     zero  | 0.98 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.01 0.00 | 603
58 |     one   | 0.00 0.95 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.01 0.03 0.00 | 575
59 |     two   | 0.01 0.00 0.96 0.01 0.01 0.00 0.00 0.00 0.00 0.00 0.01 0.00 | 564
60 |     three | 0.00 0.00 0.00 0.94 0.00 0.00 0.00 0.00 0.02 0.00 0.02 0.00 | 548
61 |     four  | 0.00 0.00 0.00 0.00 0.97 0.00 0.00 0.00 0.00 0.00 0.02 0.00 | 605
62 |     five  | 0.00 0.00 0.00 0.00 0.01 0.98 0.00 0.00 0.00 0.00 0.00 0.00 | 607
63 |     six   | 0.00 0.00 0.00 0.00 0.00 0.00 0.98 0.00 0.00 0.00 0.01 0.00 | 462
64 |     seven | 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.98 0.00 0.00 0.01 0.00 | 574
65 |     eight | 0.00 0.00 0.00 0.01 0.00 0.01 0.00 0.00 0.97 0.00 0.01 0.00 | 547
66 |     nine  | 0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.97 0.01 0.01 | 596
67 |     #unk# | 0.01 0.02 0.00 0.01 0.02 0.01 0.00 0.01 0.01 0.01 0.92 0.01 | 730
68 |     #pub# | 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.98 | 730
69 |     RNN guessed wrong 254...
70 | 
71 | ### Heap Memory Usage
72 | Some magic numbers to know before stepping into the embedded world.
73 | 
74 |     ~$ valgrind dataset/dataset/google_speech_commands/bin/fe test.wav # 606,416 bytes allocated
75 |     ~$ fe test.wav | valgrind bin/guess models/mlp.model               # 622,768 bytes allocated
76 |     ~$ fe test.wav | valgrind bin/guess models/cnn.model               # 2,445,100 bytes allocated
77 |     ~$ fe test.wav | valgrind bin/guess models/rnn.model               # 403,772 bytes allocated
78 | 
79 | See [ESP32](https://github.com/42io/esp32_kws) example.
80 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin/
2 | models/rnn-*.model


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "dataset/dataset"]
2 | 	path = dataset/dataset
3 | 	url = https://github.com/42io/dataset.git
4 | 


--------------------------------------------------------------------------------
/dataset/train.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | set -u
 5 | 
 6 | cd "`dirname "${BASH_SOURCE[0]}"`"
 7 | 
 8 | export LC_ALL=C
 9 | 
10 | readonly DATASET_FILE_NAME=$1
11 | readonly DATASET_NUM_OUTPUT=$4
12 | 
13 | bash ./../src/brain/build.sh
14 | 
15 | do_confusion_matrix() {
16 |   local model=$1
17 |   local i
18 |   for i in `seq ${DATASET_NUM_OUTPUT}` ; do
19 |     awk -v m="${DATASET_NUM_OUTPUT}" '$1 >= m' "${DATASET_FILE_NAME}" \
20 |       | awk -v i="${i}" -v m="${DATASET_NUM_OUTPUT}" '$1 == i - 1 + m || $1 == i - 1 + 2*m' \
21 |       | awk '{for(i=2;i<=NF;i++){if(i>2)printf " ";printf $i} print ""}' \
22 |       | ./../bin/guess "./../models/${model}" \
23 |       | awk '{m=$1;j=1;for(i=j;i<=NF;i++)if($i>m){m=$i;j=i;} for(i=1;i<=NF;i++){if(i>1)printf " ";printf "%d", i==j} print ""}' \
24 |       | awk '{for(i=1;i<=NF;i++)sum[i]+=$i} END {for(j=1;j<i;j++){if(j>1)printf " ";printf "%.2f", sum[j]/NR} print " | " NR}'
25 |   done
26 | }
27 | 
28 | do_validation() {
29 |   local model=$1
30 |   local i
31 |   for i in `seq ${DATASET_NUM_OUTPUT}` ; do
32 |     awk -v m="${DATASET_NUM_OUTPUT}" '$1 >= m' "${DATASET_FILE_NAME}" \
33 |       | awk -v i="${i}" -v m="${DATASET_NUM_OUTPUT}" '$1 == i - 1 + m || $1 == i - 1 + 2*m' \
34 |       | awk '{for(i=2;i<=NF;i++){if(i>2)printf " ";printf $i} print ""}' \
35 |       | ./../bin/guess "./../models/${model}" \
36 |       | awk -v x="${i}" '{m=$1;j=1;for(i=j;i<=NF;i++)if($i>m){m=$i;j=i;} if(j!=x)print x}'
37 |   done
38 | }
39 | 
40 | leave_only_the_best_model() {
41 |   local pattern="./../models/${1}"
42 |   local best_model="./../models/${2}"
43 |   local best_score
44 |   local best_score_has_value=
45 |   local score
46 |   local model
47 | 
48 |   rm "${best_model}"
49 | 
50 |   for model in `ls -tr ${pattern}`
51 |   do
52 |     score=`do_validation "${model}" | wc -l`
53 |     echo "${model} ${score}"
54 |     if ((best_score_has_value == 0 || best_score > score)); then
55 |       best_score_has_value=1
56 |       best_score=${score}
57 |       cp "${model}" "${best_model}"
58 |     fi
59 |   done
60 |   echo "Best model score is ${best_score}"
61 |   rm ${pattern}
62 | }
63 | 
64 | echo 'MLP training...'
65 | rm ./../models/mlp.model
66 | ./../bin/mlp_train "${@}"
67 | echo 'MLP confusion matrix...'
68 | do_confusion_matrix 'mlp.model'
69 | echo "MLP guessed wrong `do_validation 'mlp.model' | wc -l`..."
70 | 
71 | echo 'CNN training...'
72 | rm ./../models/cnn.model
73 | ./../bin/cnn_train "${@}"
74 | echo 'CNN confusion matrix...'
75 | do_confusion_matrix 'cnn.model'
76 | echo "CNN guessed wrong `do_validation 'cnn.model' | wc -l`..."
77 | 
78 | echo 'RNN training...'
79 | rm -f ./../models/rnn-epoch-*.model
80 | ./../bin/rnn_train "${@}"
81 | leave_only_the_best_model 'rnn-epoch-*.model' 'rnn.model'
82 | echo 'RNN confusion matrix...'
83 | do_confusion_matrix 'rnn.model'
84 | echo "RNN guessed wrong `do_validation 'rnn.model' | wc -l`..."


--------------------------------------------------------------------------------
/lib/kann-master/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | *.o
3 | *.a
4 | *.kan
5 | *.dSYM
6 | a.out
7 | 


--------------------------------------------------------------------------------
/lib/kann-master/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2018-2019 Dana-Farber Cancer Institute
 4 |               2016-2018 Broad Institute
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining
 7 | a copy of this software and associated documentation files (the
 8 | "Software"), to deal in the Software without restriction, including
 9 | without limitation the rights to use, copy, modify, merge, publish,
10 | distribute, sublicense, and/or sell copies of the Software, and to
11 | permit persons to whom the Software is furnished to do so, subject to
12 | the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be
15 | included in all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 | SOFTWARE.
25 | 


--------------------------------------------------------------------------------
/lib/kann-master/Makefile:
--------------------------------------------------------------------------------
 1 | CC=			gcc
 2 | CFLAGS=		-g -Wall -Wextra -Wc++-compat -O2
 3 | CFLAGS_LIB=	#-ansi -pedantic -Wno-long-long # ANSI C does not have inline which affects performance a little bit
 4 | CPPFLAGS=	-DHAVE_PTHREAD
 5 | INCLUDES=	-I.
 6 | EXE=		examples/mlp examples/mnist-cnn examples/inspect examples/textgen examples/rnn-bit \
 7 | 			examples/ae examples/vae
 8 | LIBS=		-lpthread -lz -lm
 9 | 
10 | ifdef CBLAS
11 | 	CPPFLAGS+=-DHAVE_CBLAS
12 | 	INCLUDES+=-I$(CBLAS)/include
13 | 	LIBS=-fopenmp -pthread -L$(CBLAS)/lib -lopenblas -lz -lm
14 | endif
15 | 
16 | .SUFFIXES:.c .o
17 | .PHONY:all clean depend
18 | 
19 | .c.o:
20 | 		$(CC) -c $(CFLAGS) $(INCLUDES) $(CPPFLAGS) $< -o $@
21 | 
22 | all:kautodiff.o kann.o kann_extra/kann_data.o $(EXE)
23 | 
24 | kautodiff.o:kautodiff.c
25 | 		$(CC) -c $(CFLAGS) $(CFLAGS_LIB) $(INCLUDES) $(CPPFLAGS) -o $@ $<
26 | 
27 | kann.o:kann.c
28 | 		$(CC) -c $(CFLAGS) $(CFLAGS_LIB) $(INCLUDES) $(CPPFLAGS) -o $@ $<
29 | 
30 | kann_extra/kann_data.o:kann_extra/kann_data.c
31 | 		$(CC) -c $(CFLAGS) -DHAVE_ZLIB $< -o $@
32 | 
33 | examples/mlp:examples/mlp.o kautodiff.o kann.o kann_extra/kann_data.o
34 | 		$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
35 | 
36 | examples/ae:examples/ae.o kautodiff.o kann.o kann_extra/kann_data.o
37 | 		$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
38 | 
39 | examples/vae:examples/vae.o kautodiff.o kann.o kann_extra/kann_data.o
40 | 		$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
41 | 
42 | examples/textgen:examples/textgen.o kautodiff.o kann.o
43 | 		$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
44 | 
45 | examples/rnn-bit:examples/rnn-bit.o kautodiff.o kann.o
46 | 		$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
47 | 
48 | examples/inspect:examples/inspect.o kautodiff.o kann.o
49 | 		$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
50 | 
51 | examples/mnist-cnn:examples/mnist-cnn.o kautodiff.o kann.o kann_extra/kann_data.o
52 | 		$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
53 | 
54 | clean:
55 | 		rm -fr *.o */*.o a.out */a.out *.a *.dSYM */*.dSYM $(EXE)
56 | 
57 | depend:
58 | 		(LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c kann_extra/*.c examples/*.c)
59 | 
60 | # DO NOT DELETE
61 | 
62 | kann.o: kann.h kautodiff.h
63 | kautodiff.o: kautodiff.h
64 | kann_extra/kann_data.o: kann_extra/kseq.h kann_extra/kann_data.h
65 | examples/ae.o: kann.h kautodiff.h kann_extra/kann_data.h
66 | examples/inspect.o: kann.h kautodiff.h
67 | examples/mlp.o: kann.h kautodiff.h kann_extra/kann_data.h
68 | examples/mnist-cnn.o: kann_extra/kann_data.h kann.h kautodiff.h
69 | examples/rnn-bit.o: kann.h kautodiff.h
70 | examples/textgen.o: kann.h kautodiff.h
71 | examples/vae.o: kann.h kautodiff.h kann_extra/kann_data.h
72 | 


--------------------------------------------------------------------------------
/lib/kann-master/README.md:
--------------------------------------------------------------------------------
  1 | ## Getting Started
  2 | ```sh
  3 | # acquire source code and compile
  4 | git clone https://github.com/attractivechaos/kann
  5 | cd kann; make
  6 | # learn unsigned addition (30000 samples; numbers within 10000)
  7 | seq 30000 | awk -v m=10000 '{a=int(m*rand());b=int(m*rand());print a,b,a+b}' \
  8 |   | ./examples/rnn-bit -m7 -o add.kan -
  9 | # apply the model (output 1138429, the sum of the two numbers)
 10 | echo 400958 737471 | ./examples/rnn-bit -Ai add.kan -
 11 | ```
 12 | 
 13 | ## Introduction
 14 | 
 15 | KANN is a standalone and lightweight library in C for constructing and training
 16 | small to medium artificial neural networks such as [multi-layer
 17 | perceptrons][mlp], [convolutional neural networks][cnn] and [recurrent neural
 18 | networks][rnn] (including [LSTM][lstm] and [GRU][gru]). It implements
 19 | graph-based reverse-mode [automatic differentiation][ad] and allows building
 20 | topologically complex neural networks with recurrence, shared weights and
 21 | multiple inputs/outputs/costs. In comparison to mainstream deep learning
 22 | frameworks such as [TensorFlow][tf], KANN is not as scalable, but it is close
 23 | in flexibility, has a much smaller code base and only depends on the standard C
 24 | library. In comparison to other lightweight frameworks such as [tiny-dnn][td],
 25 | KANN is still smaller, times faster and much more versatile, supporting RNN,
 26 | VAE and non-standard neural networks that may fail these lightweight
 27 | frameworks.
 28 | 
 29 | KANN could be potentially useful when you want to experiment with small to medium
 30 | neural networks in C/C++, to deploy not-so-large models without worrying about
 31 | [dependency hell][dh], or to learn the internals of deep learning libraries.
 32 | 
 33 | ### Features
 34 | 
 35 | * Flexible. Model construction by building a computational graph with
 36 |   operators. Support RNNs, weight sharing and multiple inputs/outputs.
 37 | 
 38 | * Efficient. Reasonably optimized matrix product and convolution. Support
 39 |   mini-batching and effective multi-threading. Sometimes faster than mainstream
 40 |   frameworks in their CPU-only mode.
 41 | 
 42 | * Small and portable. As of now, KANN has less than 4000 lines of code in four
 43 |   source code files, with no non-standard dependencies by default. Compatible with 
 44 |   ANSI C compilers.
 45 | 
 46 | ### Limitations
 47 | 
 48 | * CPU only. As such, KANN is **not** intended for training huge neural
 49 |   networks.
 50 | 
 51 | * Lack of some common operators and architectures such as batch normalization.
 52 | 
 53 | * Verbose APIs for training RNNs.
 54 | 
 55 | ## Installation
 56 | 
 57 | The KANN library is composed of four files: `kautodiff.{h,c}` and `kann.{h,c}`.
 58 | You are encouraged to include these files in your source code tree. No
 59 | installation is needed. To compile examples:
 60 | ```sh
 61 | make
 62 | ```
 63 | This generates a few executables in the [examples](examples) directory.
 64 | 
 65 | ## Documentations
 66 | 
 67 | Comments in the header files briefly explain the APIs. More documentation can
 68 | be found in the [doc](doc) directory. Examples using the library are in the
 69 | [examples](examples) directory.
 70 | 
 71 | ### A tour of basic KANN APIs
 72 | 
 73 | Working with neural networks usually involves three steps: model construction,
 74 | training and prediction. We can use layer APIs to build a simple model:
 75 | ```c
 76 | kann_t *ann;
 77 | kad_node_t *t;
 78 | t = kann_layer_input(784); // for MNIST
 79 | t = kad_relu(kann_layer_dense(t, 64)); // a 64-neuron hidden layer with ReLU activation
 80 | t = kann_layer_cost(t, 10, KANN_C_CEM); // softmax output + multi-class cross-entropy cost
 81 | ann = kann_new(t, 0);                   // compile the network and collate variables
 82 | ```
 83 | For this simple feedforward model with one input and one output, we can train
 84 | it with:
 85 | ```c
 86 | int n;     // number of training samples
 87 | float **x; // model input, of size n * 784
 88 | float **y; // model output, of size n * 10
 89 | // fill in x and y here and then call:
 90 | kann_train_fnn1(ann, 0.001f, 64, 25, 10, 0.1f, n, x, y);
 91 | ```
 92 | We can save the model to a file with `kann_save()` or use it to classify a
 93 | MNIST image:
 94 | ```c
 95 | float *x;       // of size 784
 96 | const float *y; // this will point to an array of size 10
 97 | // fill in x here and then call:
 98 | y = kann_apply1(ann, x);
 99 | ```
100 | 
101 | Working with complex models requires using low-level APIs. Please see
102 | [01user.md](doc/01user.md) for details.
103 | 
104 | ### A complete example
105 | 
106 | This example learns to count the number of "1" bits in an integer (i.e.
107 | popcount):
108 | ```c
109 | // to compile and run: gcc -O2 this-prog.c kann.c kautodiff.c -lm && ./a.out
110 | #include <stdlib.h>
111 | #include <stdio.h>
112 | #include "kann.h"
113 | 
114 | int main(void)
115 | {
116 | 	int i, k, max_bit = 20, n_samples = 30000, mask = (1<<max_bit)-1, n_err, max_k;
117 | 	float **x, **y, max, *x1;
118 | 	kad_node_t *t;
119 | 	kann_t *ann;
120 | 	// construct an MLP with one hidden layer
121 | 	t = kann_layer_input(max_bit);
122 | 	t = kad_relu(kann_layer_dense(t, 64));
123 | 	t = kann_layer_cost(t, max_bit + 1, KANN_C_CEM); // output uses 1-hot encoding
124 | 	ann = kann_new(t, 0);
125 | 	// generate training data
126 | 	x = (float**)calloc(n_samples, sizeof(float*));
127 | 	y = (float**)calloc(n_samples, sizeof(float*));
128 | 	for (i = 0; i < n_samples; ++i) {
129 | 		int c, a = kad_rand(0) & (mask>>1);
130 | 		x[i] = (float*)calloc(max_bit, sizeof(float));
131 | 		y[i] = (float*)calloc(max_bit + 1, sizeof(float));
132 | 		for (k = c = 0; k < max_bit; ++k)
133 | 			x[i][k] = (float)(a>>k&1), c += (a>>k&1);
134 | 		y[i][c] = 1.0f; // c is ranged from 0 to max_bit inclusive
135 | 	}
136 | 	// train
137 | 	kann_train_fnn1(ann, 0.001f, 64, 50, 10, 0.1f, n_samples, x, y);
138 | 	// predict
139 | 	x1 = (float*)calloc(max_bit, sizeof(float));
140 | 	for (i = n_err = 0; i < n_samples; ++i) {
141 | 		int c, a = kad_rand(0) & (mask>>1); // generating a new number
142 | 		const float *y1;
143 | 		for (k = c = 0; k < max_bit; ++k)
144 | 			x1[k] = (float)(a>>k&1), c += (a>>k&1);
145 | 		y1 = kann_apply1(ann, x1);
146 | 		for (k = 0, max_k = -1, max = -1.0f; k <= max_bit; ++k) // find the max
147 | 			if (max < y1[k]) max = y1[k], max_k = k;
148 | 		if (max_k != c) ++n_err;
149 | 	}
150 | 	fprintf(stderr, "Test error rate: %.2f%%\n", 100.0 * n_err / n_samples);
151 | 	kann_delete(ann); // TODO: also to free x, y and x1
152 | 	return 0;
153 | }
154 | ```
155 | 
156 | ## Benchmarks
157 | 
158 | * First of all, this benchmark only evaluates relatively small networks, but
159 |   in practice, it is huge networks on GPUs that really demonstrate the true
160 |   power of mainstream deep learning frameworks. *Please don't read too much into
161 |   the table*.
162 | 
163 | * "Linux" has 48 cores on two Xeon E5-2697 CPUs at 2.7GHz. MKL, NumPy-1.12.0
164 |   and Theano-0.8.2 were installed with Conda; Keras-1.2.2 installed with pip.
165 |   The official TensorFlow-1.0.0 wheel does not work with Cent OS 6 on this
166 |   machine, due to glibc. This machine has one Tesla K40c GPU installed. We are
167 |   using CUDA-7.0 and cuDNN-4.0 for training on GPU.
168 | 
169 | * "Mac" has 4 cores on a Core i7-3667U CPU at 2GHz. MKL, NumPy and Theano came
170 |   with Conda, too. Keras-1.2.2 and Tensorflow-1.0.0 were installed with pip. On
171 |   both machines, Tiny-DNN was acquired from github on March 1st, 2017.
172 | 
173 | * mnist-mlp implements a simple MLP with one layer of 64 hidden neurons.
174 |   mnist-cnn applies two convolutional layers with 32 3-by-3 kernels and ReLU
175 |   activation, followed by 2-by-2 max pooling and one 128-neuron dense layer.
176 |   mul100-rnn uses two GRUs of size 160. Both input and output are 2-D
177 |   binary arrays of shape (14,2) -- 28 GRU operations for each of the 30000
178 |   training samples.
179 | 
180 | |Task       |Framework    |Machine|Device   |Real     |CPU     |Command line |
181 | |:----------|:------------|:------|--------:|--------:|-------:|:------------|
182 | |mnist-mlp  |KANN+SSE     |Linux  |1 CPU    | 31.3s   | 31.2s  |mlp -m20 -v0|
183 | |           |             |Mac    |1 CPU    | 27.1s   | 27.1s  ||
184 | |           |KANN+BLAS    |Linux  |1 CPU    | 18.8s   | 18.8s  ||
185 | |           |Theano+Keras |Linux  |1 CPU    | 33.7s   | 33.2s  |keras/mlp.py -m20 -v0|
186 | |           |             |       |4 CPUs   | 32.0s   |121.3s  ||
187 | |           |             |Mac    |1 CPU    | 37.2s   | 35.2s  ||
188 | |           |             |       |2 CPUs   | 32.9s   | 62.0s  ||
189 | |           |TensorFlow   |Mac    |1 CPU    | 33.4s   | 33.4s  |tensorflow/mlp.py -m20|
190 | |           |             |       |2 CPUs   | 29.2s   | 50.6s  |tensorflow/mlp.py -m20 -t2|
191 | |           |Tiny-dnn     |Linux  |1 CPU    | 2m19s   | 2m18s  |tiny-dnn/mlp -m20|
192 | |           |Tiny-dnn+AVX |Linux  |1 CPU    | 1m34s   | 1m33s  ||
193 | |           |             |Mac    |1 CPU    | 2m17s   | 2m16s  ||
194 | |mnist-cnn  |KANN+SSE     |Linux  |1 CPU    |57m57s   |57m53s  |mnist-cnn -v0 -m15|
195 | |           |             |       |4 CPUs   |19m09s   |68m17s  |mnist-cnn -v0 -t4 -m15|
196 | |           |Theano+Keras |Linux  |1 CPU    |37m12s   |37m09s  |keras/mlp.py -Cm15 -v0|
197 | |           |             |       |4 CPUs   |24m24s   |97m22s  ||
198 | |           |             |       |1 GPU    |2m57s    |        |keras/mlp.py -Cm15 -v0|
199 | |           |Tiny-dnn+AVX |Linux  |1 CPU    |300m40s  |300m23s |tiny-dnn/mlp -Cm15|
200 | |mul100-rnn |KANN+SSE     |Linux  |1 CPU    |40m05s   |40m02s  |rnn-bit -l2 -n160 -m25 -Nd0|
201 | |           |             |       |4 CPUs   |12m13s   |44m40s  |rnn-bit -l2 -n160 -t4 -m25 -Nd0|
202 | |           |KANN+BLAS    |Linux  |1 CPU    |22m58s   |22m56s  |rnn-bit -l2 -n160 -m25 -Nd0|
203 | |           |             |       |4 CPUs   |8m18s    |31m26s  |rnn-bit -l2 -n160 -t4 -m25 -Nd0|
204 | |           |Theano+Keras |Linux  |1 CPU    |27m30s   |27m27s  |rnn-bit.py -l2 -n160 -m25|
205 | |           |             |       |4 CPUs   |19m52s   |77m45s  ||
206 | 
207 | * In the single thread mode, Theano is about 50% faster than KANN probably due
208 |   to efficient matrix multiplication (aka. `sgemm`) implemented in MKL. As is
209 |   shown in a [previous micro-benchmark][matmul], MKL/OpenBLAS can be twice as
210 |   fast as the implementation in KANN.
211 | 
212 | * KANN can optionally use the `sgemm` routine from a BLAS library (enabled by
213 |   macro `HAVE_CBLAS`). Linked against OpenBLAS-0.2.19, KANN matches the
214 |   single-thread performance of Theano on Mul100-rnn. KANN doesn't reduce
215 |   convolution to matrix multiplication, so MNIST-cnn won't benefit from
216 |   OpenBLAS. We observed that OpenBLAS is slower than the native KANN
217 |   implementation when we use a mini-batch of size 1. The cause is unknown.
218 | 
219 | * KANN's intra-batch multi-threading model is better than Theano+Keras.
220 |   However, in its current form, this model probably won't get along well with
221 |   GPUs.
222 | 
223 | 
224 | 
225 | [mlp]: https://en.wikipedia.org/wiki/Multilayer_perceptron
226 | [cnn]: https://en.wikipedia.org/wiki/Convolutional_neural_network
227 | [rnn]: https://en.wikipedia.org/wiki/Recurrent_neural_network
228 | [gru]: https://en.wikipedia.org/wiki/Gated_recurrent_unit
229 | [lstm]: https://en.wikipedia.org/wiki/Long_short-term_memory
230 | [ad]: https://en.wikipedia.org/wiki/Automatic_differentiation
231 | [dh]: https://en.wikipedia.org/wiki/Dependency_hell
232 | [ae]: https://en.wikipedia.org/wiki/Autoencoder
233 | [tf]: https://www.tensorflow.org
234 | [td]: https://github.com/tiny-dnn/tiny-dnn
235 | [matmul]: https://github.com/attractivechaos/matmul
236 | 


--------------------------------------------------------------------------------
/lib/kann-master/dna/README.md:
--------------------------------------------------------------------------------
1 | The examples in this directory have been moved to a [separate repo][dna-nn].
2 | 
3 | [dna-nn]: https://github.com/lh3/dna-nn
4 | 


--------------------------------------------------------------------------------
/lib/kann-master/doc/01user.md:
--------------------------------------------------------------------------------
  1 | ## Table of Contents
  2 | 
  3 | * [Overview](#overview)
  4 | * [Constructing a Neural Network](#constructing-a-neural-network)
  5 |   - [Constructing a multi-layer perceptron (MLP)](#constructing-a-multi-layer-perceptron-mlp)
  6 |   - [Constructing a convolutional neural network (CNN)](#constructing-a-convolutional-neural-network-cnn)
  7 |   - [Constructing a denoising autoencoder (dAE) with tied weights](#constructing-a-denoising-autoencoder-dae-with-tied-weights)
  8 |   - [Constructing a recurrent neural network (RNN)](#constructing-a-recurrent-neural-network-rnn)
  9 | * [Training a Neural Network](#training-a-neural-network)
 10 |   - [Training a simple feedforward neural network (FNN)](#training-a-simple-feedforward-neural-network-fnn)
 11 |   - [Training a recurrent neural network (RNN)](#training-a-recurrent-neural-network-rnn)
 12 | 
 13 | 
 14 | 
 15 | ## Overview
 16 | 
 17 | In KANN, every neural network is modeled by a computation graph. A computation
 18 | graph represents one or multiple mathematical expressions. It is a directed
 19 | acyclic graph, where an external node represents a constant or a variable used
 20 | in the expressions; an internal node represents an operator (e.g. plus) or a
 21 | function (e.g. exponential); an edge from node `u` to `v` indicates `u` being
 22 | an operand of `v`.
 23 | 
 24 | Files `kautodiff.*` implement computation graphs and symbol-to-number
 25 | reverse-mode automatic differentiation. Users construct a graph by composing
 26 | math expressions with operators defined in the library, and then use the graph
 27 | to compute values and partial derivatives of a scalar function. With
 28 | `kautodiff.*`, users are required to explicitly define and manage every node.
 29 | Files `kann.*` simplify this procedure. This part provides layers that can
 30 | specify multiple nodes at the same time by not exposing variables associated
 31 | with one or multiple operators. A program is expected to call both
 32 | `kautodiff.*` and `kann.*` APIs.
 33 | 
 34 | 
 35 | 
 36 | ## Constructing a Neural Network
 37 | 
 38 | In KANN, a neural network is essentially a computational graph. Constructing a
 39 | neural network boils down to constructing a directed acyclic computational
 40 | graph.
 41 | 
 42 | ### Constructing a multi-layer perceptron (MLP)
 43 | 
 44 | The following function constructs an MLP with one hidden layer.
 45 | ```c
 46 | kann_t *model_gen(int n_in, int n_out, int n_hidden_neurons)
 47 | {
 48 | 	kad_node_t *t;
 49 | 	t = kann_layer_input(n_in);
 50 | 	t = kann_layer_linear(t, n_hidden_neurons);
 51 | 	t = kad_relu(t);
 52 | 	t = kann_layer_cost(t, n_out, KANN_C_CEB);
 53 | 	return kann_new(t, 0);
 54 | }
 55 | ```
 56 | Here `kann_layer_input()` sets an input node in the computational graph.
 57 | `kann_layer_linear()` adds a linear transformation layer to the graph and
 58 | `kad_relu()` sets the activation function. `kann_layer_cost()` adds an output
 59 | layer and a binary cross-entropy cost (specified by `KANN_C_CEB`). Finally
 60 | `kann_new()` generates the neural network.
 61 | 
 62 | ### Constructing a convolutional neural network (CNN)
 63 | 
 64 | The following function constructs a CNN to classify MNIST images:
 65 | ```c
 66 | kann_t *model_gen_mnist(int n_h_flt, int n_h_fc)
 67 | {
 68 | 	kad_node_t *t;
 69 | 	t = kad_feed(4, 1, 1, 28, 28), t->ext_flag |= KANN_F_IN;
 70 | 	t = kad_relu(kann_layer_conv2d(t, n_h_flt, 3, 3, 1, 0));
 71 | 	t = kad_relu(kann_layer_conv2d(t, n_h_flt, 3, 3, 1, 0));
 72 | 	t = kann_layer_max2d(t, 2, 2, 2, 0);
 73 | 	t = kann_layer_dropout(t, 0.2f);
 74 | 	t = kann_layer_linear(t, n_h_fc);
 75 | 	t = kad_relu(t);
 76 | 	return kann_new(kann_layer_cost(t, 10, KANN_C_CEM), 0);
 77 | }
 78 | ```
 79 | It uses a little more low-level APIs. Here we use `kad_feed()` to add an input
 80 | node and set an external flag `KANN_F_IN` to mark it. The input is a 4D array
 81 | with the four dimensions being: mini-batch size, number of channels, height and
 82 | width. **Importantly**, we note that the first dimension of input, truth and
 83 | most of internal nodes in a neural network is always the mini-batch size.
 84 | Violating this rule might lead to unexpected errors. The rest of code adds two
 85 | convolution layers and one max pooling layer. We are using multi-class
 86 | cross-entropy cost (specified by `KANN_C_CEM`) in this example.
 87 | 
 88 | ### Constructing a denoising autoencoder (dAE) with tied weights
 89 | 
 90 | The following function demonstrates how to use shared weights.
 91 | ```c
 92 | kann_t *model_gen(int n_in, int n_hidden, float i_dropout)
 93 | {
 94 | 	kad_node_t *x, *t, *w;
 95 | 	w = kann_new_weight(n_hidden, n_in);
 96 | 	x = kad_feed(2, 1, n_in), x->ext_flag |= KANN_F_IN | KANN_F_TRUTH;
 97 | 	t = kann_layer_dropout(x, i_dropout);
 98 | 	t = kad_tanh(kad_add(kad_cmul(t, w), kann_new_bias(n_hidden)));
 99 | 	t = kad_add(kad_matmul(t, w), kann_new_bias(n_in));
100 | 	t = kad_sigm(t), t->ext_flag = KANN_F_OUT;
101 | 	t = kad_ce_bin(t, x), t->ext_flag = KANN_F_COST;
102 | 	return kann_new(t, 0);
103 | }
104 | ```
105 | In this example, the input node is also marked as the truth node. The weight
106 | matrix `w`, with `n_hidden` rows and `n_in` columns, is first used at the
107 | encoding phase in `kad_cmul()` (matrix product with the second matrix
108 | transposed) and then used again at the decoding phase in `kad_matmul()` (matrix
109 | product). The input node is also reused to compute the cost.
110 | 
111 | Generally, to use a shared variable, we keep the pointer to the variable node
112 | and use it in multiple expressions. This procedure often requires interacting
113 | with low-level `kad_*` APIs, as `kann_layer_*` APIs hide variables.
114 | 
115 | ### Constructing a recurrent neural network (RNN)
116 | 
117 | The following function constructs an RNN with one GRU unit. It takes a sequence
118 | of inputs and produces a sequence of outputs. Such a model may be used for
119 | character-level text generation.
120 | ```c
121 | kann_t *model_gen(int n_in, int n_out, int n_hidden)
122 | {
123 | 	kad_node_t *t;
124 | 	t = kann_layer_input(n_in);
125 | 	t = kann_layer_gru(t, n_hidden, 0);
126 | 	return kann_new(kann_layer_cost(t, n_out, KANN_C_CEB), 0);
127 | }
128 | ```
129 | When classifying a sequence, we would like the network to have one output instead
130 | of a sequence of outputs. We can construct the network this way:
131 | ```c
132 | kann_t *model_gen(int n_in, int n_out, int n_hidden)
133 | {
134 | 	kad_node_t *t;
135 | 	t = kann_layer_input(n_in);
136 | 	t = kann_layer_gru(t, n_hidden, 0);
137 | 	t = kad_avg(1, &t);
138 | 	return kann_new(kann_layer_cost(t, n_out, KANN_C_CEB), 0);
139 | }
140 | ```
141 | This model averages the hidden output from the GRU and then applies a linear layer to
142 | derive the final output (done by `kann_layer_cost()`).
143 | 
144 | 
145 | 
146 | ## Training a Neural Network
147 | 
148 | ### Training a simple feedforward neural network (FNN)
149 | 
150 | If an FNN only has one input node and one output node, we can use the
151 | `kann_train_fnn1()` API to train it. The API uses RMSprop for minimization. It
152 | splits data into training and validation data and stops training when the
153 | validation accuracy has not improved for, say, 10 epochs.
154 | 
155 | The `kann_train_fnn1()` function is relatively short. We encourage users to
156 | read this function to understand its internals. When the network has multiple
157 | inputs or outputs, or when we want to use another training policy,
158 | `kann_train_fnn1()` would not work any more; we may have to roll our own
159 | training code.
160 | 
161 | ### Training a recurrent neural network (RNN)
162 | 
163 | The KANN computational graph does not keep the history of computation. To train
164 | an RNN, we have to unroll it with `kann_unroll()`. Variables and constants are
165 | shared between the original and the unrolled networks. Training the unrolled
166 | network simultaneously trains the original network. As the unrolled network has
167 | multiple input nodes, we cannot use `kann_train_fnn1()` for training.  We are
168 | not providing a `kann_train_fnn1()`-like API because converting all time series
169 | data to vectors may take too much memory (for example, converting text to
170 | vectors at the character level makes the input 1000 times larger). We tried a
171 | callback-based API in an older version of KANN, but found it confusing to
172 | use and not flexible enough.
173 | 
174 | For now, the only way to train an RNN is to manually write our own training
175 | routine. The following example shows how to train an RNN for character-level
176 | text generation:
177 | ```c
178 | void train(kann_t *ann, float lr, int ulen, int mbs, int max_epoch, int len, const uint8_t *data)
179 | {
180 | 	int i, k, n_var, n_char;
181 | 	float **x, **y, *r, *g;
182 | 	kann_t *ua;
183 | 
184 | 	n_char = kann_dim_in(ann);
185 | 	x = (float**)calloc(ulen, sizeof(float*)); // an unrolled network has _ulen_ input nodes
186 | 	y = (float**)calloc(ulen, sizeof(float*)); // ... and _ulen_ truth nodes
187 | 	for (k = 0; k < ulen; ++k) {
188 | 		x[k] = (float*)calloc(n_char, sizeof(float)); // each input node takes a (1,n_char) 2D array
189 | 		y[k] = (float*)calloc(n_char, sizeof(float)); // ... where 1 is the mini-batch size
190 | 	}
191 | 	n_var = kann_size_var(ann);               // total size of variables
192 | 	r = (float*)calloc(n_var, sizeof(float)); // temporary array for RMSprop
193 | 	g = (float*)calloc(n_var, sizeof(float)); // gradients
194 | 
195 | 	ua = kann_unroll(ann, ulen);            // unroll; the mini batch size is 1
196 | 	kann_feed_bind(ua, KANN_F_IN,    0, x); // bind _x_ to input nodes
197 | 	kann_feed_bind(ua, KANN_F_TRUTH, 0, y); // bind _y_ to truth nodes
198 | 	for (i = 0; i < max_epoch; ++i) {
199 | 		double cost = 0.0;
200 | 		int j, b, tot = 0, n_cerr = 0;
201 | 		for (j = 1; j + ulen * mbs - 1 < len; j += ulen * mbs) {
202 | 			memset(g, 0, n_var * sizeof(float));
203 | 			for (b = 0; b < mbs; ++b) { // loop through a mini-batch
204 | 				for (k = 0; k < ulen; ++k) {
205 | 					memset(x[k], 0, n_char * sizeof(float));
206 | 					memset(y[k], 0, n_char * sizeof(float));
207 | 					x[k][data[j+b*ulen+k-1]] = 1.0f;
208 | 					y[k][data[j+b*ulen+k]] = 1.0f;
209 | 				}
210 | 				cost += kann_cost(ua, 0, 1) * ulen;
211 | 				n_cerr += kann_class_error(ua);
212 | 				tot += ulen;
213 | 				for (k = 0; k < n_var; ++k) g[k] += ua->g[k];
214 | 			}
215 | 			for (k = 0; k < n_var; ++k) g[k] /= mbs; // gradients are the average of this mini batch
216 | 			kann_RMSprop(n_var, lr, 0, 0.9f, g, ua->x, r); // update all variables
217 | 		}
218 | 		fprintf(stderr, "epoch: %d; cost: %g (class error: %.2f%%)\n", i+1, cost / tot, 100.0 * n_cerr / tot);
219 | 	}
220 | 	kann_delete_unrolled(ua); // for an unrolled network, don't use kann_delete()!
221 | 
222 | 	for (k = 0; k < ulen; ++k) { free(x[k]); free(y[k]); }
223 | 	free(g); free(r); free(y); free(x);
224 | }
225 | ```
226 | 


--------------------------------------------------------------------------------
/lib/kann-master/doc/02dev.md:
--------------------------------------------------------------------------------
  1 | ## Preface 
  2 | 
  3 | This document explains how KANN is implemented and some philosophies behind the
  4 | design. Importantly, the methods described here are only one way to implement
  5 | a deep learning framework from scratch. They are not necessarily the optimal
  6 | way. Please take the words below with a grain of salt.
  7 | 
  8 | 
  9 | ## Table of Contents
 10 | 
 11 | * [Basic Concepts](#basic-concepts)
 12 |   - [N-dimensional array](#n-dimensional-array)
 13 |   - [Automatic differentiation and computational graph](#automatic-differentiation-and-computational-graph)
 14 |   - [Backpropagating matrix products](#backpropagating-matrix-products)
 15 |   - [Network layers](#network-layers)
 16 | * [Implementing Convolution](#implementing-convolution)
 17 |   - [Definition of convolution](#definition-of-convolution)
 18 |   - [The shape of n-d arrays](#the-shape-of-n-d-arrays)
 19 |   - [Implementing the convolution operation](#implementing-the-convolution-operation)
 20 | * [Implementing Recurrent Neural Network (RNN)](#implementing-recurrent-neural-network-rnn)
 21 |   - [Computational graph of RNN](#computational-graph-of-rnn)
 22 |   - [Unrolling RNN](#unrolling-rnn)
 23 | 
 24 | 
 25 | ## Basic Concepts
 26 | 
 27 | ### N-dimensional array
 28 | 
 29 | N-dimensional array, or n-d array in brief, is a fundamental object that holds
 30 | most types of numerical data in KANN. It can be described with the following
 31 | struct:
 32 | ```cpp
 33 | typedef struct {
 34 |     int n_d;   // number of dimensions
 35 |     int *d;    // dimensions, of size n_d
 36 |     float *x;  // array data, of size \prod_i{d[i]} (1 if n_d==0)
 37 | } NDArray_t;
 38 | ```
 39 | The dimensions are also called the *shape* of the array. Conventionally, we
 40 | call the n-d array a scalar if *n\_d* equals 0, a vector if *n\_d* equals 1 and
 41 | a matrix if *n\_d* equals 2. We note that deep learning frameworks often take
 42 | n-d array as a synonym of *tensor*, though according to [wiki][tensor-wiki],
 43 | this seems imprecise.
 44 | 
 45 | ### Automatic differentiation and computational graph
 46 | 
 47 | [Automatic differentiation][ad] is the backbone of several major deep learning
 48 | frameworks. It efficiently computes the gradient of a function without symbolic
 49 | derivation. Automatic differentiation is typically achieved with a graph which
 50 | is loosely called a computational graph or a data flow graph (as in
 51 | TensorFlow). Detailed description of automatic differentiation is beyond the
 52 | scope of this note. We will only show an example here:
 53 | 
 54 | ![](images/autodiff.png)
 55 | 
 56 | A generic computational graph only distinguishes differentiable variables and
 57 | non-differentiable parameters or constants. It does not have a concept of
 58 | input, truth output, cost, hyperparameter and so on. KANN gives a node a
 59 | specific meaning by putting one label on the node. A KANN model is a labeled
 60 | computational graph.
 61 | 
 62 | The example here demonstrates the so-called symbol-to-number differentiation.
 63 | [TensorFlow][tf] and [Theano][theano] use symbol-to-symbol differentiation
 64 | by augmenting the computational graph with derivative nodes. We refer to the
 65 | [Deep Learning book][dlbook] for details.
 66 | 
 67 | ### Backpropagating matrix products
 68 | 
 69 | In implementation, each node in a computational graph holds an n-d array. An
 70 | important operation between two 2D arrays is [matrix product][matmul]. If we
 71 | let the shape of the gradient take the same shape as the variable, we can
 72 | backpropagate gradients with:
 73 | <!--
 74 | {\bf C}={\bf A}\cdot{\bf B},
 75 | \hspace{1em}\frac{\partial F}{\partial {\bf A}}\gets\frac{\partial F}{\partial {\bf C}}\cdot {\bf B}^\intercal,
 76 | \hspace{1em}\frac{\partial F}{\partial {\bf B}}\gets{\bf A}^\intercal\cdot\frac{\partial F}{\partial {\bf C}}
 77 | -->
 78 | ![](images/matmul1.png)
 79 | 
 80 | As we see here, the gradient update also involves matrix products and thus can be
 81 | calculated with the GEMM routine from [BLAS][blas]. KANN more often uses matrix
 82 | product with the second matrix transposed. The backprop rule becomes:
 83 | <!--
 84 | {\bf Y}={\bf X}\cdot{\bf W}^\intercal,
 85 | \hspace{1em}\frac{\partial F}{\partial {\bf X}}\gets\frac{\partial F}{\partial {\bf Y}}\cdot {\bf W},
 86 | \hspace{1em}\frac{\partial F}{\partial {\bf W}}\gets\left(\frac{\partial F}{\partial {\bf Y}}\right)^\intercal\cdot{\bf X}
 87 | -->
 88 | ![](images/matmul2.png)
 89 | 
 90 | ### Network layers
 91 | 
 92 | In the context of computational graph, a layer is a well-defined reusable
 93 | subgraph. The following figure shows the computational graph of a [multilayer
 94 | perceptron][mlp] with one hidden layer:
 95 | 
 96 | ![](images/mlp.png)
 97 | 
 98 | In this figure, each dotted red box represents a dense (aka fully connected)
 99 | layer that has one input (in green) and one output (in blue). In KANN, a layer
100 | works exactly this way.
101 | 
102 | 
103 | 
104 | ## Implementing Convolution
105 | 
106 | ### Definition of convolution
107 | 
108 | We often see two ways to define convolution in the context of deep learning:
109 | with or without the weight matrix flipped. Both ways work in practice and are
110 | used in mainstream frameworks. KANN uses flipped weight matrices as this is
111 | closer to the mathematical definition of convolution.
112 | 
113 | ### The shape of n-d arrays
114 | 
115 | For 2D convolution, cuDNN and Theano take images in shape of
116 | (batch-size,in-channel,height,width) and convolution weights in shape of
117 | (out-channel,in-channel,kernel-height,kernel-width). KANN follows the cuDNN and
118 | Theano convention. Caffe and TensorFlow are different. They take images in
119 | shape of (batch-size,height,width,in-channel) and weights in
120 | (kernel-height,kernel-width,in-channel,out-channel).
121 | 
122 | ### Implementing the convolution operation
123 | 
124 | Loosely speaking, there are three ways to implement the convolution operation.
125 | The first way is direct computation. It takes little working space but is
126 | usually slow especially for small kernels. In major frameworks, the most common
127 | way seems to convert convolution to a matrix multiplication problem by
128 | duplicating and expanding the input and weight matrices. A good visual
129 | explanation can be found in the [cuDNN paper][cudnn]. A C++ implementation of
130 | the core routine `im2col` can be found [in the Caffe source code][im2col].
131 | This second approach is much faster than a naive direct implementation. A
132 | drawback is that it may require significantly more working space for large kernels.
133 | Finally, as convolution is closely related to Fourier transformation, the
134 | convolution operation can also be implemented with Fast Fourier Transformation
135 | (FFT). [NNPACK][nnpack] gives an efficient CPU-only implementation.
136 | 
137 | KANN uses direct computation with an optimized inner loop. On a machine we use,
138 | it is eight times faster than a naive implementation, but about twice as slow
139 | as a CPU-only Theano implementation which is based on matrix product. KANN
140 | provides a reasonably fast, though not very fast, implementation that is still
141 | simple and does not require a lot of working space.
142 | 
143 | As a side note, the performance of CPU-only Theano and TensorFlow may vary
144 | greatly depending on how they are installed. We found a [Conda][conda]
145 | installation is usually the easiest and most efficient. The Theano website
146 | [provides instructions][theano-install] on optimized installation which also
147 | works well if we have the root privilege. In our hands, a `pip` installation
148 | without efficient BLAS preinstalled is fairly slow, potentially because Theano
149 | is calling BLAS routines through NumPy.
150 | 
151 | 
152 | 
153 | ## Implementing Recurrent Neural Network (RNN)
154 | 
155 | There are usually two ways to implement an RNN. First, we may unroll the RNN to
156 | a feed-forward network (FNN) and then apply the same FNN algorithm for
157 | training. Second, we may keep the history of values and derivatives at each
158 | node without unrolling and then traverse the history backwardly to calculate
159 | the gradients. KANN takes the first approach for training as we thought this
160 | might be easier to implement. Many other deep learning libraries take the
161 | second or both approaches. For prediction, as we do not need to keep the
162 | history of computation, both approaches are equally easy to implement in KANN.
163 | 
164 | ### Computational graph of RNN
165 | 
166 | The following figure shows the computational graph of a vanilla RNN where
167 | "Dense layer" is a red dotted box in the [MLP figure](images/mlp.png).
168 | 
169 | ![](images/rnn.png)
170 | 
171 | This graph differs from typical feedforward graphs in two aspects: the presence
172 | of the backward link (in green) and a pooling node (in blue). The backward link
173 | indicates where the recurrence is. In the source code, the `kad_node_t::pre`
174 | pointer in [kautodiff.h](../kautodiff.h) implements such a link. The pooling
175 | node specifies where data at different time steps should be pooled together.
176 | The figure above pools the cost at each step. It is typically used when the
177 | output is also a sequence (e.g. for text generation). Another typical use case
178 | is to put the pooling node right above the "Dense layer". In this case, the
179 | network output is not a sequence. We may use such a network, for example, to
180 | classify music.
181 | 
182 | Computational graphs of LSTM and GRU networks are similar, except that the red
183 | dotted box becomes more complex.
184 | 
185 | ### Unrolling RNN
186 | 
187 | The following figure shows an FNN by unrolling the RNN twice:
188 | 
189 | ![](images/rnn-unroll.png)
190 | 
191 | The green edge represents the backward link in the previous figure. This FNN
192 | takes (*x1*,*x2*) as input and (*y1*,*y2*) as truth output.
193 | 
194 | Generally, to unroll an RNN to a certain length, we first duplicate internal
195 | nodes and input/output nodes "below" the (possibly multiple) pooling nodes and
196 | connect different time steps following the backward links. As other external
197 | nodes are not duplicated, we also need to adjust the children of some
198 | duplicated nodes.  We then pool the duplicated descendants of the pooling nodes
199 | and finally copy over the nodes "above" the pooling nodes. Another way to
200 | understand the unrolling algorithm is to simulate forward data flows in the
201 | original RNN graph.  We add a node to the unrolled graph when the data flow
202 | visits the node; we add a pooling node when all its duplicated descendants have
203 | been computed; we copy over the rest of nodes when all pooling nodes have been
204 | computed.
205 | 
206 | 
207 | 
208 | [tensor-wiki]: https://en.wikipedia.org/wiki/Tensor
209 | [tf]: https://www.tensorflow.org
210 | [theano]: http://deeplearning.net/software/theano/
211 | [ad]: https://en.wikipedia.org/wiki/Automatic_differentiation
212 | [mlp]: https://en.wikipedia.org/wiki/Multilayer_perceptron
213 | [rnnjs]: https://github.com/karpathy/recurrentjs
214 | [matmul]: https://en.wikipedia.org/wiki/Matrix_multiplication
215 | [blas]: https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms
216 | [dlbook]: http://www.deeplearningbook.org
217 | [cudnn]: https://arxiv.org/abs/1410.0759
218 | [im2col]: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cpp
219 | [nnpack]: https://github.com/Maratyszcza/NNPACK
220 | [conda]: http://conda.pydata.org/docs/using/pkgs.html
221 | [theano-install]: http://deeplearning.net/software/theano/install_ubuntu.html
222 | 


--------------------------------------------------------------------------------
/lib/kann-master/doc/11math.tex:
--------------------------------------------------------------------------------
 1 | \documentclass[10pt]{article}
 2 | 
 3 | \usepackage{amssymb}
 4 | 
 5 | \begin{document}
 6 | 
 7 | \section{Back Propagations}
 8 | 
 9 | \subsection{Matrix product}
10 | 
11 | \emph{Without proof}, we note that if ${\bf C}={\bf A}\cdot{\bf B}$, we can backprop
12 | the gradient of ${\bf C}$ to ${\bf A}$ and ${\bf B}$ as follows:
13 | \[
14 | \frac{\partial F}{\partial {\bf A}}\gets\frac{\partial F}{\partial {\bf C}}\cdot {\bf B}^\intercal
15 | \]
16 | \[
17 | \frac{\partial F}{\partial {\bf B}}\gets{\bf A}^\intercal\cdot\frac{\partial F}{\partial {\bf C}}
18 | \]
19 | Similarly, if ${\bf Y}={\bf X}\cdot{\bf W}^\intercal$, the backprop rule is:
20 | \[
21 | \frac{\partial F}{\partial {\bf X}}\gets\frac{\partial F}{\partial {\bf Y}}\cdot {\bf W}
22 | \]
23 | \[
24 | \frac{\partial F}{\partial {\bf W}}\gets\left(\frac{\partial F}{\partial {\bf Y}}\right)^\intercal\cdot{\bf X}
25 | \]
26 | We use `$\gets$' instead of an equal sign because matrix ${\bf A}$ (and others)
27 | may be used elsewhere, which also contributes to ${\bf A}$'s gradient.
28 | 
29 | \subsection{Layer normalization}
30 | 
31 | Given an input vector ${\bf x}$, we compute the normalized output ${\bf z}$ as
32 | follows (i.e. the forward pass):
33 | \[
34 | y_k = x_k - \frac{1}{n}\sum_{i=1}^n x_i
35 | \]
36 | \[
37 | \sigma^2 = \frac{1}{n}\sum_i y_i^2
38 | \]
39 | \[
40 | z_k = \frac{y_k}{\sigma}
41 | \]
42 | The derivatives of intermediate variables ${\bf y}$ and $\sigma$ are:
43 | \[
44 | \frac{\partial y_i}{\partial x_k} = \delta_{ik}-\frac{1}{n}
45 | \]
46 | \[
47 | \frac{\partial\sigma}{\partial y_k} = \frac{y_k}{n\sigma}
48 | \]
49 | Backprop from ${\bf z}$ to ${\bf y}$ (we use `$=$' here because ${\bf y}$ is a transient variable):
50 | \[
51 | \frac{\partial F}{\partial y_k}=\sum_i\frac{\partial F}{\partial z_i}\frac{\partial z_i}{\partial y_k}
52 | =\frac{1}{\sigma}\left(\frac{\partial F}{\partial z_k}-\frac{z_k}{n}\sum_iz_i\frac{\partial F}{\partial z_i}\right)
53 | \]
54 | and from ${\bf y}$ to the input ${\bf x}$:
55 | \begin{eqnarray*}
56 | \frac{\partial F}{\partial x_k}&\gets&\frac{\partial F}{\partial y_k} - \frac{1}{n}\sum_i\frac{\partial F}{\partial y_i}\\
57 | &=&\frac{1}{\sigma}\left(\frac{\partial F}{\partial z_k}-\frac{z_k}{n}\sum_iz_i\frac{\partial F}{\partial z_i}\right)
58 | -\frac{1}{n\sigma}\sum_i\left(\frac{\partial F}{\partial z_i}-\frac{z_i}{n}\sum_jz_j\frac{\partial F}{\partial z_j}\right)
59 | \end{eqnarray*}
60 | As $\sum_i z_i=0$, we have backprop from ${\bf z}$ to ${\bf x}$:
61 | \[
62 | \frac{\partial F}{\partial x_k}
63 | \gets\frac{1}{\sigma}\left(\frac{\partial F}{\partial z_k}-\frac{1}{n}\sum_i\frac{\partial F}{\partial z_i}-\frac{z_k}{n}\sum_iz_i\frac{\partial F}{\partial z_i}\right)
64 | \]
65 | The two sums can be pre-calculated, so the total time complexity is linear in
66 | $n$, the dimension of ${\bf x}$.
67 | 
68 | \end{document}
69 | 


--------------------------------------------------------------------------------
/lib/kann-master/doc/README.md:
--------------------------------------------------------------------------------
1 | This directory contains KANN documentation.
2 | 
3 | * [01user.md](01user.md): for API users
4 | 
5 | * [02dev.md](02dev.md): for hackers and developers who want to understand the
6 |   internals of KANN.
7 | 
8 | * 11math.tex (in LaTeX): some math notes.
9 | 


--------------------------------------------------------------------------------
/lib/kann-master/doc/images/autodiff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/42io/c_keyword_spotting/255a2aa920f18e05f113b4870124d291140491c0/lib/kann-master/doc/images/autodiff.png


--------------------------------------------------------------------------------
/lib/kann-master/doc/images/matmul1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/42io/c_keyword_spotting/255a2aa920f18e05f113b4870124d291140491c0/lib/kann-master/doc/images/matmul1.png


--------------------------------------------------------------------------------
/lib/kann-master/doc/images/matmul2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/42io/c_keyword_spotting/255a2aa920f18e05f113b4870124d291140491c0/lib/kann-master/doc/images/matmul2.png


--------------------------------------------------------------------------------
/lib/kann-master/doc/images/mlp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/42io/c_keyword_spotting/255a2aa920f18e05f113b4870124d291140491c0/lib/kann-master/doc/images/mlp.png


--------------------------------------------------------------------------------
/lib/kann-master/doc/images/rnn-unroll.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/42io/c_keyword_spotting/255a2aa920f18e05f113b4870124d291140491c0/lib/kann-master/doc/images/rnn-unroll.png


--------------------------------------------------------------------------------
/lib/kann-master/doc/images/rnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/42io/c_keyword_spotting/255a2aa920f18e05f113b4870124d291140491c0/lib/kann-master/doc/images/rnn.png


--------------------------------------------------------------------------------
/lib/kann-master/examples/README.md:
--------------------------------------------------------------------------------
 1 | ## Examples Using KANN
 2 | 
 3 | Data used for these examples and pre-trained KANN models from the data can be
 4 | downloaded from [this link][data]:
 5 | ```sh
 6 | curl -sL https://github.com/attractivechaos/kann/releases/download/v0/kann-data.tgz | tar -zxf -
 7 | curl -sL https://github.com/attractivechaos/kann/releases/download/v0/kann-models.tgz | tar -zxf -
 8 | ```
 9 | 
10 | ### Multi-layer perceptron
11 | 
12 | Implemented in [mlp.c](mlp.c). It reads TAB delimited files, where on each
13 | line, the first column is an arbitrary name and the remaining columns give a
14 | vector. On training, you need to provide two files, one for network input and
15 | one for output. On prediction, only network input is needed.
16 | ```sh
17 | ./mlp -o mnist-mlp.kan kann-data/mnist-train-?.snd.gz
18 | ./mlp -i mnist-mlp.kan kann-data/mnist-test-x.snd.gz | kann-data/mnist-eval.pl
19 | ```
20 | 
21 | ### Tied-weight denoising autoencoder
22 | 
23 | Implemented in [ae.c](ae.c). It takes the same format as `mlp.c`. This example
24 | shows how to construct a neural network with shared weights.
25 | 
26 | ### Variational autoencoder
27 | 
28 | Implemented in [vae.c](vae.c). It uses sampling and a complex cost function.
29 | ```sh
30 | ./vae -o mnist-vae.kan -c 3 kann-data/mnist-train-x.snd.gz   # code dimension is 3
31 | ./vae -i mnist-vae.kan -A kann-data/mnist-test-x.snd.gz | kann-data/mnist-ascii.pl # reconstruction
32 | ./vae -i mnist-vae.kan -g 10 | kann-data/mnist-ascii.pl    # generate 10 random images
33 | ```
34 | 
35 | ### CNN for MNIST
36 | 
37 | Implemented in [mnist-cnn.c](mnist-cnn.c).
38 | ```sh
39 | ./mnist-cnn -o mnist-cnn.kan -t4 kann-data/mnist-train-?.snd.gz
40 | ./mnist-cnn -i mnist-cnn.kan kann-data/mnist-test-x.snd.gz | kann-data/mnist-eval.pl
41 | ```
42 | 
43 | ### RNN for simple arithmetic
44 | 
45 | Implemented in [rnn-bit.c](rnn-bit.c). This example can easily learn addition:
46 | ```sh
47 | seq 30000 | awk -v m=10000 '{a=int(m*rand());b=int(m*rand());print a,b,a+b}' \
48 |   | ./rnn-bit -m5 -o add.kan -
49 | echo 400958 737471 | ./rnn-bit -Ai add.kan -
50 | ```
51 | Although the model is trained on numbers below 10000, it can be applied to
52 | larger numbers. This example can also learn simple `a*b` where `b` is a number
53 | no more than 100:
54 | ```sh
55 | ./rnn-bit -m50 -l2 -n160 -o mul100.kan -t4 kann-data/mul100.train.txt
56 | echo 15315611231621249 78 | ./rnn-bit -Ai mul100.kan -
57 | ```
58 | A pre-trained model can be found in kann-models. There is also a [Keras-based
59 | implementation](keras/rnn-bit.py). It does not converge. That is possibly
60 | because KANN is taking initial hidden values as variables, which potentially
61 | makes the model easier to learn. KANN uses layer normalization and dropout by
62 | default, but without these operations, training does not stray too far away
63 | like the python version.
64 | 
65 | ### Character-level text generation with RNN
66 | 
67 | Implemented in [textgen.c](textgen.c). This is not a standard model in that the
68 | initial hidden states depend on the previous output. It tends to memorize text
69 | better.
70 | ```sh
71 | ./textgen -o acc.kan accelerando.txt
72 | ./textgen -i acc.kan
73 | ```
74 | You can also find a bigger model in kann-models. It can generate meaningful
75 | text even with near-to-zero temperature.
76 | ```sh
77 | ./textgen -i kann-models/acc-l3-n256r.kan -T 1e-6
78 | ```
79 | 
80 | [data]: https://github.com/attractivechaos/kann/releases/tag/v0
81 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/ae.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <unistd.h>
 3 | #include <assert.h>
 4 | #include <stdio.h>
 5 | #include <math.h>
 6 | #include "kann.h"
 7 | #include "kann_extra/kann_data.h"
 8 | 
 9 | static kann_t *model_gen(int n_in, int n_hidden, float i_dropout) // build an autoencoder graph whose encoding and decoding steps share one weight node
10 | {
11 | 	kad_node_t *x, *t, *w, *r;
12 | 	w = kann_new_weight(n_hidden, n_in); // the single weight node; it is used twice below (in kad_cmul and in kad_matmul)
13 | 	r = kann_new_scalar(KAD_VAR, sqrtf((float)n_in / n_hidden)); // scalar node of kind KAD_VAR with initial value sqrt(n_in / n_hidden)
14 | 	x = kad_feed(2, 1, n_in), x->ext_flag |= KANN_F_IN | KANN_F_TRUTH; // one feed node flagged as both the input and the truth
15 | 	t = kann_layer_dropout(x, i_dropout); // dropout is applied directly to the input node
16 | 	t = kad_tanh(kad_add(kad_cmul(t, w), kann_new_bias(n_hidden))); // encoding step: tanh(cmul(t, w) + bias of size n_hidden)
17 | 	t = kad_mul(t, r); // multiply the hidden values by the scalar node r
18 | 	t = kad_add(kad_matmul(t, w), kann_new_bias(n_in)); // decoding step: matmul with the same w, plus a bias of size n_in
19 | 	t = kad_sigm(t), t->ext_flag = KANN_F_OUT; // the sigmoid node is flagged as the network output
20 | 	t = kad_ce_bin(t, x), t->ext_flag = KANN_F_COST; // kad_ce_bin of the output against the input node x, flagged as the cost
21 | 	return kann_new(t, 0); // create the kann_t from the cost node
22 | }
23 | 
24 | int main(int argc, char *argv[]) // train an autoencoder on <in.knd>, or with -A apply a model loaded via -i to it; returns 0 on success, 1 on usage errors
25 | {
26 | 	int max_epoch = 50, mini_size = 64, max_drop_streak = 10;
27 | 	int i, j, c, n_hidden = 64, seed = 11, to_apply = 0;
28 | 	kann_data_t *in = 0;
29 | 	kann_t *ann = 0;
30 | 	char *out_fn = 0, *in_fn = 0;
31 | 	float lr = 0.01f, frac_val = 0.1f, i_dropout = 0.0f;
32 | 
33 | 	while ((c = getopt(argc, argv, "n:s:r:m:B:o:i:d:A")) >= 0) {
34 | 		if (c == 'n') n_hidden = atoi(optarg);
35 | 		else if (c == 's') seed = atoi(optarg);
36 | 		else if (c == 'i') in_fn = optarg;
37 | 		else if (c == 'o') out_fn = optarg;
38 | 		else if (c == 'r') lr = atof(optarg);
39 | 		else if (c == 'm') max_epoch = atoi(optarg);
40 | 		else if (c == 'B') mini_size = atoi(optarg);
41 | 		else if (c == 'd') i_dropout = atof(optarg);
42 | 		else if (c == 'A') to_apply = 1;
43 | 	}
44 | 	if (argc - optind < 1) { // no data file given: print the usage text and exit with status 1
45 | 		FILE *fp = stdout;
46 | 		fprintf(fp, "Usage: ae [options] <in.knd>\n");
47 | 		fprintf(fp, "Options:\n");
48 | 		fprintf(fp, "  Model construction:\n");
49 | 		fprintf(fp, "    -i FILE     read trained model from FILE []\n");
50 | 		fprintf(fp, "    -o FILE     save trained model to FILE []\n");
51 | 		fprintf(fp, "    -s INT      random seed [%d]\n", seed);
52 | 		fprintf(fp, "    -n INT      number of hidden neurons [%d]\n", n_hidden);
53 | 		fprintf(fp, "    -d FLOAT    dropout at the input layer [%g]\n", i_dropout);
54 | 		fprintf(fp, "  Model training:\n");
55 | 		fprintf(fp, "    -r FLOAT    learning rate [%g]\n", lr);
56 | 		fprintf(fp, "    -m INT      max number of epochs [%d]\n", max_epoch);
57 | 		fprintf(fp, "    -B INT      mini-batch size [%d]\n", mini_size);
58 | 		return 1;
59 | 	}
60 | 
61 | 	kad_trap_fe(); // NOTE(review): presumably enables floating-point exception trapping - confirm in kautodiff
62 | 	kann_srand(seed);
63 | 	in = kann_data_read(argv[optind]);
64 | 	if (in_fn) { // optional model from -i; its input dimension must equal the number of data columns
65 | 		ann = kann_load(in_fn);
66 | 		assert(kann_dim_in(ann) == in->n_col);
67 | 	}
68 | 
69 | 	if (!to_apply) { // train
70 | 		if (!ann) // no model was loaded with -i, so build a fresh one
71 | 			ann = model_gen(in->n_col, n_hidden, i_dropout);
72 | 		kann_train_fnn1(ann, lr, mini_size, max_epoch, max_drop_streak, frac_val, in->n_row, in->x, in->x); // in->x is passed as both input and truth
73 | 		if (out_fn) kann_save(out_fn, ann);
74 | 	} else { // apply
75 | 		if (!ann) { fprintf(stderr, "ERROR: -A requires a trained model given with -i\n"); kann_data_free(in); return 1; } // without -i, ann is still NULL here
76 | 		kann_switch(ann, 0);
77 | 		for (i = 0; i < in->n_row; ++i) {
78 | 			const float *y = kann_apply1(ann, in->x[i]);
79 | 			if (in->rname) printf("%s\t", in->rname[i]);
80 | 			for (j = 0; j < in->n_col; ++j) { // one TAB-separated output value per input column
81 | 				if (j) putchar('\t');
82 | 				printf("%.3g", y[j] + 1.0f - 1.0f); // NOTE(review): the +1 -1 presumably rounds tiny values to 0 before printing - confirm
83 | 			}
84 | 			putchar('\n');
85 | 		}
86 | 	}
87 | 
88 | 	kann_delete(ann);
89 | 	kann_data_free(in);
90 | 	return 0;
91 | }
92 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/inspect.c:
--------------------------------------------------------------------------------
 1 | #include <unistd.h>
 2 | #include <stdlib.h>
 3 | #include "kann.h"
 4 | 
 5 | void kad_print_dot(FILE *fp, int n, kad_node_t **v) // write the n nodes of v to fp as a "digraph { ... }" description in dot syntax
 6 | {
 7 | 	int i, j;
 8 | 	for (i = 0; i < n; ++i) v[i]->tmp = i; // store each node's index in tmp so edges can name children by number
 9 | 	fprintf(fp, "digraph {\n");
10 | 	for (i = n - 1; i >= 0; --i) { // nodes are emitted from the last one to the first
11 | 		kad_node_t *p = v[i];
12 | 		if (p->op > 0) fprintf(fp, "\t%d [label=\"%s\"]\n", i, kad_op_name[p->op]); // only nodes with op > 0 get a label (their operator name)
13 | 		for (j = 0; j < p->n_child; ++j)
14 | 			fprintf(fp, "\t%d -> %d\n", p->child[j]->tmp, i); // one edge from each child to this node
15 | 		if (p->pre) fprintf(fp, "\t%d -> %d [style=dotted,weight=0,constraint=false]\n", i, p->pre->tmp); // dotted edge from this node to its pre node
16 | 	}
17 | 	fprintf(fp, "}\n");
18 | 	for (i = 0; i < n; ++i) v[i]->tmp = 0; // reset tmp to 0 on every node
19 | }
20 | 
21 | int main(int argc, char *argv[])
22 | {
23 | 	int c, *len = 0, n_len = 0;
24 | 	void (*out_func)(FILE *fp, int n, kad_node_t **v) = kad_print_graph;
25 | 	kann_t *ann;
26 | 
27 | 	while ((c = getopt(argc, argv, "dl:")) >= 0)
28 | 		if (c == 'l') ++n_len;
29 | 	if (n_len) len = (int*)calloc(n_len, sizeof(int));
30 | 	optind = 1, n_len = 0;
31 | 	while ((c = getopt(argc, argv, "dl:")) >= 0) {
32 | 		if (c == 'l') len[n_len++] = atoi(optarg);
33 | 		else if (c == 'd') out_func = kad_print_dot;
34 | 	}
35 | 	if (argc - optind == 0) {
36 | 		fprintf(stderr, "Usage: inspect [-l len] <in.kan>\n");
37 | 		return 1;
38 | 	}
39 | 	ann = kann_load(argv[optind]);
40 | 	if (len) {
41 | 		kann_t *un;
42 | 		un = kann_unroll_array(ann, len);
43 | 		out_func(stdout, un->n, un->v);
44 | 		kann_delete_unrolled(un);
45 | 	} else out_func(stdout, ann->n, ann->v);
46 | 	kann_delete(ann);
47 | 	free(len);
48 | 	return 0;
49 | }
50 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/keras/mlp.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import sys, getopt, re, gzip, time
  4 | import numpy as np
  5 | from keras.layers import Dense, Activation, Dropout, Convolution2D, MaxPooling2D, Flatten
  6 | from keras.models import Sequential, load_model
  7 | from keras.optimizers import RMSprop
  8 | 
  9 | #import theano
 10 | #theano.config.openmp = True
 11 | 
 12 | def mlp_data_read(fn):
 13 | 	x, row_names, col_names = [], [], []
 14 | 
 15 | 	def _process_fp(fp):
 16 | 		for l in fp:
 17 | 			t = l[:-1].split('\t')
 18 | 			if l[0] == '#':
 19 | 				col_names = t[1:]
 20 | 			else:
 21 | 				row_names.append(t[0])
 22 | 				x.append(t[1:]);
 23 | 
 24 | 	if re.search(r'\.gz$', fn):
 25 | 		with gzip.open(fn, 'r') as fp:
 26 | 			_process_fp(fp)
 27 | 	else:
 28 | 		with open(fn, 'r') as fp:
 29 | 			_process_fp(fp)
 30 | 	return np.array(x).astype('float32'), row_names, col_names
 31 | 
 32 | def main(argv):
 33 | 	n_hidden, n_epochs, minibatch, lr, heldout, seed, r_hidden, outfn, infn, use_multi_ce = 64, 20, 64, .001, 0.1, 11, 0.0, None, None, False
 34 | 	mnist_cnn = False
 35 | 
 36 | 	def train_help():
 37 | 		print("Usage: mlp.py [options] <input.knd> [output.knd]")
 38 | 		print("Options:")
 39 | 		print("  Model construction:")
 40 | 		print("    -i FILE    load trained model from FILE []")
 41 | 		print("    -o FILE    save trained model to FILE []")
 42 | 		print("    -s INT     random seed [11]")
 43 | 		print("    -n INT     number of hidden neurons [64]")
 44 | 		print("    -d FLOAT   dropout rate at the hidden layers [0.0]")
 45 | 		print("    -M         use multi-class cross-entropy")
 46 | 		print("    -C         MNIST-CNN, for benchmarking only")
 47 | 		print("  Model training:")
 48 | 		print("    -r FLOAT   learning rate [0.001]")
 49 | 		print("    -v FLOAT   fraction of held-out data [0.0]")
 50 | 		print("    -m INT     number of epochs [20]")
 51 | 		print("    -B INT     minibatch size [64]")
 52 | 		sys.exit(1)
 53 | 
 54 | 	try:
 55 | 		opts, args = getopt.getopt(argv[1:], "i:n:m:B:o:r:v:s:d:MC")
 56 | 	except getopt.GetoptError:
 57 | 		train_help()
 58 | 	if len(args) == 0:
 59 | 		train_help()
 60 | 
 61 | 	for opt, arg in opts:
 62 | 		if opt == '-n': n_hidden = int(arg)
 63 | 		elif opt == '-m': n_epochs = int(arg)
 64 | 		elif opt == '-B': minibatch = int(arg)
 65 | 		elif opt == '-i': infn = arg;
 66 | 		elif opt == '-o': outfn = arg
 67 | 		elif opt == '-r': lr = float(arg)
 68 | 		elif opt == '-v': heldout = float(arg)
 69 | 		elif opt == '-d': r_hidden = float(arg)
 70 | 		elif opt == '-s': seed = int(arg)
 71 | 		elif opt == '-M': use_multi_ce = True
 72 | 		elif opt == '-C': mnist_cnn = True
 73 | 
 74 | 	np.random.seed(seed)
 75 | 	x, x_rnames, x_cnames = mlp_data_read(args[0])
 76 | 	if len(args) >= 2: # training
 77 | 		y, y_rnames, y_cnames = mlp_data_read(args[1])
 78 | 		model = Sequential()
 79 | 		if mnist_cnn:
 80 | 			x = x.reshape(x.shape[0], 1, 28, 28)
 81 | 			model.add(Convolution2D(32, 3, 3, input_shape=(1, 28, 28), activation="relu", dim_ordering="th"))
 82 | 			model.add(Convolution2D(32, 3, 3, activation="relu", dim_ordering="th"))
 83 | 			model.add(MaxPooling2D(pool_size=(2, 2), dim_ordering="th"))
 84 | 			if r_hidden > 0.0 and r_hidden < 1.0: model.add(Dropout(r_hidden))
 85 | 			model.add(Flatten())
 86 | 			model.add(Dense(128, activation="relu"))
 87 | 		else:
 88 | 			model.add(Dense(n_hidden, input_dim=len(x[0]), activation='relu'))
 89 | 		if r_hidden > 0.0 and r_hidden < 1.0: model.add(Dropout(r_hidden))
 90 | 		if use_multi_ce:
 91 | 			model.add(Dense(len(y[0]), activation='softmax'))
 92 | 			model.compile(loss='categorical_crossentropy', optimizer=RMSprop(lr=lr), metrics=['accuracy'])
 93 | 		else:
 94 | 			model.add(Dense(len(y[0]), activation='sigmoid'))
 95 | 			model.compile(loss='binary_crossentropy', optimizer=RMSprop(lr=lr), metrics=['accuracy'])
 96 | 		t_cpu = time.clock()
 97 | 		t_real = time.time()
 98 | 		model.fit(x, y, nb_epoch=n_epochs, batch_size=minibatch, validation_split=heldout)
 99 | 		sys.stderr.write("CPU time for training: {:.2f}\n".format(time.clock() - t_cpu))
100 | 		sys.stderr.write("Real time for training: {:.2f}\n".format(time.time() - t_real))
101 | 		if outfn: model.save(outfn)
102 | 	elif len(args) == 1 and infn:
103 | 		model = load_model(infn)
104 | 		y = model.predict(x)
105 | 		for i in range(len(y)):
106 | 			sys.stdout.write(x_rnames[i])
107 | 			for j in range(len(y[i])):
108 | 				sys.stdout.write("\t%g" % y[i][j])
109 | 			sys.stdout.write('\n')
110 | 
111 | if __name__ == "__main__":
112 | 	main(sys.argv)
113 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/keras/rnn-bit.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import sys, getopt, time
  4 | import numpy as np
  5 | from keras.layers import Dense, Activation, GRU, TimeDistributed
  6 | from keras.models import Sequential, load_model
  7 | from keras.optimizers import RMSprop
  8 | 
  9 | #import theano
 10 | #theano.config.openmp = True
 11 | 
 12 | def rb_read_data(fn):
 13 | 	d, n_col = [], 0
 14 | 	with open(fn, 'r') as fp:
 15 | 		for line in fp:
 16 | 			t = line[:-1].split()
 17 | 			if n_col == 0: n_col = len(t)
 18 | 			elif n_col != len(t):
 19 | 				sys.exit("ERROR: different number of fields")
 20 | 			d.append(t)
 21 | 	max_bit = 0
 22 | 	for k in range(len(d)):
 23 | 		for i in range(n_col):
 24 | 			t = d[k][i] = int(d[k][i])
 25 | 			for j in range(64):
 26 | 				if (t&1) == 1: max_bit = j;
 27 | 				t >>= 1
 28 | 	max_bit += 1
 29 | 	x = np.zeros((len(d), max_bit, n_col - 1), dtype=np.bool)
 30 | 	y = np.zeros((len(d), max_bit, 2), dtype=np.bool)
 31 | 	for k in range(len(d)):
 32 | 		for i in range(n_col):
 33 | 			t = d[k][i]
 34 | 			for j in range(max_bit):
 35 | 				if i < n_col - 1:
 36 | 					x[k, j, i] = t & 1
 37 | 				else:
 38 | 					y[k, j, t&1] = 1
 39 | 				t >>= 1
 40 | 	return x, y, n_col - 1, max_bit
 41 | 
 42 | def rb_model_gen(n_in, n_layer, n_hidden, ulen, dropout):
 43 | 	model = Sequential()
 44 | 	model.add(GRU(n_hidden, input_shape=(ulen, n_in), dropout_W=dropout, dropout_U=dropout, return_sequences=True))
 45 | 	for l in range(n_layer - 1):
 46 | 		model.add(GRU(n_hidden, dropout_W=dropout, dropout_U=dropout, return_sequences=True))
 47 | 	model.add(TimeDistributed(Dense(2, activation='softmax')))
 48 | 	return model
 49 | 
 50 | def rb_usage():
 51 | 	print("Usage: rnn-bit.py [options] <data.txt>")
 52 | 	sys.exit(1)
 53 | 
 54 | def main(argv):
 55 | 	lr, to_apply, mbs, n_layer, n_hidden, max_epoch, seed, dropout = 0.01, False, 64, 1, 128, 50, 11, 0.0
 56 | 	infn, outfn = None, None
 57 | 
 58 | 	try:
 59 | 		opts, args = getopt.getopt(argv[1:], "Ar:n:B:m:d:o:i:l:")
 60 | 	except getopt.GetoptError:
 61 | 		rb_usage()
 62 | 	if len(args) < 1:
 63 | 		rb_usage()
 64 | 
 65 | 	for opt, arg in opts:
 66 | 		if opt == '-r': lr = float(arg)
 67 | 		elif opt == '-A': to_apply = True
 68 | 		elif opt == '-l': n_layer = int(arg)
 69 | 		elif opt == '-n': n_hidden = int(arg)
 70 | 		elif opt == '-B': mbs = int(arg)
 71 | 		elif opt == '-m': max_epoch = int(arg)
 72 | 		elif opt == '-d': dropout = float(arg)
 73 | 		elif opt == '-o': outfn = arg
 74 | 		elif opt == '-i': infn = arg
 75 | 
 76 | 	np.random.seed(seed)
 77 | 	x, y, n_in, max_bit = rb_read_data(args[0])
 78 | 
 79 | 	if not to_apply:
 80 | 		t_cpu = time.clock()
 81 | 		t_real = time.time()
 82 | 		model = rb_model_gen(n_in, n_layer, n_hidden, max_bit, dropout)
 83 | 		optimizer = RMSprop(lr=lr)
 84 | 		model.compile(loss='categorical_crossentropy', optimizer=optimizer)
 85 | 		model.fit(x, y, batch_size=mbs, nb_epoch=max_epoch)
 86 | 		sys.stderr.write("CPU time for training: {:.2f}\n".format(time.clock() - t_cpu))
 87 | 		sys.stderr.write("Real time for training: {:.2f}\n".format(time.time() - t_real))
 88 | 		if outfn: model.save(outfn)
 89 | 	elif infn:
 90 | 		model = load_model(infn)
 91 | 		y = model.predict(x)
 92 | 		for i in range(y.shape[0]):
 93 | 			z = 0
 94 | 			for j in range(y.shape[1]):
 95 | 				if y[i, j, 1] > y[i, j, 0]: z |= 1<<j;
 96 | 			print(z)
 97 | 
 98 | if __name__ == "__main__":
 99 | 	main(sys.argv)
100 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/mlp.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <unistd.h>
  3 | #include <assert.h>
  4 | #include <stdio.h>
  5 | #include "kann.h"
  6 | #include "kann_extra/kann_data.h"
  7 | 
  8 | static kann_t *model_gen(int n_in, int n_out, int loss_type, int n_h_layers, int n_h_neurons, float h_dropout)
  9 | {
 10 | 	int i;
 11 | 	kad_node_t *t;
 12 | 	t = kann_layer_input(n_in);
 13 | 	for (i = 0; i < n_h_layers; ++i)
 14 | 		t = kann_layer_dropout(kad_relu(kann_layer_dense(t, n_h_neurons)), h_dropout);
 15 | 	return kann_new(kann_layer_cost(t, n_out, loss_type), 0);
 16 | }
 17 | 
 18 | int main(int argc, char *argv[])
 19 | {
 20 | 	int max_epoch = 50, mini_size = 64, max_drop_streak = 10, loss_type = KANN_C_CEB;
 21 | 	int i, j, c, n_h_neurons = 64, n_h_layers = 1, seed = 11, n_threads = 1;
 22 | 	kann_data_t *in = 0;
 23 | 	kann_t *ann = 0;
 24 | 	char *out_fn = 0, *in_fn = 0;
 25 | 	float lr = 0.001f, frac_val = 0.1f, h_dropout = 0.0f;
 26 | 
 27 | 	while ((c = getopt(argc, argv, "n:l:s:r:m:B:o:i:d:v:Mt:")) >= 0) {
 28 | 		if (c == 'n') n_h_neurons = atoi(optarg);
 29 | 		else if (c == 'l') n_h_layers = atoi(optarg);
 30 | 		else if (c == 's') seed = atoi(optarg);
 31 | 		else if (c == 'i') in_fn = optarg;
 32 | 		else if (c == 'o') out_fn = optarg;
 33 | 		else if (c == 'r') lr = atof(optarg);
 34 | 		else if (c == 'm') max_epoch = atoi(optarg);
 35 | 		else if (c == 'B') mini_size = atoi(optarg);
 36 | 		else if (c == 'd') h_dropout = atof(optarg);
 37 | 		else if (c == 'v') frac_val = atof(optarg);
 38 | 		else if (c == 'M') loss_type = KANN_C_CEM;
 39 | 		else if (c == 't') n_threads = atoi(optarg);
 40 | 	}
 41 | 	if (argc - optind < 1) {
 42 | 		FILE *fp = stdout;
 43 | 		fprintf(fp, "Usage: mlp [options] <in.knd> [truth.knd]\n");
 44 | 		fprintf(fp, "Options:\n");
 45 | 		fprintf(fp, "  Model construction:\n");
 46 | 		fprintf(fp, "    -i FILE     read trained model from FILE []\n");
 47 | 		fprintf(fp, "    -o FILE     save trained model to FILE []\n");
 48 | 		fprintf(fp, "    -s INT      random seed [%d]\n", seed);
 49 | 		fprintf(fp, "    -l INT      number of hidden layers [%d]\n", n_h_layers);
 50 | 		fprintf(fp, "    -n INT      number of hidden neurons per layer [%d]\n", n_h_neurons);
 51 | 		fprintf(fp, "    -d FLOAT    dropout at the hidden layer(s) [%g]\n", h_dropout);
 52 | 		fprintf(fp, "    -M          use multi-class cross-entropy (binary by default)\n");
 53 | 		fprintf(fp, "  Model training:\n");
 54 | 		fprintf(fp, "    -r FLOAT    learning rate [%g]\n", lr);
 55 | 		fprintf(fp, "    -m INT      max number of epochs [%d]\n", max_epoch);
 56 | 		fprintf(fp, "    -B INT      mini-batch size [%d]\n", mini_size);
 57 | 		fprintf(fp, "    -v FLOAT    fraction of data used for validation [%g]\n", frac_val);
 58 | 		fprintf(fp, "    -t INT      number of threads [%d]\n", n_threads);
 59 | 		return 1;
 60 | 	}
 61 | 	if (argc - optind == 1 && in_fn == 0) {
 62 | 		fprintf(stderr, "ERROR: please specify a trained model with option '-i'.\n");
 63 | 		return 1;
 64 | 	}
 65 | 
 66 | 	kad_trap_fe();
 67 | 	kann_srand(seed);
 68 | 	in = kann_data_read(argv[optind]);
 69 | 	if (in_fn) {
 70 | 		ann = kann_load(in_fn);
 71 | 		assert(kann_dim_in(ann) == in->n_col);
 72 | 	}
 73 | 
 74 | 	if (optind+1 < argc) { // train
 75 | 		kann_data_t *out;
 76 | 		out = kann_data_read(argv[optind+1]);
 77 | 		assert(in->n_row == out->n_row);
 78 | 		if (ann) assert(kann_dim_out(ann) == out->n_col);
 79 | 		else ann = model_gen(in->n_col, out->n_col, loss_type, n_h_layers, n_h_neurons, h_dropout);
 80 | 		if (n_threads > 1) kann_mt(ann, n_threads, mini_size);
 81 | 		kann_train_fnn1(ann, lr, mini_size, max_epoch, max_drop_streak, frac_val, in->n_row, in->x, out->x);
 82 | 		if (out_fn) kann_save(out_fn, ann);
 83 | 		kann_data_free(out);
 84 | 	} else { // apply
 85 | 		int n_out;
 86 | 		if (in->cname) {
 87 | 			printf("#sample");
 88 | 			for (i = 0; i < in->n_col; ++i)
 89 | 				printf("\t%s", in->cname[i]);
 90 | 			printf("\n");
 91 | 		}
 92 | 		kann_switch(ann, 0);
 93 | 		n_out = kann_dim_out(ann);
 94 | 		for (i = 0; i < in->n_row; ++i) {
 95 | 			const float *y;
 96 | 			y = kann_apply1(ann, in->x[i]);
 97 | 			if (in->rname) printf("%s\t", in->rname[i]);
 98 | 			for (j = 0; j < n_out; ++j) {
 99 | 				if (j) putchar('\t');
100 | 				printf("%.3g", y[j] + 1.0f - 1.0f);
101 | 			}
102 | 			putchar('\n');
103 | 		}
104 | 	}
105 | 
106 | 	kann_delete(ann);
107 | 	kann_data_free(in);
108 | 	return 0;
109 | }
110 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/mnist-cnn.c:
--------------------------------------------------------------------------------
 1 | #include <unistd.h>
 2 | #include <stdlib.h>
 3 | #include <assert.h>
 4 | #include "kann_extra/kann_data.h"
 5 | #include "kann.h"
 6 | 
 7 | int main(int argc, char *argv[])
 8 | {
 9 | 	kann_t *ann;
10 | 	kann_data_t *x, *y;
11 | 	char *fn_in = 0, *fn_out = 0;
12 | 	int c, mini_size = 64, max_epoch = 20, max_drop_streak = 10, seed = 131, n_h_fc = 128, n_h_flt = 32, n_threads = 1;
13 | 	float lr = 0.001f, dropout = 0.2f, frac_val = 0.1f;
14 | 
15 | 	while ((c = getopt(argc, argv, "i:o:m:h:f:d:s:t:v:")) >= 0) {
16 | 		if (c == 'i') fn_in = optarg;
17 | 		else if (c == 'o') fn_out = optarg;
18 | 		else if (c == 'm') max_epoch = atoi(optarg);
19 | 		else if (c == 'h') n_h_fc = atoi(optarg);
20 | 		else if (c == 'f') n_h_flt = atoi(optarg);
21 | 		else if (c == 'd') dropout = atof(optarg);
22 | 		else if (c == 's') seed = atoi(optarg);
23 | 		else if (c == 't') n_threads = atoi(optarg);
24 | 		else if (c == 'v') frac_val = atof(optarg);
25 | 	}
26 | 
27 | 	if (argc - optind == 0 || (argc - optind == 1 && fn_in == 0)) {
28 | 		FILE *fp = stdout;
29 | 		fprintf(fp, "Usage: mnist-cnn [-i model] [-o model] [-t nThreads] <x.knd> [y.knd]\n");
30 | 		return 1;
31 | 	}
32 | 
33 | 	kad_trap_fe();
34 | 	kann_srand(seed);
35 | 	if (fn_in) {
36 | 		ann = kann_load(fn_in);
37 | 	} else {
38 | 		kad_node_t *t;
39 | 		t = kad_feed(4, 1, 1, 28, 28), t->ext_flag |= KANN_F_IN;
40 | 		t = kad_relu(kann_layer_conv2d(t, n_h_flt, 3, 3, 1, 1, 0, 0)); // 3x3 kernel; 1x1 stride; 0x0 padding
41 | 		t = kad_relu(kann_layer_conv2d(t, n_h_flt, 3, 3, 1, 1, 0, 0));
42 | 		t = kad_max2d(t, 2, 2, 2, 2, 0, 0); // 2x2 kernel; 2x2 stride; 0x0 padding
43 | 		t = kann_layer_dropout(t, dropout);
44 | 		t = kann_layer_dense(t, n_h_fc);
45 | 		t = kad_relu(t);
46 | 		t = kann_layer_dropout(t, dropout);
47 | 		ann = kann_new(kann_layer_cost(t, 10, KANN_C_CEB), 0);
48 | 	}
49 | 
50 | 	x = kann_data_read(argv[optind]);
51 | 	assert(x->n_col == 28 * 28);
52 | 	y = argc - optind >= 2? kann_data_read(argv[optind+1]) : 0;
53 | 
54 | 	if (y) { // training
55 | 		assert(y->n_col == 10);
56 | 		if (n_threads > 1) kann_mt(ann, n_threads, mini_size);
57 | 		kann_train_fnn1(ann, lr, mini_size, max_epoch, max_drop_streak, frac_val, x->n_row, x->x, y->x);
58 | 		if (fn_out) kann_save(fn_out, ann);
59 | 		kann_data_free(y);
60 | 	} else { // applying
61 | 		int i, j, n_out;
62 | 		kann_switch(ann, 0);
63 | 		n_out = kann_dim_out(ann);
64 | 		assert(n_out == 10);
65 | 		for (i = 0; i < x->n_row; ++i) {
66 | 			const float *y;
67 | 			y = kann_apply1(ann, x->x[i]);
68 | 			if (x->rname) printf("%s\t", x->rname[i]);
69 | 			for (j = 0; j < n_out; ++j) {
70 | 				if (j) putchar('\t');
71 | 				printf("%.3g", y[j] + 1.0f - 1.0f);
72 | 			}
73 | 			putchar('\n');
74 | 		}
75 | 	}
76 | 
77 | 	kann_data_free(x);
78 | 	kann_delete(ann);
79 | 	return 0;
80 | }
81 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/rnn-bit.c:
--------------------------------------------------------------------------------
  1 | #include <unistd.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include <assert.h>
  5 | #include <ctype.h>
  6 | #include "kann.h"
  7 | 
  8 | typedef struct {
  9 | 	int n_in, ulen;
 10 | 	int n, m;
 11 | 	uint64_t *x, *y;
 12 | } bit_data_t;
 13 | 
 14 | #define MAX_FIELDS 64
 15 | 
 16 | static int read_int(FILE *fp, uint64_t x[MAX_FIELDS])
 17 | {
 18 | 	char *p, *q, line[1024];
 19 | 	int i;
 20 | 	if (feof(fp) || fgets(line, 1024, fp) == 0) return 0;
 21 | 	for (q = p = line, i = 0; *p; ++p) {
 22 | 		if (isspace(*p)) {
 23 | 			long t;
 24 | 			t = strtol(q, &q, 10);
 25 | 			assert(t >= 0);
 26 | 			x[i++] = t;
 27 | 			if (i == MAX_FIELDS) break;
 28 | 			q = p + 1;
 29 | 		}
 30 | 	}
 31 | 	return i;
 32 | }
 33 | 
 34 | static bit_data_t *read_data(const char *fn)
 35 | {
 36 | 	bit_data_t *d;
 37 | 	FILE *fp;
 38 | 	int i, j;
 39 | 	uint64_t max, x[MAX_FIELDS];
 40 | 
 41 | 	fp = fn && strcmp(fn, "-")? fopen(fn, "r") : stdin;
 42 | 	if (fp == 0) return 0;
 43 | 	d = (bit_data_t*)calloc(1, sizeof(bit_data_t));
 44 | 	while ((i = read_int(fp, x)) > 0) {
 45 | 		assert(d->n == 0 || d->n_in == i - 1);
 46 | 		d->n_in = i - 1;
 47 | 		if (d->n == d->m) {
 48 | 			d->m = d->m? d->m<<1 : 256;
 49 | 			d->x = (uint64_t*)realloc(d->x, d->m * d->n_in * 8);
 50 | 			d->y = (uint64_t*)realloc(d->y, d->m * 8);
 51 | 		}
 52 | 		memcpy(&d->x[d->n * d->n_in], x, d->n_in * 8);
 53 | 		d->y[d->n++] = x[d->n_in];
 54 | 	}
 55 | 	fclose(fp);
 56 | 	for (i = 0, max = 0; i < d->n; ++i) {
 57 | 		int t = i * d->n_in;
 58 | 		for (j = 0; j < d->n_in; ++j)
 59 | 			max = max > d->x[t + j]? max : d->x[t + j];
 60 | 		max = max > d->y[i]? max : d->y[i];
 61 | 	}
 62 | 	for (i = 0; max; max >>= 1, ++i);
 63 | 	d->ulen = i;
 64 | 	return d;
 65 | }
 66 | 
 67 | static void train(kann_t *ann, bit_data_t *d, float lr, int mini_size, int max_epoch, const char *fn, int n_threads)
 68 | {
 69 | 	float **x, **y, *r, best_cost = 1e30f;
 70 | 	int epoch, j, n_var, *shuf;
 71 | 	kann_t *ua;
 72 | 
 73 | 	n_var = kann_size_var(ann);
 74 | 	r = (float*)calloc(n_var, sizeof(float));
 75 | 	x = (float**)malloc(d->ulen * sizeof(float*));
 76 | 	y = (float**)malloc(d->ulen * sizeof(float*));
 77 | 	for (j = 0; j < d->ulen; ++j) {
 78 | 		x[j] = (float*)calloc(mini_size * d->n_in, sizeof(float));
 79 | 		y[j] = (float*)calloc(mini_size * 2, sizeof(float));
 80 | 	}
 81 | 	shuf = (int*)calloc(d->n, sizeof(int));
 82 | 	kann_shuffle(d->n, shuf);
 83 | 
 84 | 	ua = kann_unroll(ann, d->ulen);
 85 | 	kann_set_batch_size(ua, mini_size);
 86 | 	kann_mt(ua, n_threads, mini_size);
 87 | 	kann_feed_bind(ua, KANN_F_IN,    0, x);
 88 | 	kann_feed_bind(ua, KANN_F_TRUTH, 0, y);
 89 | 	kann_switch(ua, 1);
 90 | 	for (epoch = 0; epoch < max_epoch; ++epoch) {
 91 | 		double cost = 0.0;
 92 | 		int tot = 0, tot_base = 0, n_cerr = 0;
 93 | 		for (j = 0; j < d->n - mini_size; j += mini_size) {
 94 | 			int i, b, k;
 95 | 			for (k = 0; k < d->ulen; ++k) {
 96 | 				for (b = 0; b < mini_size; ++b) {
 97 | 					int s = shuf[j + b];
 98 | 					for (i = 0; i < d->n_in; ++i)
 99 | 						x[k][b * d->n_in + i] = (float)(d->x[s * d->n_in + i] >> k & 1);
100 | 					y[k][b * 2] = y[k][b * 2 + 1] = 0.0f;
101 | 					y[k][b * 2 + (d->y[s] >> k & 1)] = 1.0f;
102 | 				}
103 | 			}
104 | 			cost += kann_cost(ua, 0, 1) * d->ulen * mini_size;
105 | 			n_cerr += kann_class_error(ua, &k);
106 | 			tot_base += k;
107 | 			//kad_check_grad(ua->n, ua->v, ua->n-1);
108 | 			kann_RMSprop(n_var, lr, 0, 0.9f, ua->g, ua->x, r);
109 | 			tot += d->ulen * mini_size;
110 | 		}
111 | 		if (cost < best_cost) {
112 | 			best_cost = cost;
113 | 			if (fn) kann_save(fn, ann);
114 | 		}
115 | 		fprintf(stderr, "epoch: %d; cost: %g (class error: %.2f%%)\n", epoch+1, cost / tot, 100.0f * n_cerr / tot_base);
116 | 	}
117 | 
118 | 	for (j = 0; j < d->ulen; ++j) {
119 | 		free(y[j]); free(x[j]);
120 | 	}
121 | 	free(y); free(x); free(r); free(shuf);
122 | }
123 | 
124 | int main(int argc, char *argv[])
125 | {
126 | 	int i, c, seed = 11, n_h_layers = 1, n_h_neurons = 64, mini_size = 64, max_epoch = 50, to_apply = 0, norm = 1, n_threads = 1;
127 | 	float lr = 0.01f, dropout = 0.2f;
128 | 	kann_t *ann = 0;
129 | 	char *fn_in = 0, *fn_out = 0;
130 | 
131 | 	while ((c = getopt(argc, argv, "i:o:l:n:m:r:s:Ad:Nt:")) >= 0) {
132 | 		if (c == 'i') fn_in = optarg;
133 | 		else if (c == 'o') fn_out = optarg;
134 | 		else if (c == 'l') n_h_layers = atoi(optarg);
135 | 		else if (c == 'n') n_h_neurons = atoi(optarg);
136 | 		else if (c == 'm') max_epoch = atoi(optarg);
137 | 		else if (c == 'r') lr = atof(optarg);
138 | 		else if (c == 's') seed = atoi(optarg);
139 | 		else if (c == 'A') to_apply = 1;
140 | 		else if (c == 'N') norm = 0;
141 | 		else if (c == 'd') dropout = atof(optarg);
142 | 		else if (c == 't') n_threads = atoi(optarg);
143 | 	}
144 | 	if (optind == argc) {
145 | 		fprintf(stderr, "Usage: rnn-bit [options] <in.txt>\n");
146 | 		return 1;
147 | 	}
148 | 	kad_trap_fe();
149 | 	kann_srand(seed);
150 | 	if (fn_in) ann = kann_load(fn_in);
151 | 
152 | 	if (!to_apply) {
153 | 		bit_data_t *d;
154 | 		d = read_data(argv[optind]);
155 | 		if (ann == 0) { // model generation
156 | 			kad_node_t *t;
157 | 			int rnn_flag = KANN_RNN_VAR_H0;
158 | 			if (norm) rnn_flag |= KANN_RNN_NORM;
159 | 			t = kann_layer_input(d->n_in);
160 | 			for (i = 0; i < n_h_layers; ++i) {
161 | 				t = kann_layer_gru(t, n_h_neurons, rnn_flag);
162 | 				t = kann_layer_dropout(t, dropout);
163 | 			}
164 | 			ann = kann_new(kann_layer_cost(t, 2, KANN_C_CEM), 0);
165 | 		}
166 | 		train(ann, d, lr, mini_size, max_epoch, fn_out, n_threads);
167 | 		free(d->x); free(d->y); free(d);
168 | 	} else {
169 | 		FILE *fp;
170 | 		uint64_t x[MAX_FIELDS], y;
171 | 		int n, i, k, n_in;
172 | 		n_in = kann_dim_in(ann);
173 | 		fp = strcmp(argv[optind], "-")? fopen(argv[optind], "r") : stdin;
174 | 		while ((n = read_int(fp, x)) > 0) {
175 | 			float x1[MAX_FIELDS];
176 | 			assert(n >= n_in);
177 | 			kann_rnn_start(ann);
178 | 			for (k = 0, y = 0; k < 64; ++k) {
179 | 				const float *y1;
180 | 				for (i = 0; i < n_in; ++i)
181 | 					x1[i] = (float)(x[i] >> k & 1);
182 | 				y1 = kann_apply1(ann, x1);
183 | 				if (y1[1] > y1[0]) y |= 1ULL << k;
184 | 			}
185 | 			kann_rnn_end(ann);
186 | 			printf("%llu\n", (unsigned long long)y);
187 | 		}
188 | 		fclose(fp);
189 | 	}
190 | 
191 | 	kann_delete(ann);
192 | 	return 0;
193 | }
194 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/tensorflow/mlp.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import sys, getopt, os, re, gzip, time
  4 | import numpy as np
  5 | import tensorflow as tf
  6 | 
  7 | def mlp_data_read(fn):
  8 | 	x, row_names, col_names = [], [], []
  9 | 
 10 | 	def _process_fp(fp):
 11 | 		for l in fp:
 12 | 			t = l[:-1].split('\t')
 13 | 			if l[0] == '#':
 14 | 				col_names = t[1:]
 15 | 			else:
 16 | 				row_names.append(t[0])
 17 | 				x.append(t[1:]);
 18 | 
 19 | 	if re.search(r'\.gz$', fn):
 20 | 		with gzip.open(fn, 'r') as fp:
 21 | 			_process_fp(fp)
 22 | 	else:
 23 | 		with open(fn, 'r') as fp:
 24 | 			_process_fp(fp)
 25 | 	return np.array(x).astype('float32'), row_names, col_names
 26 | 
 27 | def mlp_model_gen(n_in, n_out, n_layer, n_hidden, use_multi_ce):
 28 | 	t = tf.placeholder(tf.float32, [None, n_in], name="in")
 29 | 	for i in range(n_layer):
 30 | 		t = tf.layers.dense(t, n_hidden, activation=tf.nn.relu)
 31 | 	t = tf.layers.dense(t, n_out)
 32 | 	out = tf.nn.softmax(t, name="out")
 33 | 	truth = tf.placeholder(tf.float32, [None, n_out], name="truth")
 34 | 	if use_multi_ce: t = tf.nn.softmax_cross_entropy_with_logits(logits=t, labels=truth)
 35 | 	else: t = tf.nn.sigmoid_cross_entropy_with_logits(logits=t, labels=truth)
 36 | 	cost = tf.reduce_mean(t, name="cost")
 37 | 	return cost
 38 | 
 39 | def main(argv):
 40 | 	n_layer, n_hidden, max_epoch, minibatch, lr, seed, r_hidden, outdir, indir, use_multi_ce = 1, 64, 20, 64, .001, 11, 0.0, None, None, False
 41 | 	n_threads = 1
 42 | 
 43 | 	def train_help():
 44 | 		print("Usage: mlp.py [options] <input.knd> [output.knd]")
 45 | 		print("Options:")
 46 | 		print("  Model construction:")
 47 | 		print("    -i DIR     load trained model from DIR []")
 48 | 		print("    -o DIR     save trained model to DIR []")
 49 | 		print("    -s INT     random seed [11]")
 50 | 		print("    -l INT     number of hidden layers [1]")
 51 | 		print("    -n INT     number of hidden neurons per layer [64]")
 52 | 		print("    -d FLOAT   dropout at the hidden layer(s) [0.0]")
 53 | 		print("    -M         use multi-class cross-entropy")
 54 | 		print("  Model training:")
 55 | 		print("    -r FLOAT   learning rate [0.001]")
 56 | 		print("    -m INT     number of epochs [20]")
 57 | 		print("    -B INT     minibatch size [64]")
 58 | 		sys.exit(1)
 59 | 
 60 | 	try:
 61 | 		opts, args = getopt.getopt(argv[1:], "n:m:B:i:o:r:s:d:l:Mt:")
 62 | 	except getopt.GetoptError:
 63 | 		train_help()
 64 | 	if len(args) < 1:
 65 | 		train_help()
 66 | 
 67 | 	for opt, arg in opts:
 68 | 		if opt == '-n': n_hidden = int(arg)
 69 | 		elif opt == '-l': n_layer = int(arg)
 70 | 		elif opt == '-m': max_epoch = int(arg)
 71 | 		elif opt == '-B': minibatch = int(arg)
 72 | 		elif opt == '-i': indir = arg
 73 | 		elif opt == '-o': outdir = arg
 74 | 		elif opt == '-r': lr = float(arg)
 75 | 		elif opt == '-d': r_hidden = float(arg)
 76 | 		elif opt == '-s': seed = int(arg)
 77 | 		elif opt == '-M': use_multi_ce = True
 78 | 		elif opt == '-t': n_threads = int(arg)
 79 | 
 80 | 	tf.set_random_seed(seed)
 81 | 	sys.stderr.write("Reading input...\n")
 82 | 	x_dat, x_rnames, x_cnames = mlp_data_read(args[0])
 83 | 
 84 | 	conf = tf.ConfigProto(intra_op_parallelism_threads=n_threads, inter_op_parallelism_threads=n_threads)
 85 | 	if len(args) >= 2: # training
 86 | 		sys.stderr.write("Reading truth...\n")
 87 | 		y_dat, y_rnames, y_cnames = mlp_data_read(args[1])
 88 | 
 89 | 		sys.stderr.write("Training...\n")
 90 | 		t_cpu = time.clock()
 91 | 		t_real = time.time()
 92 | 		cost = mlp_model_gen(len(x_dat[0]), len(y_dat[0]), n_layer, n_hidden, use_multi_ce)
 93 | 		optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)
 94 | 
 95 | 		with tf.Session(config=conf) as sess:
 96 | 			sess.run(tf.global_variables_initializer())
 97 | 			for epoch in range(max_epoch):
 98 | 				off, tot_cost = 0, 0
 99 | 				while off < len(x_dat):
100 | 					mb = minibatch
101 | 					if mb > len(x_dat) - off: mb = len(x_dat) - off
102 | 					xb, yb = x_dat[off:off+mb], y_dat[off:off+mb]
103 | 					_, c = sess.run([optimizer, cost], { "in:0":xb, "truth:0":yb })
104 | 					tot_cost += c
105 | 					off += mb
106 | 				avg_cost = tot_cost / len(x_dat)
107 | 				sys.stderr.write("epoch: {}; cost: {:.6f}\n".format(epoch+1, avg_cost))
108 | 
109 | 			if outdir:
110 | 				if outdir and not os.path.isdir(outdir): os.mkdir(outdir)
111 | 				saver = tf.train.Saver()
112 | 				saver.save(sess, outdir + "/model")
113 | 
114 | 		sys.stderr.write("CPU time for training: {:.2f}\n".format(time.clock() - t_cpu))
115 | 		sys.stderr.write("Real time for training: {:.2f}\n".format(time.time() - t_real))
116 | 	elif len(args) == 1 and indir: # prediction
117 | 		with tf.Session(config=conf) as sess:
118 | 			saver = tf.train.import_meta_graph(indir + "/model.meta")
119 | 			saver.restore(sess, tf.train.latest_checkpoint(indir))
120 | 			out = tf.get_default_graph().get_tensor_by_name("out:0")
121 | 			y_dat = out.eval({ "in:0":x_dat })
122 | 			for i in range(len(x_dat)):
123 | 				print('{}\t{}'.format(x_rnames[i], "\t".join(map(str, y_dat[i]))))
124 | 
125 | if __name__ == "__main__":
126 | 	main(sys.argv)
127 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/textgen.c:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <stdio.h>
  3 | #include <float.h>
  4 | #include <assert.h>
  5 | #include <unistd.h>
  6 | #include <string.h>
  7 | #include <stdlib.h>
  8 | #include "kann.h"
  9 | 
 10 | #define VERSION "r490"
 11 | 
 12 | typedef struct {
 13 | 	int len, n_char, n_para, *para_len;
 14 | 	uint8_t *data, **para;
 15 | 	int c2i[256];
 16 | } tg_data_t;
 17 | 
 18 | #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
 19 | 
 20 | uint8_t *tg_read_file(const char *fn, int *_len)
 21 | {
 22 | 	const int buf_len = 0x10000;
 23 | 	int len = 0, max = 0, l;
 24 | 	FILE *fp;
 25 | 	uint8_t *buf, *s = 0;
 26 | 
 27 | 	fp = fn && strcmp(fn, "-")? fopen(fn, "rb") : stdin;
 28 | 	buf = (uint8_t*)malloc(buf_len);
 29 | 	while ((l = fread(buf, 1, buf_len, fp)) > 0) {
 30 | 		if (len + l > max) {
 31 | 			max = len + buf_len;
 32 | 			kv_roundup32(max);
 33 | 			s = (uint8_t*)realloc(s, max);
 34 | 		}
 35 | 		memcpy(&s[len], buf, l);
 36 | 		len += l;
 37 | 	}
 38 | 	s = (uint8_t*)realloc(s, len);
 39 | 	*_len = len;
 40 | 	fclose(fp);
 41 | 	free(buf);
 42 | 	return s;
 43 | }
 44 | 
 45 | tg_data_t *tg_init(const char *fn)
 46 | {
 47 | 	int i, j, st, k;
 48 | 	tg_data_t *tg;
 49 | 	tg = (tg_data_t*)calloc(1, sizeof(tg_data_t));
 50 | 	tg->data = tg_read_file(fn, &tg->len);
 51 | 	for (i = 0; i < tg->len; ++i)
 52 | 		tg->c2i[tg->data[i]] = 1;
 53 | 	for (i = j = 0; i < 256; ++i)
 54 | 		if (tg->c2i[i] == 0) tg->c2i[i] = -1;
 55 | 		else tg->c2i[i] = j++;
 56 | 	tg->n_char = j;
 57 | 	for (i = 1, st = 0, tg->n_para = 0; i < tg->len; ++i)
 58 | 		if (tg->data[i] == '\n' && tg->data[i-1] == '\n' && i - st > 1)
 59 | 			++tg->n_para, st = i + 1;
 60 | 	if (i - st > 1) ++tg->n_para;
 61 | 	tg->para = (uint8_t**)calloc(tg->n_para, sizeof(uint8_t*));
 62 | 	tg->para_len = (int*)calloc(tg->n_para, sizeof(int));
 63 | 	for (i = 1, st = k = 0; i < tg->len; ++i)
 64 | 		if (tg->data[i] == '\n' && tg->data[i-1] == '\n' && i - st > 1)
 65 | 			tg->para[k] = &tg->data[st], tg->para_len[k++] = i - st, st = i + 1;
 66 | 	if (i - st > 1) tg->para[k] = &tg->data[st], tg->para_len[k++] = i - st;
 67 | 	for (i = 0; i < tg->len; ++i)
 68 | 		tg->data[i] = tg->c2i[tg->data[i]];
 69 | 	return tg;
 70 | }
 71 | 
 72 | void tg_save(const char *fn, kann_t *ann, const int c2i[256])
 73 | {
 74 | 	FILE *fp;
 75 | 	fp = fn && strcmp(fn, "-")? fopen(fn, "wb") : stdout;
 76 | 	kann_save_fp(fp, ann);
 77 | 	fwrite(c2i, sizeof(int), 256, fp);
 78 | 	fclose(fp);
 79 | }
 80 | 
 81 | kann_t *tg_load(const char *fn, int c2i[256])
 82 | {
 83 | 	FILE *fp;
 84 | 	kann_t *ann;
 85 | 	fp = fn && strcmp(fn, "-")? fopen(fn, "rb") : stdin;
 86 | 	ann = kann_load_fp(fp);
 87 | 	fread(c2i, sizeof(int), 256, fp);
 88 | 	fclose(fp);
 89 | 	return ann;
 90 | }
 91 | 
 92 | void tg_gen(FILE *fp, kann_t *ann, float temp, int len, const int c2i[256], const char *seed)
 93 | {
 94 | 	int i, c, n_char, i2c[256], i_temp;
 95 | 	float x[256];
 96 | 	memset(i2c, 0, 256 * sizeof(int));
 97 | 	for (i = 0; i < 256; ++i)
 98 | 		if (c2i[i] >= 0) i2c[c2i[i]] = i;
 99 | 	n_char = kann_dim_in(ann);
100 | 	i_temp = kann_find(ann, 0, -1);
101 | 	if (i_temp >= 0) ann->v[i_temp]->x[0] = 1.0f / temp;
102 | 	kann_rnn_start(ann);
103 | 	for (c = 0; c < ann->n; ++c) {
104 | 		kad_node_t *p = ann->v[c];
105 | 		if (p->pre) {
106 | 			int l = kad_len(p);
107 | 			for (i = 0; i < l; ++i)
108 | 				p->x[i] = 2.0 * kann_drand() - 1.0;
109 | 		}
110 | 	}
111 | 	if (seed) {
112 | 		const char *p;
113 | 		for (p = seed; *p; ++p) {
114 | 			const float *y;
115 | 			float max = -1.0f;
116 | 			int max_c = -1;
117 | 			c = c2i[(int)*p];
118 | 			assert(c >= 0);
119 | 			memset(x, 0, n_char * sizeof(float));
120 | 			x[c] = 1.0f;
121 | 			y = kann_apply1(ann, x);
122 | 			for (c = 0; c < n_char; ++c)
123 | 				if (max < y[c]) max = y[c], max_c = c;
124 | 			c = max_c;
125 | 		}
126 | 		fprintf(fp, "%s%c", seed, i2c[c]);
127 | 	} else c = c2i[(int)' '];
128 | 	for (i = 0; i < len; ++i) {
129 | 		float s, r;
130 | 		const float *y;
131 | 		memset(x, 0, n_char * sizeof(float));
132 | 		x[c] = 1.0f;
133 | 		y = kann_apply1(ann, x);
134 | 		r = kann_drand();
135 | 		for (c = 0, s = 0.0f; c < n_char; ++c)
136 | 			if (s + y[c] >= r) break;
137 | 			else s += y[c];
138 | 		fputc(i2c[c], fp);
139 | 	}
140 | 	fputc('\n', fp);
141 | 	kann_rnn_end(ann);
142 | 	if (i_temp >= 0) ann->v[i_temp]->x[0] = 1.0f;
143 | }
144 | 
145 | float tg_perplexity(kann_t *ann, const tg_data_t *tg)
146 | {
147 | 	const float tiny = 1e-6;
148 | 	float x[256], p;
149 | 	double loss = 0.0;
150 | 	int i;
151 | 	kann_rnn_start(ann);
152 | 	for (i = 0; i < tg->len - 1; ++i) {
153 | 		const float *y;
154 | 		memset(x, 0, 256 * sizeof(float));
155 | 		x[tg->data[i]] = 1.0f;
156 | 		y = kann_apply1(ann, x);
157 | 		p = y[tg->data[i+1]];
158 | 		loss += logf(p > tiny? p : tiny);
159 | 	}
160 | 	kann_rnn_end(ann);
161 | 	return (float)exp(-loss / (tg->len - 1));
162 | }
163 | 
164 | int tg_urnn_start(kann_t *ann, int batch_size)
165 | {
166 | 	int i, j, n, cnt = 0;
167 | 	for (i = 0; i < ann->n; ++i) {
168 | 		kad_node_t *p = ann->v[i];
169 | 		if (p->pre && p->n_d >= 2 && p->pre->n_d == p->n_d && p->pre->n_child == 0 && kad_len(p)/p->d[0] == kad_len(p->pre)/p->pre->d[0])
170 | 			p->pre->flag = 0;
171 | 	}
172 | 	kann_set_batch_size(ann, batch_size);
173 | 	for (i = 0; i < ann->n; ++i) {
174 | 		kad_node_t *p = ann->v[i];
175 | 		if (p->pre && p->n_d >= 2 && p->pre->n_d == p->n_d && p->pre->n_child == 0 && kad_len(p) == kad_len(p->pre)) {
176 | 			kad_node_t *q = p->pre;
177 | 			n = kad_len(p) / p->d[0];
178 | 			memset(p->x, 0, p->d[0] * n * sizeof(float));
179 | 			if (q->x)
180 | 				for (j = 0; j < p->d[0]; ++j)
181 | 					memcpy(&p->x[j * n], q->x, n * sizeof(float));
182 | 			q->x = p->x;
183 | 			++cnt;
184 | 		}
185 | 	}
186 | 	return cnt;
187 | }
188 | 
/**
 * Train the network on the text in tg with truncated back-propagation.
 *
 * @param ann         network to train; it is unrolled ulen steps internally
 * @param tg          training text (data[] holds alphabet indices)
 * @param lr          learning rate passed to kann_RMSprop()
 * @param ulen        unroll length, i.e. characters per chunk
 * @param vlen        in the first chunk of a sequence, positions < vlen get no target
 * @param cs          number of consecutive chunks processed from each sampled start
 * @param mbs         mini-batch size
 * @param max_epoch   number of epochs
 * @param grad_clip   gradient clipping threshold; clipping is skipped when <= 0
 * @param fn          if non-NULL, the model is saved here after every epoch
 * @param batch_len   number of characters per epoch; capped at tg->len
 * @param n_threads   passed to kann_mt()
 *
 * NOTE(review): start positions are drawn from (tg->len - ulen*cs - 1); this
 * looks like it requires tg->len > ulen*cs -- confirm that callers guarantee it.
 */
void tg_train(kann_t *ann, const tg_data_t *tg, float lr, int ulen, int vlen, int cs, int mbs, int max_epoch, float grad_clip, const char *fn, int batch_len, int n_threads)
{
	int i, epoch, u, n_var, n_char;
	float **x, **y, *r;
	const uint8_t **p;
	kann_t *ua;

	batch_len = batch_len < tg->len? batch_len : tg->len;
	n_char = kann_dim_in(ann);
	// x[u] / y[u]: one-hot input / target buffers for unrolled step u, mbs rows of n_char floats
	x = (float**)calloc(ulen, sizeof(float*));
	y = (float**)calloc(ulen, sizeof(float*));
	for (u = 0; u < ulen; ++u) {
		x[u] = (float*)calloc(n_char * mbs, sizeof(float));
		y[u] = (float*)calloc(n_char * mbs, sizeof(float));
	}
	n_var = kann_size_var(ann);
	r = (float*)calloc(n_var, sizeof(float)); // per-variable state handed to kann_RMSprop()
	p = (const uint8_t**)calloc(mbs, sizeof(const uint8_t*)); // current text position of each batch row

	ua = kann_unroll(ann, ulen);
	tg_urnn_start(ua, mbs);
	kann_mt(ua, n_threads, mbs);
	kann_switch(ua, 1);
	kann_feed_bind(ua, KANN_F_IN,  100, x);
	kann_feed_bind(ua, KANN_F_TRUTH, 0, y);
	for (epoch = 0; epoch < max_epoch; ++epoch) {
		double cost = 0.0;
		int c, j, b, tot = 0, ctot = 0, n_cerr = 0;
		for (i = 0; i < batch_len; i += mbs * cs * ulen) {
			// pick a random start for every row; the +1 keeps p[b][u-1] in range at u == 0
			for (b = 0; b < mbs; ++b)
				p[b] = tg->data + (int)((tg->len - ulen * cs - 1) * kad_drand(0)) + 1;
			for (j = 0; j < ua->n; ++j) // reset initial hidden values to zero
				if (ua->v[j]->pre)
					memset(ua->v[j]->x, 0, kad_len(ua->v[j]) * sizeof(float));
			for (c = 0; c < cs; ++c) {
				int ce_len = c? ulen : ulen - vlen; // positions that carry a target in this chunk
				for (u = 0; u < ulen; ++u) {
					memset(x[u], 0, mbs * n_char * sizeof(float));
					memset(y[u], 0, mbs * n_char * sizeof(float));
				}
				for (b = 0; b < mbs; ++b) {
					for (u = 0; u < ulen; ++u) {
						// input is the previous character, target the current one
						x[u][b * n_char + p[b][u-1]] = 1.0f;
						if (c || u >= vlen)
							y[u][b * n_char + p[b][u]] = 1.0f;
					}
					p[b] += ulen; // continue with the next chunk of the same sequence
				}
				cost += kann_cost(ua, 0, 1) * ulen * mbs;
				n_cerr += kann_class_error(ua, &b); // b is reused to receive the base count
				tot += ce_len * mbs, ctot += b;
				if (grad_clip > 0.0f) kann_grad_clip(grad_clip, n_var, ua->g);
				kann_RMSprop(n_var, lr, 0, 0.9f, ua->g, ua->x, r);
			}
		}
		fprintf(stderr, "epoch: %d; running cost: %g (class error: %.2f%%)\n", epoch+1, cost / tot, 100.0 * n_cerr / ctot);
		// print a short sample after every epoch; tg_gen() asserts that the seed characters are in the alphabet
		tg_gen(stderr, ann, 0.4f, 100, tg->c2i, "is");
		if (fn) tg_save(fn, ann, tg->c2i);
	}
	kann_delete_unrolled(ua);

	for (u = 0; u < ulen; ++u) {
		free(x[u]); free(y[u]);
	}
	free(r); free(y); free(x); free(p);
}
255 | 
256 | static kann_t *model_gen(int model, int n_char, int n_h_layers, int n_h_neurons, float h_dropout, int use_norm)
257 | {
258 | 	int i, flag = use_norm? KANN_RNN_NORM : 0;
259 | 	kad_node_t *t, *t1;
260 | 	t = kann_layer_input(n_char), t->ext_label = 100;
261 | 	for (i = 0; i < n_h_layers; ++i) {
262 | 		if (model == 0) t = kann_layer_rnn(t, n_h_neurons, flag);
263 | 		else if (model == 1) t = kann_layer_lstm(t, n_h_neurons, flag);
264 | 		else if (model == 2) t = kann_layer_gru(t, n_h_neurons, flag);
265 | 		t = kann_layer_dropout(t, h_dropout);
266 | 	}
267 | 	t = kann_layer_dense(t, n_char);
268 | 	t1 = kann_new_scalar(KAD_CONST, 1.0f), t1->ext_label = -1; // -1 is for backward compatibility
269 | 	t = kad_mul(t, t1); // t1 is the inverse of temperature
270 | 	t = kad_softmax(t), t->ext_flag |= KANN_F_OUT;
271 | 	t1 = kad_feed(2, 1, n_char), t1->ext_flag |= KANN_F_TRUTH;
272 | 	t = kad_ce_multi(t, t1), t->ext_flag |= KANN_F_COST;
273 | 	return kann_new(t, 0);
274 | }
275 | 
276 | int main(int argc, char *argv[])
277 | {
278 | 	int c, seed = 11, ulen = 70, vlen = 10, n_h_layers = 1, n_h_neurons = 128, model = 2, max_epoch = 50, mbs = 64, c2i[256];
279 | 	int len_gen = 1000, use_norm = 1, batch_len = 1000000, n_threads = 1, cal_perp = 0, cs = 100;
280 | 	float h_dropout = 0.0f, temp = 0.5f, lr = 0.01f, grad_clip = 10.0f;
281 | 	kann_t *ann = 0;
282 | 	char *fn_in = 0, *fn_out = 0, *prefix = 0;
283 | 
284 | 	while ((c = getopt(argc, argv, "n:l:s:r:m:B:o:i:d:b:T:M:u:L:g:Np:t:xv:c:")) >= 0) {
285 | 		if (c == 'n') n_h_neurons = atoi(optarg);
286 | 		else if (c == 'l') n_h_layers = atoi(optarg);
287 | 		else if (c == 's') seed = atoi(optarg);
288 | 		else if (c == 'i') fn_in = optarg;
289 | 		else if (c == 'o') fn_out = optarg;
290 | 		else if (c == 'r') lr = atof(optarg);
291 | 		else if (c == 'm') max_epoch = atoi(optarg);
292 | 		else if (c == 'B') mbs = atoi(optarg);
293 | 		else if (c == 'd') h_dropout = atof(optarg);
294 | 		else if (c == 'T') temp = atof(optarg);
295 | 		else if (c == 'c') cs = atoi(optarg);
296 | 		else if (c == 'u') ulen = atoi(optarg);
297 | 		else if (c == 'v') vlen = atoi(optarg);
298 | 		else if (c == 'L') len_gen = atoi(optarg);
299 | 		else if (c == 'g') grad_clip = atof(optarg);
300 | 		else if (c == 'N') use_norm = 0;
301 | 		else if (c == 'p') prefix = optarg;
302 | 		else if (c == 'b') batch_len = atoi(optarg);
303 | 		else if (c == 't') n_threads = atoi(optarg);
304 | 		else if (c == 'x') cal_perp = 1;
305 | 		else if (c == 'M') {
306 | 			if (strcmp(optarg, "rnn") == 0) model = 0;
307 | 			else if (strcmp(optarg, "lstm") == 0) model = 1;
308 | 			else if (strcmp(optarg, "gru") == 0) model = 2;
309 | 		}
310 | 	}
311 | 	if (vlen >= ulen) vlen = ulen - 1;
312 | 	if (argc == optind && fn_in == 0) {
313 | 		FILE *fp = stdout;
314 | 		fprintf(fp, "Usage: textgen [options] <in.txt>\n");
315 | 		fprintf(fp, "Options:\n");
316 | 		fprintf(fp, "  Model construction:\n");
317 | 		fprintf(fp, "    -i FILE     read trained model from FILE []\n");
318 | 		fprintf(fp, "    -o FILE     save trained model to FILE []\n");
319 | 		fprintf(fp, "    -s INT      random seed [%d]\n", seed);
320 | 		fprintf(fp, "    -l INT      number of hidden layers [%d]\n", n_h_layers);
321 | 		fprintf(fp, "    -n INT      number of hidden neurons per layer [%d]\n", n_h_neurons);
322 | 		fprintf(fp, "    -M STR      model: rnn, lstm or gru [gru]\n");
323 | 		fprintf(fp, "    -N          don't use layer normalization\n");
324 | 		fprintf(fp, "  Model training:\n");
325 | 		fprintf(fp, "    -r FLOAT    learning rate [%g]\n", lr);
326 | 		fprintf(fp, "    -d FLOAT    dropout at the hidden layer(s) [%g]\n", h_dropout);
327 | 		fprintf(fp, "    -m INT      max number of epochs [%d]\n", max_epoch);
328 | 		fprintf(fp, "    -B INT      mini-batch size [%d]\n", mbs);
329 | 		fprintf(fp, "    -u INT      max unroll [%d]\n", ulen);
330 | 		fprintf(fp, "    -v INT      burn-in length [%d]\n", vlen);
331 | 		fprintf(fp, "    -g FLOAT    gradient clipping threshold [%g]\n", grad_clip);
332 | 		fprintf(fp, "    -c INT      size of a batch [%d]\n", batch_len);
333 | 		fprintf(fp, "    -b          use minibatch (run faster but converge slower)\n");
334 | 		fprintf(fp, "    -x          compute perplexity at the end\n");
335 | 		fprintf(fp, "  Text generation:\n");
336 | 		fprintf(fp, "    -p STR      prefix []\n");
337 | 		fprintf(fp, "    -T FLOAT    temperature [%g]\n", temp);
338 | 		fprintf(fp, "    -L INT      length of text to generate [%d]\n", len_gen);
339 | 		return 1;
340 | 	}
341 | 
342 | 	fprintf(stderr, "Version: %s\n", VERSION);
343 | 	fprintf(stderr, "Command line:");
344 | 	for (c = 0; c < argc; ++c)
345 | 		fprintf(stderr, " %s", argv[c]);
346 | 	fprintf(stderr, "\n");
347 | 	kann_srand(seed);
348 | 	kad_trap_fe();
349 | 	if (fn_in) ann = tg_load(fn_in, c2i);
350 | 
351 | 	if (argc - optind >= 1) { // train
352 | 		tg_data_t *tg;
353 | 		tg = tg_init(argv[optind]);
354 | 		fprintf(stderr, "Read %d paragraphs and %d characters; alphabet size %d\n", tg->n_para, tg->len, tg->n_char);
355 | 		if (!ann) ann = model_gen(model, tg->n_char, n_h_layers, n_h_neurons, h_dropout, use_norm);
356 | 		tg_train(ann, tg, lr, ulen, vlen, cs, mbs, max_epoch, grad_clip, fn_out, batch_len, n_threads);
357 | 		if (cal_perp) fprintf(stderr, "Character-level perplexity: %g\n", tg_perplexity(ann, tg));
358 | 		free(tg->data); free(tg);
359 | 	} else tg_gen(stdout, ann, temp, len_gen, c2i, prefix);
360 | 
361 | 	kann_delete(ann);
362 | 	return 0;
363 | }
364 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/tiny-dnn/Makefile:
--------------------------------------------------------------------------------
# Build the tiny-dnn MLP benchmark (mlp) together with the KANN data reader.
# Set TINY_DNN to the tiny-dnn source directory to add it to the include path.
CC=			gcc
CFLAGS=		-g -Wall -Wc++-compat -O2
CXX=		g++
CXXFLAGS=	-g -Wall -O3 -std=c++11 -ffast-math -mavx
CPPFLAGS=	-DCNN_SINGLE_THREAD -DCNN_USE_AVX
INCLUDES=	-I../..
EXE=		mlp
LIBS=		-lm -lz

ifdef TINY_DNN
	INCLUDES+=-I$(TINY_DNN)
endif

.SUFFIXES:.c .cpp .o
.PHONY:all clean

# Suffix rules: compile C sources with $(CC) and C++ sources with $(CXX).
.c.o:
		$(CC) -c $(CFLAGS) $(INCLUDES) $(CPPFLAGS) $< -o $@

.cpp.o:
		$(CXX) -c $(CXXFLAGS) $(INCLUDES) $(CPPFLAGS) $< -o $@

all:$(EXE)

# The data reader is compiled with -DHAVE_ZLIB; -lz is in $(LIBS).
../../kann_extra/kann_data.o:../../kann_extra/kann_data.c
		$(CC) -c $(CFLAGS) -DHAVE_ZLIB $< -o $@

mlp:mlp.o ../../kann_extra/kann_data.o
		$(CXX) -o $@ $^ $(LIBS)

clean:
		rm -fr *.o */*.o a.out */a.out *.a *.dSYM */*.dSYM $(EXE)
33 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/tiny-dnn/mlp.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <unistd.h>
  3 | #include <stdio.h>
  4 | #include "tiny_dnn/tiny_dnn.h"
  5 | #include "kann_extra/kann_data.h"
  6 | 
  7 | using namespace tiny_dnn;
  8 | using namespace tiny_dnn::activation;
  9 | 
 10 | network<sequential> mlp_model_gen(int n_in, int n_out, int n_layer, int n_hidden)
 11 | {
 12 | 	network<sequential> nn;
 13 | 	int n_last = n_in;
 14 | 	for (int i = 0; i < n_layer; ++i) {
 15 | 		nn << fully_connected_layer<relu>(n_last, n_hidden);
 16 | 		n_last = n_hidden;
 17 | 	}
 18 | 	nn << fully_connected_layer<softmax>(n_last, n_out);
 19 | 	return nn;
 20 | }
 21 | 
 22 | network<sequential> mnist_cnn_model_gen(void)
 23 | {
 24 | 	network<sequential> nn;
 25 | 	nn << convolutional_layer<relu>(28, 28, 3, 1, 32)
 26 | 	   << convolutional_layer<relu>(26, 26, 3, 32, 32)
 27 | 	   << max_pooling_layer<identity>(24, 24, 32, 2)
 28 | 	   << fully_connected_layer<relu>(12 * 12 * 32, 128)
 29 | 	   << fully_connected_layer<softmax>(128, 10);
 30 | 	return nn;
 31 | }
 32 | 
 33 | void mlp_float2vec(std::vector<vec_t> &data, int n, int m, float **x)
 34 | {
 35 | 	for (int i = 0; i < n; ++i) {
 36 | 		vec_t d;
 37 | 		for (int j = 0; j < m; ++j) d.push_back(x[i][j]);
 38 | 		data.push_back(d);
 39 | 	}
 40 | }
 41 | 
 42 | int main(int argc, char *argv[])
 43 | {
 44 | 	int c, n_layer = 1, n_hidden = 64, minibatch = 64, max_epoch = 20, mnist_cnn = 0;
 45 | 	kann_data_t *kdx;
 46 | 	float lr = 0.001f;
 47 | 	char *fn_out = 0, *fn_in = 0;
 48 | 
 49 | 	while ((c = getopt(argc, argv, "i:o:m:B:l:n:r:C")) >= 0) {
 50 | 		if (c == 'o') fn_out = optarg;
 51 | 		else if (c == 'i') fn_in = optarg;
 52 | 		else if (c == 'm') max_epoch = atoi(optarg);
 53 | 		else if (c == 'B') minibatch = atoi(optarg);
 54 | 		else if (c == 'l') n_layer = atoi(optarg);
 55 | 		else if (c == 'n') n_hidden = atoi(optarg);
 56 | 		else if (c == 'r') lr = atof(optarg);
 57 | 		else if (c == 'C') mnist_cnn = 1;
 58 | 	}
 59 | 	if (argc - optind < 1) {
 60 | 		fprintf(stderr, "Usage: mlp [options] <in.knd> [out.knd]\n");
 61 | 		return 1;
 62 | 	}
 63 | 
 64 | 	kdx = kann_data_read(argv[optind]);
 65 | 	if (argc - optind >= 2) { // training
 66 | 		std::vector<vec_t> dx, dy;
 67 | 		kann_data_t *kdy = kann_data_read(argv[optind+1]);
 68 | 		int n = kdx->n_row, n_in = kdx->n_col, n_out = kdy->n_col;
 69 | 
 70 | 		auto nn = mnist_cnn? mnist_cnn_model_gen() : mlp_model_gen(n_in, n_out, n_layer, n_hidden);
 71 | 		mlp_float2vec(dx, n, n_in, kdx->x);
 72 | 		mlp_float2vec(dy, n, n_out, kdy->x);
 73 | 
 74 | 		gradient_descent optimizer;
 75 | 		optimizer.alpha = lr * minibatch;
 76 | 
 77 | 		progress_display disp(static_cast<unsigned long>(n));
 78 | 		auto on_enumerate_epoch = [&]() { disp.restart(static_cast<unsigned long>(n)); };
 79 | 		auto on_enumerate_minibatch = [&]() { disp += minibatch; };
 80 | 
 81 | 		nn.fit<cross_entropy_multiclass>(optimizer, dx, dy, minibatch, max_epoch, on_enumerate_minibatch, on_enumerate_epoch);
 82 | 		if (fn_out) nn.save(fn_out);
 83 | 
 84 | 		kann_data_free(kdy);
 85 | 	} else if (fn_in) {
 86 | 		network<sequential> nn;
 87 | 		std::vector<vec_t> dx;
 88 | 		mlp_float2vec(dx, kdx->n_row, kdx->n_col, kdx->x);
 89 | 		nn.load(fn_in);
 90 | 		for (int i = 0; i < kdx->n_row; ++i) {
 91 | 			vec_t y = nn.predict(dx[i]);
 92 | 			printf("%s", kdx->rname[i]);
 93 | 			for (int j = 0; j < y.size(); ++j)
 94 | 				printf("\t%g", y[j] + 1.0f - 1.0f);
 95 | 			putchar('\n');
 96 | 		}
 97 | 	}
 98 | 	kann_data_free(kdx);
 99 | 	return 0;
100 | }
101 | 


--------------------------------------------------------------------------------
/lib/kann-master/examples/vae.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <unistd.h>
  3 | #include <assert.h>
  4 | #include <stdio.h>
  5 | #include <math.h>
  6 | #include "kann.h"
  7 | #include "kann_extra/kann_data.h"
  8 | 
/* Shorthand for creating a constant scalar node holding the value x. */
#define const_scalar(x) kann_new_scalar(KAD_CONST, (x))
 10 | 
/**
 * Build the variational auto-encoder graph.
 *
 * @param n_in      input (and reconstruction) dimension
 * @param n_hidden  width of the hidden layer in both encoder and decoder
 * @param n_code    dimension of the latent code
 *
 * The cost is the reconstruction cross-entropy (scaled by n_in) plus the
 * latent loss, divided by (n_in + 2 * n_code). The sampled code node carries
 * ext_label 1 and the reconstruction node is flagged KANN_F_OUT.
 */
static kann_t *model_gen(int n_in, int n_hidden, int n_code)
{
	kad_node_t *x, *t, *s, *mu, *sigma;

	// encoder
	x = kad_feed(2, 1, n_in), x->ext_flag |= KANN_F_IN | KANN_F_TRUTH; // the input doubles as the truth
	t = kad_tanh(kann_layer_dense(x, n_hidden));
	mu = kann_layer_dense(t, n_code);
	sigma = kad_relu(kann_layer_dense(t, n_code));
	t = kad_add(kad_sample_normal(sigma), mu), t->ext_label = 1; // sampled code; main() looks it up by label 1

	// decoder
	t = kad_tanh(kann_layer_dense(t, n_hidden));
	t = kad_sigm(kann_layer_dense(t, n_in)), t->ext_flag = KANN_F_OUT;
	t = kad_ce_bin(t, x);
	t = kad_mul(t, const_scalar((float)n_in));

	// latent loss
	s = kad_add(kad_square(sigma), const_scalar(1e-6f)); // sigma^2, plus a pseudo-count
	s = kad_sub(s, kad_log(s));              // sigma^2 - log(sigma^2)
	s = kad_add(s, kad_square(mu));          // mu^2 + sigma^2 - log(sigma^2)
	s = kad_sub(s, const_scalar(1.0f));      // mu^2 + sigma^2 - log(sigma^2) - 1
	s = kad_reduce_sum(s, 1);
	s = kad_mul(s, const_scalar(0.5f));
	s = kad_reduce_mean(s, 0);

	// total cost: reconstruction + latent loss, rescaled
	t = kad_add(t, s);
	t = kad_mul(t, const_scalar(1.0f / (n_in + 2 * n_code))), t->ext_flag |= KANN_F_COST;
	return kann_new(t, 0);
}
 41 | 
 42 | int main(int argc, char *argv[])
 43 | {
 44 | 	int max_epoch = 50, mini_size = 64, max_drop_streak = 10;
 45 | 	int i, j, c, n_hidden = 64, n_code = 2, seed = 11, to_apply = 0, n_gen = 0;
 46 | 	kann_data_t *in = 0;
 47 | 	kann_t *ann = 0;
 48 | 	char *out_fn = 0, *in_fn = 0;
 49 | 	float lr = 0.01f, frac_val = 0.1f;
 50 | 
 51 | 	while ((c = getopt(argc, argv, "n:s:r:m:B:o:i:Ag:c:")) >= 0) {
 52 | 		if (c == 'n') n_hidden = atoi(optarg);
 53 | 		else if (c == 's') seed = atoi(optarg);
 54 | 		else if (c == 'i') in_fn = optarg;
 55 | 		else if (c == 'o') out_fn = optarg;
 56 | 		else if (c == 'r') lr = atof(optarg);
 57 | 		else if (c == 'm') max_epoch = atoi(optarg);
 58 | 		else if (c == 'B') mini_size = atoi(optarg);
 59 | 		else if (c == 'A') to_apply = 1;
 60 | 		else if (c == 'c') n_code = atoi(optarg);
 61 | 		else if (c == 'g') n_gen = atoi(optarg);
 62 | 	}
 63 | 	if (argc - optind < 1 && in_fn == 0 && n_gen == 0) {
 64 | 		FILE *fp = stdout;
 65 | 		fprintf(fp, "Usage: vae [options] <in.knd>\n");
 66 | 		fprintf(fp, "Options:\n");
 67 | 		fprintf(fp, "  Model construction:\n");
 68 | 		fprintf(fp, "    -i FILE     read trained model from FILE []\n");
 69 | 		fprintf(fp, "    -o FILE     save trained model to FILE []\n");
 70 | 		fprintf(fp, "    -s INT      random seed [%d]\n", seed);
 71 | 		fprintf(fp, "    -n INT      number of hidden neurons [%d]\n", n_hidden);
 72 | 		fprintf(fp, "    -c INT      number of codes [%d]\n", n_code);
 73 | 		fprintf(fp, "  Model training:\n");
 74 | 		fprintf(fp, "    -r FLOAT    learning rate [%g]\n", lr);
 75 | 		fprintf(fp, "    -m INT      max number of epochs [%d]\n", max_epoch);
 76 | 		fprintf(fp, "    -B INT      mini-batch size [%d]\n", mini_size);
 77 | 		fprintf(fp, "  Prediction and generation:\n");
 78 | 		fprintf(fp, "    -A          reconstruct input\n");
 79 | 		fprintf(fp, "    -g INT      generate INT samples [%d]\n", n_gen);
 80 | 		return 1;
 81 | 	}
 82 | 
 83 | 	kad_trap_fe();
 84 | 	kann_srand(seed);
 85 | 	if (argc - optind >= 1)
 86 | 		in = kann_data_read(argv[optind]);
 87 | 	if (in_fn) {
 88 | 		ann = kann_load(in_fn);
 89 | 		if (in) assert(kann_dim_in(ann) == in->n_col);
 90 | 	}
 91 | 
 92 | 	if (!to_apply && n_gen == 0) { // train
 93 | 		if (!ann)
 94 | 			ann = model_gen(in->n_col, n_hidden, n_code);
 95 | 		kann_train_fnn1(ann, lr, mini_size, max_epoch, max_drop_streak, frac_val, in->n_row, in->x, in->x);
 96 | 		if (out_fn) kann_save(out_fn, ann);
 97 | 	} else if (to_apply) { // apply
 98 | 		for (i = 0; i < in->n_row; ++i) {
 99 | 			const float *y;
100 | 			y = kann_apply1(ann, in->x[i]);
101 | 			if (in->rname) printf("%s\t", in->rname[i]);
102 | 			for (j = 0; j < in->n_col; ++j) {
103 | 				if (j) putchar('\t');
104 | 				printf("%.3g", y[j] + 1.0f - 1.0f);
105 | 			}
106 | 			putchar('\n');
107 | 		}
108 | 	} else {
109 | 		kad_node_t *t, *out;
110 | 		int j, n_out;
111 | 		kann_set_batch_size(ann, 1);
112 | 		out = ann->v[kann_find(ann, KANN_F_OUT, 0)];
113 | 		t = ann->v[kann_find(ann, 0, 1)];
114 | 		n_code = kad_len(t);
115 | 		n_out = kad_len(out);
116 | 		for (j = 0; j < n_gen; ++j) {
117 | 			kad_eval_disable(t);
118 | 			for (i = 0; i < n_code; ++i)
119 | 				t->x[i] = kad_drand_normal(0);
120 | 			kann_eval(ann, KANN_F_OUT, 0);
121 | 			printf("%d", j + 1);
122 | 			for (i = 0; i < n_out; ++i)
123 | 				printf("\t%g", out->x[i] + 1.0f - 1.0f);
124 | 			putchar('\n');
125 | 		}
126 | 	}
127 | 
128 | 	kann_delete(ann);
129 | 	if (in) kann_data_free(in);
130 | 	return 0;
131 | }
132 | 


--------------------------------------------------------------------------------
/lib/kann-master/kann.c:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <float.h>
  3 | #include <string.h>
  4 | #include <stdlib.h>
  5 | #include <assert.h>
  6 | #include <stdarg.h>
  7 | #include "kann.h"
  8 | 
/* Global verbosity setting, initialised to 3. How the value is interpreted is decided by the code that reads it. */
int kann_verbose = 3;
 10 | 
 11 | /******************************************
 12 |  *** @@BASIC: fundamental KANN routines ***
 13 |  ******************************************/
 14 | 
 15 | static void kad_ext_collate(int n, kad_node_t **a, float **_x, float **_g, float **_c)
 16 | {
 17 | 	int i, j, k, l, n_var;
 18 | 	float *x, *g, *c;
 19 | 	n_var = kad_size_var(n, a);
 20 | 	x = *_x = (float*)realloc(*_x, n_var * sizeof(float));
 21 | 	g = *_g = (float*)realloc(*_g, n_var * sizeof(float));
 22 | 	c = *_c = (float*)realloc(*_c, kad_size_const(n, a) * sizeof(float));
 23 | 	memset(g, 0, n_var * sizeof(float));
 24 | 	for (i = j = k = 0; i < n; ++i) {
 25 | 		kad_node_t *v = a[i];
 26 | 		if (kad_is_var(v)) {
 27 | 			l = kad_len(v);
 28 | 			memcpy(&x[j], v->x, l * sizeof(float));
 29 | 			free(v->x);
 30 | 			v->x = &x[j];
 31 | 			v->g = &g[j];
 32 | 			j += l;
 33 | 		} else if (kad_is_const(v)) {
 34 | 			l = kad_len(v);
 35 | 			memcpy(&c[k], v->x, l * sizeof(float));
 36 | 			free(v->x);
 37 | 			v->x = &c[k];
 38 | 			k += l;
 39 | 		}
 40 | 	}
 41 | }
 42 | 
 43 | static void kad_ext_sync(int n, kad_node_t **a, float *x, float *g, float *c)
 44 | {
 45 | 	int i, j, k;
 46 | 	for (i = j = k = 0; i < n; ++i) {
 47 | 		kad_node_t *v = a[i];
 48 | 		if (kad_is_var(v)) {
 49 | 			v->x = &x[j];
 50 | 			v->g = &g[j];
 51 | 			j += kad_len(v);
 52 | 		} else if (kad_is_const(v)) {
 53 | 			v->x = &c[k];
 54 | 			k += kad_len(v);
 55 | 		}
 56 | 	}
 57 | }
 58 | 
 59 | kann_t *kann_new(kad_node_t *cost, int n_rest, ...)
 60 | {
 61 | 	kann_t *a;
 62 | 	int i, n_roots = 1 + n_rest, has_pivot = 0, has_recur = 0;
 63 | 	kad_node_t **roots;
 64 | 	va_list ap;
 65 | 
 66 | 	if (cost->n_d != 0) return 0;
 67 | 
 68 | 	va_start(ap, n_rest);
 69 | 	roots = (kad_node_t**)malloc((n_roots + 1) * sizeof(kad_node_t*));
 70 | 	for (i = 0; i < n_rest; ++i)
 71 | 		roots[i] = va_arg(ap, kad_node_t*);
 72 | 	roots[i++] = cost;
 73 | 	va_end(ap);
 74 | 
 75 | 	cost->ext_flag |= KANN_F_COST;
 76 | 	a = (kann_t*)calloc(1, sizeof(kann_t));
 77 | 	a->v = kad_compile_array(&a->n, n_roots, roots);
 78 | 
 79 | 	for (i = 0; i < a->n; ++i) {
 80 | 		if (a->v[i]->pre) has_recur = 1;
 81 | 		if (kad_is_pivot(a->v[i])) has_pivot = 1;
 82 | 	}
 83 | 	if (has_recur && !has_pivot) { /* an RNN that doesn't have a pivot; then add a pivot on top of cost and recompile */
 84 | 		cost->ext_flag &= ~KANN_F_COST;
 85 | 		roots[n_roots-1] = cost = kad_avg(1, &cost), cost->ext_flag |= KANN_F_COST;
 86 | 		free(a->v);
 87 | 		a->v = kad_compile_array(&a->n, n_roots, roots);
 88 | 	}
 89 | 	kad_ext_collate(a->n, a->v, &a->x, &a->g, &a->c);
 90 | 	free(roots);
 91 | 	return a;
 92 | }
 93 | 
 94 | kann_t *kann_clone(kann_t *a, int batch_size)
 95 | {
 96 | 	kann_t *b;
 97 | 	b = (kann_t*)calloc(1, sizeof(kann_t));
 98 | 	b->n = a->n;
 99 | 	b->v = kad_clone(a->n, a->v, batch_size);
100 | 	kad_ext_collate(b->n, b->v, &b->x, &b->g, &b->c);
101 | 	return b;
102 | }
103 | 
104 | kann_t *kann_unroll_array(kann_t *a, int *len)
105 | {
106 | 	kann_t *b;
107 | 	b = (kann_t*)calloc(1, sizeof(kann_t));
108 | 	b->x = a->x, b->g = a->g, b->c = a->c; /* these arrays are shared */
109 | 	b->v = kad_unroll(a->n, a->v, &b->n, len);
110 | 	return b;
111 | }
112 | 
113 | kann_t *kann_unroll(kann_t *a, ...)
114 | {
115 | 	kann_t *b;
116 | 	va_list ap;
117 | 	int i, n_pivots, *len;
118 | 	n_pivots = kad_n_pivots(a->n, a->v);
119 | 	len = (int*)calloc(n_pivots, sizeof(int));
120 | 	va_start(ap, a);
121 | 	for (i = 0; i < n_pivots; ++i) len[i] = va_arg(ap, int);
122 | 	va_end(ap);
123 | 	b = kann_unroll_array(a, len);
124 | 	free(len);
125 | 	return b;
126 | }
127 | 
128 | void kann_delete_unrolled(kann_t *a)
129 | {
130 | 	if (a && a->mt) kann_mt(a, 0, 0);
131 | 	if (a && a->v) kad_delete(a->n, a->v);
132 | 	free(a);
133 | }
134 | 
135 | void kann_delete(kann_t *a)
136 | {
137 | 	if (a == 0) return;
138 | 	free(a->x); free(a->g); free(a->c);
139 | 	kann_delete_unrolled(a);
140 | }
141 | 
142 | static void kann_switch_core(kann_t *a, int is_train)
143 | {
144 | 	int i;
145 | 	for (i = 0; i < a->n; ++i)
146 | 		if (a->v[i]->op == 12 && a->v[i]->n_child == 2)
147 | 			*(int32_t*)a->v[i]->ptr = !!is_train;
148 | }
149 | 
/* chk_flg: true if mask is 0 (match anything) or flag has at least one bit of mask set.
 * chk_lbl: true if query is 0 (match anything) or label equals query. */
#define chk_flg(flag, mask) ((mask) == 0 || ((flag) & (mask)))
#define chk_lbl(label, query) ((query) == 0 || (label) == (query))
152 | 
153 | int kann_find(const kann_t *a, uint32_t ext_flag, int32_t ext_label)
154 | {
155 | 	int i, k, r = -1;
156 | 	for (i = k = 0; i < a->n; ++i)
157 | 		if (chk_flg(a->v[i]->ext_flag, ext_flag) && chk_lbl(a->v[i]->ext_label, ext_label))
158 | 			++k, r = i;
159 | 	return k == 1? r : k == 0? -1 : -2;
160 | }
161 | 
162 | int kann_feed_bind(kann_t *a, uint32_t ext_flag, int32_t ext_label, float **x)
163 | {
164 | 	int i, k;
165 | 	if (x == 0) return 0;
166 | 	for (i = k = 0; i < a->n; ++i)
167 | 		if (kad_is_feed(a->v[i]) && chk_flg(a->v[i]->ext_flag, ext_flag) && chk_lbl(a->v[i]->ext_label, ext_label))
168 | 			a->v[i]->x = x[k++];
169 | 	return k;
170 | }
171 | 
172 | int kann_feed_dim(const kann_t *a, uint32_t ext_flag, int32_t ext_label)
173 | {
174 | 	int i, k, n = 0;
175 | 	for (i = k = 0; i < a->n; ++i)
176 | 		if (kad_is_feed(a->v[i]) && chk_flg(a->v[i]->ext_flag, ext_flag) && chk_lbl(a->v[i]->ext_label, ext_label))
177 | 			++k, n = a->v[i]->n_d > 1? kad_len(a->v[i]) / a->v[i]->d[0] : a->v[i]->n_d == 1? a->v[i]->d[0] : 1;
178 | 	return k == 1? n : k == 0? -1 : -2;
179 | }
180 | 
181 | static float kann_cost_core(kann_t *a, int cost_label, int cal_grad)
182 | {
183 | 	int i_cost;
184 | 	float cost;
185 | 	i_cost = kann_find(a, KANN_F_COST, cost_label);
186 | 	assert(i_cost >= 0);
187 | 	cost = *kad_eval_at(a->n, a->v, i_cost);
188 | 	if (cal_grad) kad_grad(a->n, a->v, i_cost);
189 | 	return cost;
190 | }
191 | 
192 | int kann_eval(kann_t *a, uint32_t ext_flag, int ext_label)
193 | {
194 | 	int i, k;
195 | 	for (i = k = 0; i < a->n; ++i)
196 | 		if (chk_flg(a->v[i]->ext_flag, ext_flag) && chk_lbl(a->v[i]->ext_label, ext_label))
197 | 			++k, a->v[i]->tmp = 1;
198 | 	kad_eval_marked(a->n, a->v);
199 | 	return k;
200 | }
201 | 
202 | void kann_rnn_start(kann_t *a)
203 | {
204 | 	int i;
205 | 	kann_set_batch_size(a, 1);
206 | 	for (i = 0; i < a->n; ++i) {
207 | 		kad_node_t *p = a->v[i];
208 | 		if (p->pre) { /* NB: BE CAREFUL of the interaction between kann_rnn_start() and kann_set_batch_size() */
209 | 			kad_node_t *q = p->pre;
210 | 			if (q->x) memcpy(p->x, q->x, kad_len(p) * sizeof(float));
211 | 			else memset(p->x, 0, kad_len(p) * sizeof(float));
212 | 			if (q->n_child > 0) free(q->x);
213 | 			q->x = p->x;
214 | 		}
215 | 	}
216 | }
217 | 
218 | void kann_rnn_end(kann_t *a)
219 | {
220 | 	int i;
221 | 	kad_ext_sync(a->n, a->v, a->x, a->g, a->c);
222 | 	for (i = 0; i < a->n; ++i)
223 | 		if (a->v[i]->pre && a->v[i]->pre->n_child > 0)
224 | 			a->v[i]->pre->x = (float*)calloc(kad_len(a->v[i]->pre), sizeof(float));
225 | }
226 | 
227 | static int kann_class_error_core(const kann_t *ann, int *base)
228 | {
229 | 	int i, j, k, m, n, off, n_err = 0;
230 | 	for (i = 0, *base = 0; i < ann->n; ++i) {
231 | 		kad_node_t *p = ann->v[i];
232 | 		if (((p->op == 13 && (p->n_child == 2 || p->n_child == 3)) || (p->op == 22 && p->n_child == 2)) && p->n_d == 0) { /* ce_bin or ce_multi */
233 | 			kad_node_t *x = p->child[0], *t = p->child[1];
234 | 			n = t->d[t->n_d - 1], m = kad_len(t) / n;
235 | 			for (j = off = 0; j < m; ++j, off += n) {
236 | 				float t_sum = 0.0f, t_min = 1.0f, t_max = 0.0f, x_max = 0.0f, x_min = 1.0f;
237 | 				int x_max_k = -1, t_max_k = -1;
238 | 				for (k = 0; k < n; ++k) {
239 | 					float xk = x->x[off+k], tk = t->x[off+k];
240 | 					t_sum += tk;
241 | 					t_min = t_min < tk? t_min : tk;
242 | 					x_min = x_min < xk? x_min : xk;
243 | 					if (t_max < tk) t_max = tk, t_max_k = k;
244 | 					if (x_max < xk) x_max = xk, x_max_k = k;
245 | 				}
246 | 				if (t_sum - 1.0f == 0 && t_min >= 0.0f && x_min >= 0.0f && x_max <= 1.0f) {
247 | 					++(*base);
248 | 					n_err += (x_max_k != t_max_k);
249 | 				}
250 | 			}
251 | 		}
252 | 	}
253 | 	return n_err;
254 | }
255 | 
256 | /*************************
257 |  * @@MT: multi-threading *
258 |  *************************/
259 | 
260 | #ifdef HAVE_PTHREAD
261 | #include <pthread.h>
262 | 
263 | struct mtaux_t;
264 | 
typedef struct { /* per-worker data */
	kann_t *a;         /* this worker's clone of the network (created in kann_mt()) */
	float cost;        /* cost computed by the worker's last job */
	int action;        /* 0: idle; -1: exit; the worker waits while it is 0 */
	pthread_t tid;     /* thread handle; workers 1..n_threads-1 are started in kann_mt() */
	struct mtaux_t *g; /* back pointer to the shared data */
} mtaux1_t;
272 | 
typedef struct mtaux_t { /* cross-worker data */
	int n_threads, max_batch_size;       /* pool size and the batch size it was split for */
	int cal_grad, cost_label, eval_out;  /* parameters of the current job, read by the workers */
	volatile int n_idle; /* we will be busy waiting on this, so volatile necessary */
	pthread_mutex_t mtx; /* protects action and n_idle */
	pthread_cond_t cv;   /* workers wait on this for a new action */
	mtaux1_t *mt;        /* array of n_threads per-worker records */
} mtaux_t;
281 | 
/*
 * Worker thread body. Each round the worker marks itself idle, sleeps on the
 * condition variable until its action becomes non-zero, and then either exits
 * (action == -1) or runs one job on its own clone: evaluating the output
 * nodes if eval_out is set, computing the cost (and gradient) otherwise.
 */
static void *mt_worker(void *data) /* pthread worker */
{
	mtaux1_t *mt1 = (mtaux1_t*)data;
	mtaux_t *mt = mt1->g;
	for (;;) {
		int action;
		pthread_mutex_lock(&mt->mtx);
		mt1->action = 0;
		++mt->n_idle; /* tell the main thread that this worker is waiting */
		while (mt1->action == 0)
			pthread_cond_wait(&mt->cv, &mt->mtx);
		action = mt1->action; /* copy under the lock */
		pthread_mutex_unlock(&mt->mtx);
		if (action == -1) break;

		if (mt->eval_out) kann_eval(mt1->a, KANN_F_OUT, 0);
		else mt1->cost = kann_cost_core(mt1->a, mt->cost_label, mt->cal_grad);
	}
	pthread_exit(0);
}
302 | 
/*
 * Shut down a worker pool: ask workers 1..n_threads-1 to exit, join them,
 * delete every per-worker network clone and release the synchronisation
 * objects and the struct itself.
 */
static void mt_destroy(mtaux_t *mt) /* de-allocate an entire mtaux_t struct */
{
	int i;
	pthread_mutex_lock(&mt->mtx);
	mt->n_idle = 0;
	for (i = 1; i < mt->n_threads; ++i) mt->mt[i].action = -1; /* -1 makes mt_worker() leave its loop */
	pthread_cond_broadcast(&mt->cv);
	pthread_mutex_unlock(&mt->mtx);
	for (i = 1; i < mt->n_threads; ++i) pthread_join(mt->mt[i].tid, 0);
	for (i = 0; i < mt->n_threads; ++i) kann_delete(mt->mt[i].a); /* slot 0 has a clone but no thread */
	free(mt->mt);
	pthread_cond_destroy(&mt->cv);
	pthread_mutex_destroy(&mt->mtx);
	free(mt);
}
318 | 
319 | void kann_mt(kann_t *ann, int n_threads, int max_batch_size)
320 | {
321 | 	mtaux_t *mt;
322 | 	int i, k;
323 | 
324 | 	if (n_threads <= 1) {
325 | 		if (ann->mt) mt_destroy((mtaux_t*)ann->mt);
326 | 		ann->mt = 0;
327 | 		return;
328 | 	}
329 | 	if (n_threads > max_batch_size) n_threads = max_batch_size;
330 | 	if (n_threads <= 1) return;
331 | 
332 | 	mt = (mtaux_t*)calloc(1, sizeof(mtaux_t));
333 | 	mt->n_threads = n_threads, mt->max_batch_size = max_batch_size;
334 | 	pthread_mutex_init(&mt->mtx, 0);
335 | 	pthread_cond_init(&mt->cv, 0);
336 | 	mt->mt = (mtaux1_t*)calloc(n_threads, sizeof(mtaux1_t));
337 | 	for (i = k = 0; i < n_threads; ++i) {
338 | 		int size = (max_batch_size - k) / (n_threads - i);
339 | 		mt->mt[i].a = kann_clone(ann, size);
340 | 		mt->mt[i].g = mt;
341 | 		k += size;
342 | 	}
343 | 	for (i = 1; i < n_threads; ++i)
344 | 		pthread_create(&mt->mt[i].tid, 0, mt_worker, &mt->mt[i]);
345 | 	while (mt->n_idle < n_threads - 1); /* busy waiting until all threads in sync */
346 | 	ann->mt = mt;
347 | }
348 | 
/*
 * Hand the current mini-batch of network a to the worker pool.
 *
 * The batch is divided among the slots, every clone's feed nodes are pointed
 * at its slice of a's feed data, the current variable values are copied into
 * every clone, and the workers are woken up. The job for slot 0 is prepared
 * here as well, but no thread picks it up; the caller runs it.
 *
 * @param a          network with a worker pool attached (a->mt != NULL)
 * @param cost_label stored for workers to pass to kann_cost_core()
 * @param cal_grad   stored for workers to pass to kann_cost_core()
 * @param eval_out   non-zero to make workers call kann_eval() on output nodes instead
 */
static void mt_kickoff(kann_t *a, int cost_label, int cal_grad, int eval_out)
{
	mtaux_t *mt = (mtaux_t*)a->mt;
	int i, j, k, B, n_var;

	B = kad_sync_dim(a->n, a->v, -1); /* get the current batch size */
	assert(B <= mt->max_batch_size); /* TODO: can be relaxed */
	n_var = kann_size_var(a);

	pthread_mutex_lock(&mt->mtx);
	mt->cost_label = cost_label, mt->cal_grad = cal_grad, mt->eval_out = eval_out;
	for (i = k = 0; i < mt->n_threads; ++i) {
		int size = (B - k) / (mt->n_threads - i); /* this slot's share of the batch */
		for (j = 0; j < a->n; ++j)
			if (kad_is_feed(a->v[j])) /* share the caller's feed buffer: start at sample k, no copy */
				mt->mt[i].a->v[j]->x = &a->v[j]->x[k * kad_len(a->v[j]) / a->v[j]->d[0]];
		kad_sync_dim(mt->mt[i].a->n, mt->mt[i].a->v, size); /* TODO: we can point ->x to internal nodes, too */
		k += size;
		memcpy(mt->mt[i].a->x, a->x, n_var * sizeof(float)); /* give the clone the current variable values */
		mt->mt[i].action = 1;
	}
	mt->n_idle = 0; /* workers increment this again as they finish */
	pthread_cond_broadcast(&mt->cv);
	pthread_mutex_unlock(&mt->mtx);
}
374 | 
375 | float kann_cost(kann_t *a, int cost_label, int cal_grad)
376 | {
377 | 	mtaux_t *mt = (mtaux_t*)a->mt;
378 | 	int i, j, B, k, n_var;
379 | 	float cost;
380 | 
381 | 	if (mt == 0) return kann_cost_core(a, cost_label, cal_grad);
382 | 	B = kad_sync_dim(a->n, a->v, -1); /* get the current batch size */
383 | 	n_var = kann_size_var(a);
384 | 
385 | 	mt_kickoff(a, cost_label, cal_grad, 0);
386 | 	mt->mt[0].cost = kann_cost_core(mt->mt[0].a, cost_label, cal_grad);
387 | 	while (mt->n_idle < mt->n_threads - 1); /* busy waiting until all threads in sync */
388 | 
389 | 	memset(a->g, 0, n_var * sizeof(float)); /* TODO: check if this is necessary when cal_grad is false */
390 | 	for (i = k = 0, cost = 0.0f; i < mt->n_threads; ++i) {
391 | 		int size = (B - k) / (mt->n_threads - i);
392 | 		cost += mt->mt[i].cost * size / B;
393 | 		kad_saxpy(n_var, (float)size / B, mt->mt[i].a->g, a->g);
394 | 		k += size;
395 | 	}
396 | 	for (j = 0; j < a->n; ++j) { /* copy values back at recurrent nodes (needed by textgen; TODO: temporary solution) */
397 | 		kad_node_t *p = a->v[j];
398 | 		if (p->pre && p->n_d >= 2 && p->d[0] == B) {
399 | 			for (i = k = 0; i < mt->n_threads; ++i) {
400 | 				kad_node_t *q = mt->mt[i].a->v[j];
401 | 				memcpy(&p->x[k], q->x, kad_len(q) * sizeof(float));
402 | 				k += kad_len(q);
403 | 			}
404 | 		}
405 | 	}
406 | 	return cost;
407 | }
408 | 
409 | int kann_eval_out(kann_t *a)
410 | {
411 | 	mtaux_t *mt = (mtaux_t*)a->mt;
412 | 	int j, B, n_eval;
413 | 	if (mt == 0) return kann_eval(a, KANN_F_OUT, 0);
414 | 	B = kad_sync_dim(a->n, a->v, -1); /* get the current batch size */
415 | 	mt_kickoff(a, 0, 0, 1);
416 | 	n_eval = kann_eval(mt->mt[0].a, KANN_F_OUT, 0);
417 | 	while (mt->n_idle < mt->n_threads - 1); /* busy waiting until all threads in sync */
418 | 	for (j = 0; j < a->n; ++j) { /* copy output values back */
419 | 		kad_node_t *p = a->v[j];
420 | 		if (p->ext_flag & KANN_F_OUT) {
421 | 			int i, t, k, d0 = p->d[0] / B, d1 = 1; /* for RNN, p->d[0] may equal unroll_len * batch_size */
422 | 			assert(p->d[0] % B == 0);
423 | 			for (i = 1; i < p->n_d; ++i) d1 *= p->d[i];
424 | 			for (i = 0; i < d0; ++i) {
425 | 				for (t = k = 0; t < mt->n_threads; ++t) { /* similar to the forward pass of kad_op_concat() */
426 | 					kad_node_t *q = mt->mt[t].a->v[j];
427 | 					int size = q->d[0] / d0;
428 | 					memcpy(&p->x[(i * B + k) * d1], &q->x[i * size * d1], size * d1 * sizeof(float));
429 | 					k += size;
430 | 				}
431 | 			}
432 | 		}
433 | 	}
434 | 	return n_eval;
435 | }
436 | 
437 | int kann_class_error(const kann_t *ann, int *base)
438 | {
439 | 	mtaux_t *mt = (mtaux_t*)ann->mt;
440 | 	int i, n_err = 0, b = 0;
441 | 	if (mt == 0) return kann_class_error_core(ann, base);
442 | 	for (i = 0; i < mt->n_threads; ++i) {
443 | 		n_err += kann_class_error_core(mt->mt[i].a, &b);
444 | 		*base += b;
445 | 	}
446 | 	return n_err;
447 | }
448 | 
449 | void kann_switch(kann_t *ann, int is_train)
450 | {
451 | 	mtaux_t *mt = (mtaux_t*)ann->mt;
452 | 	int i;
453 | 	if (mt == 0) {
454 | 		kann_switch_core(ann, is_train);
455 | 		return;
456 | 	}
457 | 	for (i = 0; i < mt->n_threads; ++i)
458 | 		kann_switch_core(mt->mt[i].a, is_train);
459 | }
460 | #else
461 | void kann_mt(kann_t *ann, int n_threads, int max_batch_size) { (void)ann; (void)n_threads; (void)max_batch_size; }
462 | float kann_cost(kann_t *a, int cost_label, int cal_grad) { return kann_cost_core(a, cost_label, cal_grad); }
463 | int kann_eval_out(kann_t *a) { return kann_eval(a, KANN_F_OUT, 0); }
464 | int kann_class_error(const kann_t *a, int *base) { return kann_class_error_core(a, base); }
465 | void kann_switch(kann_t *ann, int is_train) { kann_switch_core(ann, is_train); }
466 | #endif
467 | 
468 | /***********************
469 |  *** @@IO: model I/O ***
470 |  ***********************/
471 | 
472 | #define KANN_MAGIC "KAN\1"
473 | 
474 | void kann_save_fp(FILE *fp, kann_t *ann)
475 | {
476 | 	kann_set_batch_size(ann, 1);
477 | 	fwrite(KANN_MAGIC, 1, 4, fp);
478 | 	kad_save(fp, ann->n, ann->v);
479 | 	fwrite(ann->x, sizeof(float), kann_size_var(ann), fp);
480 | 	fwrite(ann->c, sizeof(float), kann_size_const(ann), fp);
481 | }
482 | 
483 | void kann_save(const char *fn, kann_t *ann)
484 | {
485 | 	FILE *fp;
486 | 	fp = fn && strcmp(fn, "-")? fopen(fn, "wb") : stdout;
487 | 	kann_save_fp(fp, ann);
488 | 	fclose(fp);
489 | }
490 | 
491 | kann_t *kann_load_fp(FILE *fp)
492 | {
493 | 	char magic[4];
494 | 	kann_t *ann;
495 | 	int n_var, n_const;
496 | 
497 | 	fread(magic, 1, 4, fp);
498 | 	if (strncmp(magic, KANN_MAGIC, 4) != 0) {
499 | 		fclose(fp);
500 | 		return 0;
501 | 	}
502 | 	ann = (kann_t*)calloc(1, sizeof(kann_t));
503 | 	ann->v = kad_load(fp, &ann->n);
504 | 	n_var = kad_size_var(ann->n, ann->v);
505 | 	n_const = kad_size_const(ann->n, ann->v);
506 | 	ann->x = (float*)malloc(n_var * sizeof(float));
507 | 	ann->g = (float*)calloc(n_var, sizeof(float));
508 | 	ann->c = (float*)malloc(n_const * sizeof(float));
509 | 	fread(ann->x, sizeof(float), n_var, fp);
510 | 	fread(ann->c, sizeof(float), n_const, fp);
511 | 	kad_ext_sync(ann->n, ann->v, ann->x, ann->g, ann->c);
512 | 	return ann;
513 | }
514 | 
515 | kann_t *kann_load(const char *fn)
516 | {
517 | 	FILE *fp;
518 | 	kann_t *ann;
519 | 	fp = fn && strcmp(fn, "-")? fopen(fn, "rb") : stdin;
520 | 	ann = kann_load_fp(fp);
521 | 	fclose(fp);
522 | 	return ann;
523 | }
524 | 
525 | /**********************************************
526 |  *** @@LAYER: layers and model generation ***
527 |  **********************************************/
528 | 
529 | /********** General but more complex APIs **********/
530 | 
531 | kad_node_t *kann_new_leaf_array(int *offset, kad_node_p *par, uint8_t flag, float x0_01, int n_d, int32_t d[KAD_MAX_DIM])
532 | {
533 | 	int i, len, off = offset && par? *offset : -1;
534 | 	kad_node_t *p;
535 | 
536 | 	if (off >= 0 && par[off]) return par[(*offset)++];
537 | 	p = (kad_node_t*)calloc(1, sizeof(kad_node_t));
538 | 	p->n_d = n_d, p->flag = flag;
539 | 	memcpy(p->d, d, n_d * sizeof(int32_t));
540 | 	len = kad_len(p);
541 | 	p->x = (float*)calloc(len, sizeof(float));
542 | 	if (p->n_d <= 1) {
543 | 		for (i = 0; i < len; ++i)
544 | 			p->x[i] = x0_01;
545 | 	} else {
546 | 		double sdev_inv;
547 | 		sdev_inv = 1.0 / sqrt((double)len / p->d[0]);
548 | 		for (i = 0; i < len; ++i)
549 | 			p->x[i] = (float)(kad_drand_normal(0) * sdev_inv);
550 | 	}
551 | 	if (off >= 0) par[off] = p, ++(*offset);
552 | 	return p;
553 | }
554 | 
555 | kad_node_t *kann_new_leaf2(int *offset, kad_node_p *par, uint8_t flag, float x0_01, int n_d, ...)
556 | {
557 | 	int32_t i, d[KAD_MAX_DIM];
558 | 	va_list ap;
559 | 	va_start(ap, n_d); for (i = 0; i < n_d; ++i) d[i] = va_arg(ap, int); va_end(ap);
560 | 	return kann_new_leaf_array(offset, par, flag, x0_01, n_d, d);
561 | }
562 | 
563 | kad_node_t *kann_layer_dense2(int *offset, kad_node_p *par, kad_node_t *in, int n1)
564 | {
565 | 	int n0;
566 | 	kad_node_t *w, *b;
567 | 	n0 = in->n_d >= 2? kad_len(in) / in->d[0] : kad_len(in);
568 | 	w = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n0);
569 | 	b = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n1);
570 | 	return kad_add(kad_cmul(in, w), b);
571 | }
572 | 
573 | kad_node_t *kann_layer_dropout2(int *offset, kad_node_p *par, kad_node_t *t, float r)
574 | {
575 | 	kad_node_t *x[2], *cr;
576 | 	cr = kann_new_leaf2(offset, par, KAD_CONST, r, 0);
577 | 	x[0] = t, x[1] = kad_dropout(t, cr);
578 | 	return kad_switch(2, x);
579 | }
580 | 
581 | kad_node_t *kann_layer_layernorm2(int *offset, kad_node_t **par, kad_node_t *in)
582 | {
583 | 	int n0;
584 | 	kad_node_t *alpha, *beta;
585 | 	n0 = in->n_d >= 2? kad_len(in) / in->d[0] : kad_len(in);
586 | 	alpha = kann_new_leaf2(offset, par, KAD_VAR, 1.0f, 1, n0);
587 | 	beta  = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n0);
588 | 	return kad_add(kad_mul(kad_stdnorm(in), alpha), beta);
589 | }
590 | 
591 | static inline kad_node_t *cmul_norm2(int *offset, kad_node_t **par, kad_node_t *x, kad_node_t *w, int use_norm)
592 | {
593 | 	return use_norm? kann_layer_layernorm2(offset, par, kad_cmul(x, w)) : kad_cmul(x, w);
594 | }
595 | 
596 | kad_node_t *kann_layer_rnn2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag)
597 | {
598 | 	int n0, n1 = h0->d[h0->n_d-1], use_norm = !!(rnn_flag & KANN_RNN_NORM);
599 | 	kad_node_t *t, *w, *u, *b, *out;
600 | 
601 | 	u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
602 | 	b = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n1);
603 | 	t = cmul_norm2(offset, par, h0, u, use_norm);
604 | 	if (in) {
605 | 		n0 = in->n_d >= 2? kad_len(in) / in->d[0] : kad_len(in);
606 | 		w = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n0);
607 | 		t = kad_add(cmul_norm2(offset, par, in, w, use_norm), t);
608 | 	}
609 | 	out = kad_tanh(kad_add(t, b));
610 | 	out->pre = h0;
611 | 	return out;
612 | }
613 | 
614 | kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag)
615 | {
616 | 	int n0 = 0, n1 = h0->d[h0->n_d-1], use_norm = !!(rnn_flag & KANN_RNN_NORM);
617 | 	kad_node_t *t, *r, *z, *w, *u, *b, *s, *out;
618 | 
619 | 	if (in) n0 = in->n_d >= 2? kad_len(in) / in->d[0] : kad_len(in);
620 | 	/* z = sigm(x_t * W_z + h_{t-1} * U_z + b_z) */
621 | 	u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
622 | 	b = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n1);
623 | 	t = cmul_norm2(offset, par, h0, u, use_norm);
624 | 	if (in) {
625 | 		w = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n0);
626 | 		t = kad_add(cmul_norm2(offset, par, in, w, use_norm), t);
627 | 	}
628 | 	z = kad_sigm(kad_add(t, b));
629 | 	/* r = sigm(x_t * W_r + h_{t-1} * U_r + b_r) */
630 | 	u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
631 | 	b = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n1);
632 | 	t = cmul_norm2(offset, par, h0, u, use_norm);
633 | 	if (in) {
634 | 		w = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n0);
635 | 		t = kad_add(cmul_norm2(offset, par, in, w, use_norm), t);
636 | 	}
637 | 	r = kad_sigm(kad_add(t, b));
638 | 	/* s = tanh(x_t * W_s + (h_{t-1} # r) * U_s + b_s) */
639 | 	u = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n1);
640 | 	b = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 1, n1);
641 | 	t = cmul_norm2(offset, par, kad_mul(r, h0), u, use_norm);
642 | 	if (in) {
643 | 		w = kann_new_leaf2(offset, par, KAD_VAR, 0.0f, 2, n1, n0);
644 | 		t = kad_add(cmul_norm2(offset, par, in, w, use_norm), t);
645 | 	}
646 | 	s = kad_tanh(kad_add(t, b));
647 | 	/* h_t = z # h_{t-1} + (1 - z) # s */
648 | 	out = kad_add(kad_mul(kad_1minus(z), s), kad_mul(z, h0));
649 | 	out->pre = h0;
650 | 	return out;
651 | }
652 | 
653 | /********** APIs without offset & par **********/
654 | 
655 | kad_node_t *kann_new_leaf(uint8_t flag, float x0_01, int n_d, ...)
656 | {
657 | 	int32_t i, d[KAD_MAX_DIM];
658 | 	va_list ap;
659 | 	va_start(ap, n_d); for (i = 0; i < n_d; ++i) d[i] = va_arg(ap, int); va_end(ap);
660 | 	return kann_new_leaf_array(0, 0, flag, x0_01, n_d, d);
661 | }
662 | 
663 | kad_node_t *kann_new_scalar(uint8_t flag, float x) { return kann_new_leaf(flag, x, 0); }
664 | kad_node_t *kann_new_weight(int n_row, int n_col) { return kann_new_leaf(KAD_VAR, 0.0f, 2, n_row, n_col); }
665 | kad_node_t *kann_new_vec(int n, float x) { return kann_new_leaf(KAD_VAR, x, 1, n); }
666 | kad_node_t *kann_new_bias(int n) { return kann_new_vec(n, 0.0f); }
667 | kad_node_t *kann_new_weight_conv2d(int n_out, int n_in, int k_row, int k_col) { return kann_new_leaf(KAD_VAR, 0.0f, 4, n_out, n_in, k_row, k_col); }
668 | kad_node_t *kann_new_weight_conv1d(int n_out, int n_in, int kernel_len) { return kann_new_leaf(KAD_VAR, 0.0f, 3, n_out, n_in, kernel_len); }
669 | 
670 | kad_node_t *kann_layer_input(int n1)
671 | {
672 | 	kad_node_t *t;
673 | 	t = kad_feed(2, 1, n1), t->ext_flag |= KANN_F_IN;
674 | 	return t;
675 | }
676 | 
677 | kad_node_t *kann_layer_dense(kad_node_t *in, int n1) { return kann_layer_dense2(0, 0, in, n1); }
678 | kad_node_t *kann_layer_dropout(kad_node_t *t, float r) { return kann_layer_dropout2(0, 0, t, r); }
679 | kad_node_t *kann_layer_layernorm(kad_node_t *in) { return kann_layer_layernorm2(0, 0, in); }
680 | 
681 | kad_node_t *kann_layer_rnn(kad_node_t *in, int n1, int rnn_flag)
682 | {
683 | 	kad_node_t *h0;
684 | 	h0 = (rnn_flag & KANN_RNN_VAR_H0)? kad_var(0, 0, 2, 1, n1) : kad_const(0, 2, 1, n1);
685 | 	h0->x = (float*)calloc(n1, sizeof(float));
686 | 	return kann_layer_rnn2(0, 0, in, h0, rnn_flag);
687 | }
688 | 
689 | kad_node_t *kann_layer_gru(kad_node_t *in, int n1, int rnn_flag)
690 | {
691 | 	kad_node_t *h0;
692 | 	h0 = (rnn_flag & KANN_RNN_VAR_H0)? kad_var(0, 0, 2, 1, n1) : kad_const(0, 2, 1, n1);
693 | 	h0->x = (float*)calloc(n1, sizeof(float));
694 | 	return kann_layer_gru2(0, 0, in, h0, rnn_flag);
695 | }
696 | 
697 | static kad_node_t *kann_cmul_norm(kad_node_t *x, kad_node_t *w)
698 | {
699 | 	return kann_layer_layernorm(kad_cmul(x, w));
700 | }
701 | 
702 | kad_node_t *kann_layer_lstm(kad_node_t *in, int n1, int rnn_flag)
703 | {
704 | 	int n0;
705 | 	kad_node_t *i, *f, *o, *g, *w, *u, *b, *h0, *c0, *c, *out;
706 | 	kad_node_t *(*cmul)(kad_node_t*, kad_node_t*) = (rnn_flag & KANN_RNN_NORM)? kann_cmul_norm : kad_cmul;
707 | 
708 | 	n0 = in->n_d >= 2? kad_len(in) / in->d[0] : kad_len(in);
709 | 	h0 = (rnn_flag & KANN_RNN_VAR_H0)? kad_var(0, 0, 2, 1, n1) : kad_const(0, 2, 1, n1);
710 | 	h0->x = (float*)calloc(n1, sizeof(float));
711 | 	c0 = (rnn_flag & KANN_RNN_VAR_H0)? kad_var(0, 0, 2, 1, n1) : kad_const(0, 2, 1, n1);
712 | 	c0->x = (float*)calloc(n1, sizeof(float));
713 | 
714 | 	/* i = sigm(x_t * W_i + h_{t-1} * U_i + b_i) */
715 | 	w = kann_new_weight(n1, n0);
716 | 	u = kann_new_weight(n1, n1);
717 | 	b = kann_new_bias(n1);
718 | 	i = kad_sigm(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
719 | 	/* f = sigm(x_t * W_f + h_{t-1} * U_f + b_f) */
720 | 	w = kann_new_weight(n1, n0);
721 | 	u = kann_new_weight(n1, n1);
722 | 	b = kann_new_vec(n1, 1.0f); /* see Jozefowicz et al on using a large bias */
723 | 	f = kad_sigm(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
724 | 	/* o = sigm(x_t * W_o + h_{t-1} * U_o + b_o) */
725 | 	w = kann_new_weight(n1, n0);
726 | 	u = kann_new_weight(n1, n1);
727 | 	b = kann_new_bias(n1);
728 | 	o = kad_sigm(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
729 | 	/* g = tanh(x_t * W_g + h_{t-1} * U_g + b_g) */
730 | 	w = kann_new_weight(n1, n0);
731 | 	u = kann_new_weight(n1, n1);
732 | 	b = kann_new_bias(n1);
733 | 	g = kad_tanh(kad_add(kad_add(cmul(in, w), cmul(h0, u)), b));
734 | 	/* c_t = c_{t-1} # f + g # i */
735 | 	c = kad_add(kad_mul(f, c0), kad_mul(g, i)); /* can't be kad_mul(c0, f)!!! */
736 | 	c->pre = c0;
737 | 	/* h_t = tanh(c_t) # o */
738 | 	if (rnn_flag & KANN_RNN_NORM) c = kann_layer_layernorm(c); /* see Ba et al (2016) about how to apply layer normalization to LSTM */
739 | 	out = kad_mul(kad_tanh(c), o);
740 | 	out->pre = h0;
741 | 	return out;
742 | }
743 | 
744 | kad_node_t *kann_layer_conv2d(kad_node_t *in, int n_flt, int k_rows, int k_cols, int stride_r, int stride_c, int pad_r, int pad_c)
745 | {
746 | 	kad_node_t *w;
747 | 	w = kann_new_weight_conv2d(n_flt, in->d[1], k_rows, k_cols);
748 | 	return kad_conv2d(in, w, stride_r, stride_c, pad_r, pad_c);
749 | }
750 | 
751 | kad_node_t *kann_layer_conv1d(kad_node_t *in, int n_flt, int k_size, int stride, int pad)
752 | {
753 | 	kad_node_t *w;
754 | 	w = kann_new_weight_conv1d(n_flt, in->d[1], k_size);
755 | 	return kad_conv1d(in, w, stride, pad);
756 | }
757 | 
758 | kad_node_t *kann_layer_cost(kad_node_t *t, int n_out, int cost_type)
759 | {
760 | 	kad_node_t *cost = 0, *truth = 0;
761 | 	assert(cost_type == KANN_C_CEB || cost_type == KANN_C_CEM || cost_type == KANN_C_CEB_NEG || cost_type == KANN_C_MSE);
762 | 	t = kann_layer_dense(t, n_out);
763 | 	truth = kad_feed(2, 1, n_out), truth->ext_flag |= KANN_F_TRUTH;
764 | 	if (cost_type == KANN_C_MSE) {
765 | 		cost = kad_mse(t, truth);
766 | 	} else if (cost_type == KANN_C_CEB) {
767 | 		t = kad_sigm(t);
768 | 		cost = kad_ce_bin(t, truth);
769 | 	} else if (cost_type == KANN_C_CEB_NEG) {
770 | 		t = kad_tanh(t);
771 | 		cost = kad_ce_bin_neg(t, truth);
772 | 	} else if (cost_type == KANN_C_CEM) {
773 | 		t = kad_softmax(t);
774 | 		cost = kad_ce_multi(t, truth);
775 | 	}
776 | 	t->ext_flag |= KANN_F_OUT, cost->ext_flag |= KANN_F_COST;
777 | 	return cost;
778 | }
779 | 
/*
 * Fill s[0..n-1] with a random permutation of 0..n-1, drawing random numbers
 * from kad_drand(0).
 */
void kann_shuffle(int n, int *s)
{
	int k, last;

	for (k = 0; k < n; ++k)
		s[k] = k;
	/* walk from the end, swapping each position with a random one at or before it */
	for (last = n - 1; last >= 0; --last) {
		int pick = (int)((last + 1) * kad_drand(0));
		int tmp = s[pick];
		s[pick] = s[last];
		s[last] = tmp;
	}
}
789 | 
790 | /***************************
791 |  *** @@MIN: minimization ***
792 |  ***************************/
793 | 
794 | #ifdef __SSE__
795 | #include <xmmintrin.h>
796 | 
797 | void kann_RMSprop(int n, float h0, const float *h, float decay, const float *g, float *t, float *r)
798 | {
799 | 	int i, n4 = n>>2<<2;
800 | 	__m128 vh, vg, vr, vt, vd, vd1, tmp, vtiny;
801 | 	vh = _mm_set1_ps(h0);
802 | 	vd = _mm_set1_ps(decay);
803 | 	vd1 = _mm_set1_ps(1.0f - decay);
804 | 	vtiny = _mm_set1_ps(1e-6f);
805 | 	for (i = 0; i < n4; i += 4) {
806 | 		vt = _mm_loadu_ps(&t[i]);
807 | 		vr = _mm_loadu_ps(&r[i]);
808 | 		vg = _mm_loadu_ps(&g[i]);
809 | 		if (h) vh = _mm_loadu_ps(&h[i]);
810 | 		vr = _mm_add_ps(_mm_mul_ps(vd1, _mm_mul_ps(vg, vg)), _mm_mul_ps(vd, vr));
811 | 		_mm_storeu_ps(&r[i], vr);
812 | 		tmp = _mm_sub_ps(vt, _mm_mul_ps(_mm_mul_ps(vh, _mm_rsqrt_ps(_mm_add_ps(vtiny, vr))), vg));
813 | 		_mm_storeu_ps(&t[i], tmp);
814 | 	}
815 | 	for (; i < n; ++i) {
816 | 		r[i] = (1. - decay) * g[i] * g[i] + decay * r[i];
817 | 		t[i] -= (h? h[i] : h0) / sqrtf(1e-6f + r[i]) * g[i];
818 | 	}
819 | }
820 | #else
821 | void kann_RMSprop(int n, float h0, const float *h, float decay, const float *g, float *t, float *r)
822 | {
823 | 	int i;
824 | 	for (i = 0; i < n; ++i) {
825 | 		float lr = h? h[i] : h0;
826 | 		r[i] = (1.0f - decay) * g[i] * g[i] + decay * r[i];
827 | 		t[i] -= lr / sqrtf(1e-6f + r[i]) * g[i];
828 | 	}
829 | }
830 | #endif
831 | 
832 | float kann_grad_clip(float thres, int n, float *g)
833 | {
834 | 	int i;
835 | 	double s2 = 0.0;
836 | 	for (i = 0; i < n; ++i)
837 | 		s2 += g[i] * g[i];
838 | 	s2 = sqrt(s2);
839 | 	if (s2 > thres)
840 | 		for (i = 0, s2 = 1.0 / s2; i < n; ++i)
841 | 			g[i] *= (float)s2;
842 | 	return (float)s2 / thres;
843 | }
844 | 
845 | /****************************************************************
846 |  *** @@XY: simpler API for network with a single input/output ***
847 |  ****************************************************************/
848 | 
849 | int kann_train_fnn1(kann_t *ann, float lr, int mini_size, int max_epoch, int max_drop_streak, float frac_val, int n, float **_x, float **_y)
850 | {
851 | 	int i, j, *shuf, n_train, n_val, n_in, n_out, n_var, n_const, drop_streak = 0, min_set = 0;
852 | 	float **x, **y, *x1, *y1, *r, min_val_cost = FLT_MAX, *min_x, *min_c;
853 | 
854 | 	n_in = kann_dim_in(ann);
855 | 	n_out = kann_dim_out(ann);
856 | 	if (n_in < 0 || n_out < 0) return -1;
857 | 	n_var = kann_size_var(ann);
858 | 	n_const = kann_size_const(ann);
859 | 	r = (float*)calloc(n_var, sizeof(float));
860 | 	shuf = (int*)malloc(n * sizeof(int));
861 | 	x = (float**)malloc(n * sizeof(float*));
862 | 	y = (float**)malloc(n * sizeof(float*));
863 | 	kann_shuffle(n, shuf);
864 | 	for (j = 0; j < n; ++j)
865 | 		x[j] = _x[shuf[j]], y[j] = _y[shuf[j]];
866 | 	n_val = (int)(n * frac_val);
867 | 	n_train = n - n_val;
868 | 	min_x = (float*)malloc(n_var * sizeof(float));
869 | 	min_c = (float*)malloc(n_const * sizeof(float));
870 | 
871 | 	x1 = (float*)malloc(n_in  * mini_size * sizeof(float));
872 | 	y1 = (float*)malloc(n_out * mini_size * sizeof(float));
873 | 	kann_feed_bind(ann, KANN_F_IN,    0, &x1);
874 | 	kann_feed_bind(ann, KANN_F_TRUTH, 0, &y1);
875 | 
876 | 	for (i = 0; i < max_epoch; ++i) {
877 | 		int n_proc = 0, n_train_err = 0, n_val_err = 0, n_train_base = 0, n_val_base = 0;
878 | 		double train_cost = 0.0, val_cost = 0.0;
879 | 		kann_shuffle(n_train, shuf);
880 | 		kann_switch(ann, 1);
881 | 		while (n_proc < n_train) {
882 | 			int b, c, ms = n_train - n_proc < mini_size? n_train - n_proc : mini_size;
883 | 			for (b = 0; b < ms; ++b) {
884 | 				memcpy(&x1[b*n_in],  x[shuf[n_proc+b]], n_in  * sizeof(float));
885 | 				memcpy(&y1[b*n_out], y[shuf[n_proc+b]], n_out * sizeof(float));
886 | 			}
887 | 			kann_set_batch_size(ann, ms);
888 | 			train_cost += kann_cost(ann, 0, 1) * ms;
889 | 			c = kann_class_error(ann, &b);
890 | 			n_train_err += c, n_train_base += b;
891 | 			kann_RMSprop(n_var, lr, 0, 0.9f, ann->g, ann->x, r);
892 | 			n_proc += ms;
893 | 		}
894 | 		train_cost /= n_train;
895 | 		kann_switch(ann, 0);
896 | 		n_proc = 0;
897 | 		while (n_proc < n_val) {
898 | 			int b, c, ms = n_val - n_proc < mini_size? n_val - n_proc : mini_size;
899 | 			for (b = 0; b < ms; ++b) {
900 | 				memcpy(&x1[b*n_in],  x[n_train+n_proc+b], n_in  * sizeof(float));
901 | 				memcpy(&y1[b*n_out], y[n_train+n_proc+b], n_out * sizeof(float));
902 | 			}
903 | 			kann_set_batch_size(ann, ms);
904 | 			val_cost += kann_cost(ann, 0, 0) * ms;
905 | 			c = kann_class_error(ann, &b);
906 | 			n_val_err += c, n_val_base += b;
907 | 			n_proc += ms;
908 | 		}
909 | 		if (n_val > 0) val_cost /= n_val;
910 | 		if (kann_verbose >= 3) {
911 | 			fprintf(stderr, "epoch: %d; training cost: %g", i+1, train_cost);
912 | 			if (n_train_base) fprintf(stderr, " (class error: %.2f%%)", 100.0f * n_train_err / n_train);
913 | 			if (n_val > 0) {
914 | 				fprintf(stderr, "; validation cost: %g", val_cost);
915 | 				if (n_val_base) fprintf(stderr, " (class error: %.2f%%)", 100.0f * n_val_err / n_val);
916 | 			}
917 | 			fputc('\n', stderr);
918 | 		}
919 | 		if (i >= max_drop_streak && n_val > 0) {
920 | 			if (val_cost < min_val_cost) {
921 | 				min_set = 1;
922 | 				memcpy(min_x, ann->x, n_var * sizeof(float));
923 | 				memcpy(min_c, ann->c, n_const * sizeof(float));
924 | 				drop_streak = 0;
925 | 				min_val_cost = (float)val_cost;
926 | 			} else if (++drop_streak >= max_drop_streak)
927 | 				break;
928 | 		}
929 | 	}
930 | 	if (min_set) {
931 | 		memcpy(ann->x, min_x, n_var * sizeof(float));
932 | 		memcpy(ann->c, min_c, n_const * sizeof(float));
933 | 	}
934 | 
935 | 	free(min_c); free(min_x); free(y1); free(x1); free(y); free(x); free(shuf); free(r);
936 | 	return i;
937 | }
938 | 
939 | float kann_cost_fnn1(kann_t *ann, int n, float **x, float **y)
940 | {
941 | 	int n_in, n_out, n_proc = 0, mini_size = 64 < n? 64 : n;
942 | 	float *x1, *y1;
943 | 	double cost = 0.0;
944 | 
945 | 	n_in = kann_dim_in(ann);
946 | 	n_out = kann_dim_out(ann);
947 | 	if (n <= 0 || n_in < 0 || n_out < 0) return 0.0;
948 | 
949 | 	x1 = (float*)malloc(n_in  * mini_size * sizeof(float));
950 | 	y1 = (float*)malloc(n_out * mini_size * sizeof(float));
951 | 	kann_feed_bind(ann, KANN_F_IN,    0, &x1);
952 | 	kann_feed_bind(ann, KANN_F_TRUTH, 0, &y1);
953 | 	kann_switch(ann, 0);
954 | 	while (n_proc < n) {
955 | 		int b, ms = n - n_proc < mini_size? n - n_proc : mini_size;
956 | 		for (b = 0; b < ms; ++b) {
957 | 			memcpy(&x1[b*n_in],  x[n_proc+b], n_in  * sizeof(float));
958 | 			memcpy(&y1[b*n_out], y[n_proc+b], n_out * sizeof(float));
959 | 		}
960 | 		kann_set_batch_size(ann, ms);
961 | 		cost += kann_cost(ann, 0, 0) * ms;
962 | 		n_proc += ms;
963 | 	}
964 | 	free(y1); free(x1);
965 | 	return (float)(cost / n);
966 | }
967 | 
968 | const float *kann_apply1(kann_t *a, float *x)
969 | {
970 | 	int i_out;
971 | 	i_out = kann_find(a, KANN_F_OUT, 0);
972 | 	if (i_out < 0) return 0;
973 | 	kann_set_batch_size(a, 1);
974 | 	kann_feed_bind(a, KANN_F_IN, 0, &x);
975 | 	kad_eval_at(a->n, a->v, i_out);
976 | 	return a->v[i_out]->x;
977 | }
978 | 
979 | int kann_is_rnn(kann_t *ann)
980 | {
981 | 	for (int i = 0; i < ann->n; ++i) {
982 | 		if (ann->v[i]->pre) return 1;
983 | 	}
984 | 	return 0;
985 | }


--------------------------------------------------------------------------------
/lib/kann-master/kann.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |   The MIT License
  3 | 
  4 |   Copyright (c) 2018-2019 Dana-Farber Cancer Institute
  5 |                 2016-2018 Broad Institute
  6 | 
  7 |   Permission is hereby granted, free of charge, to any person obtaining
  8 |   a copy of this software and associated documentation files (the
  9 |   "Software"), to deal in the Software without restriction, including
 10 |   without limitation the rights to use, copy, modify, merge, publish,
 11 |   distribute, sublicense, and/or sell copies of the Software, and to
 12 |   permit persons to whom the Software is furnished to do so, subject to
 13 |   the following conditions:
 14 | 
 15 |   The above copyright notice and this permission notice shall be
 16 |   included in all copies or substantial portions of the Software.
 17 | 
 18 |   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 20 |   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 22 |   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 23 |   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 24 |   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 25 |   SOFTWARE.
 26 | */
 27 | 
 28 | #ifndef KANN_H
 29 | #define KANN_H
 30 | 
 31 | #define KANN_VERSION "r536"
 32 | 
/* External node flags (bit mask stored in a node's ext_flag) */
#define KANN_F_IN       0x1   /* input */
#define KANN_F_OUT      0x2   /* output */
#define KANN_F_TRUTH    0x4   /* truth output */
#define KANN_F_COST     0x8   /* final cost */

/* Cost types accepted by kann_layer_cost() */
#define KANN_C_CEB      1   /* binary cross-entropy cost, used with sigmoid */
#define KANN_C_CEM      2   /* multi-class cross-entropy cost, used with softmax */
#define KANN_C_CEB_NEG  3   /* binary cross-entropy-like cost, used with tanh */
#define KANN_C_MSE      4   /* mean square error */

/* Bit flags for the rnn_flag argument of the recurrent layer constructors */
#define KANN_RNN_VAR_H0 0x1 /* take the initial hidden values as variables */
#define KANN_RNN_NORM   0x2 /* apply layer normalization */
 45 | 
 46 | #include "kautodiff.h"
 47 | 
/* A network: a computational graph plus the collated storage behind its leaf nodes */
typedef struct {
	int n;            /* number of nodes in the computational graph */
	kad_node_t **v;   /* list of nodes */
	float *x, *g, *c; /* collated variable values, gradients and constant values */
	void *mt;         /* auxiliary data for multi-threading; NULL if multi-threading disabled */
} kann_t;
 54 | 
 55 | extern int kann_verbose;
 56 | 
/* Convenience wrappers that pass a network's node list to the underlying kad_*() / kann_*() routines */
#define kann_size_var(a) kad_size_var((a)->n, (a)->v)
#define kann_size_const(a) kad_size_const((a)->n, (a)->v)
/* size of the feed node flagged KANN_F_IN with label 0 (see kann_feed_dim()) */
#define kann_dim_in(a) kann_feed_dim((a), KANN_F_IN, 0)
/* size of the feed node flagged KANN_F_TRUTH with label 0, i.e. the truth vector, not the output node */
#define kann_dim_out(a) kann_feed_dim((a), KANN_F_TRUTH, 0)
/* seed / draw from the random generator selected by passing 0 to kad_srand() / kad_drand() */
#define kann_srand(seed) kad_srand(0, (seed))
#define kann_drand() kad_drand(0)
/* set the mini-batch size of every node in the graph */
#define kann_set_batch_size(ann, B) kad_sync_dim((ann)->n, (ann)->v, (B))
 64 | 
 65 | #ifdef __cplusplus
 66 | extern "C" {
 67 | #endif
 68 | 
 69 | /**
 70 |  * Generate a network from a computational graph
 71 |  *
 72 |  * A network must have at least one scalar cost node (i.e. whose n_d==0). It
 73 |  * may optionally contain other cost nodes or output nodes not leading to the
 74 |  * primary cost node.
 75 |  *
 76 |  * @param cost    cost node (must be a scalar, i.e. cost->n_d==0)
 77 |  * @param n_rest  number of other nodes without predecessors
 78 |  * @param ...     other nodes (of type kad_node_t*) without predecessors
 79 |  *
 80 |  * @return network on success, or NULL otherwise
 81 |  */
 82 | kann_t *kann_new(kad_node_t *cost, int n_rest, ...);
 83 | 
 84 | /**
 85 |  * Unroll an RNN
 86 |  *
 87 |  * @param a       network
 88 |  * @param len     number of unrolls
 89 |  *
 90 |  * @return an unrolled network, or NULL if the network is not an RNN
 91 |  */
 92 | kann_t *kann_unroll(kann_t *a, ...);
 93 | 
 94 | kann_t *kann_unroll_array(kann_t *a, int *len);
 95 | kann_t *kann_clone(kann_t *a, int batch_size);
 96 | void kann_delete(kann_t *a);          /* delete a network generated by kann_new() or kann_layer_final() */
 97 | void kann_delete_unrolled(kann_t *a); /* delete a network generated by kann_unroll() */
 98 | 
 99 | /**
100 |  * Enable/disable multi-threading (requiring pthread)
101 |  *
 * KANN splits a mini-batch into $n_threads mini-mini-batches and puts each of
 * them on one thread. So far, only kann_cost() and kann_eval_out() take
 * advantage of multi-threading.
 *
 * @param ann             network
 * @param n_threads       number of threads; <=1 to completely disable multi-threading
 * @param max_batch_size  max mini-batch size; shall be no smaller than n_threads
109 |  */
110 | void kann_mt(kann_t *ann, int n_threads, int max_batch_size);
111 | 
112 | /**
113 |  * Bind float arrays to feed nodes
114 |  *
115 |  * @param a         network
116 |  * @param ext_flag  required external flags
117 |  * @param ext_label required external label
118 |  * @param x         pointers (size equal to the number of matching feed nodes)
119 |  *
120 |  * @return number of matching feed nodes
121 |  */
122 | int kann_feed_bind(kann_t *a, uint32_t ext_flag, int32_t ext_label, float **x);
123 | 
124 | /**
125 |  * Compute the cost and optionally gradients
126 |  *
127 |  * @param a          network
128 |  * @param cost_label required external label
129 |  * @param cal_grad   whether to compute gradients
130 |  *
131 |  * @return cost
132 |  */
133 | float kann_cost(kann_t *a, int cost_label, int cal_grad);
134 | 
135 | int kann_eval(kann_t *a, uint32_t ext_flag, int ext_label);
136 | int kann_eval_out(kann_t *a);
137 | int kann_class_error(const kann_t *ann, int *base);
138 | 
139 | /**
140 |  * Find a node
141 |  *
142 |  * @param a         network
143 |  * @param ext_flag  required external flags; set to 0 to match all flags
144 |  * @param ext_label required external label
145 |  *
146 |  * @return >=0 if found; -1 if not found; -2 if found multiple
147 |  */
148 | int kann_find(const kann_t *a, uint32_t ext_flag, int32_t ext_label);
149 | 
150 | /**
151 |  * Get the size of a feed node, assuming mini-batch size 1
152 |  *
153 |  * @param a         network
154 |  * @param ext_flag  required external flags
155 |  * @param ext_label required external label
156 |  *
157 |  * @return size>=0; -1 if not found; -2 if found multiple
158 |  */
159 | int kann_feed_dim(const kann_t *a, uint32_t ext_flag, int32_t ext_label);
160 | 
161 | /**
162 |  * Get an RNN ready for continuous feeding
163 |  *
164 |  * @param a         network
165 |  */
166 | void kann_rnn_start(kann_t *a);
167 | 
168 | void kann_rnn_end(kann_t *a);
169 | 
170 | /**
171 |  * Switch between training and prediction networks (effective only when there are switch nodes)
172 |  *
173 |  * @param a         network
174 |  * @param is_train  0 for prediction network and non-zero for training net
175 |  */
176 | void kann_switch(kann_t *a, int is_train);
177 | 
178 | /**
179 |  * RMSprop update
180 |  *
181 |  * @param n      number of variables
182 |  * @param h0     learning rate
183 |  * @param h      per-variable learning rate; NULL if not applicable
184 |  * @param decay  RMSprop decay; use 0.9 if unsure
185 |  * @param g      gradient, of size n
186 |  * @param t      variables to change
187 |  * @param r      memory, of size n
188 |  */
189 | void kann_RMSprop(int n, float h0, const float *h, float decay, const float *g, float *t, float *r);
190 | 
191 | void kann_shuffle(int n, int *s);
192 | float kann_grad_clip(float thres, int n, float *g);
193 | 
194 | /* common layers */
195 | kad_node_t *kann_layer_input(int n1);
196 | kad_node_t *kann_layer_dense(kad_node_t *in, int n1);
197 | kad_node_t *kann_layer_dropout(kad_node_t *t, float r);
198 | kad_node_t *kann_layer_layernorm(kad_node_t *in);
199 | kad_node_t *kann_layer_rnn(kad_node_t *in, int n1, int rnn_flag);
200 | kad_node_t *kann_layer_lstm(kad_node_t *in, int n1, int rnn_flag);
201 | kad_node_t *kann_layer_gru(kad_node_t *in, int n1, int rnn_flag);
202 | kad_node_t *kann_layer_conv2d(kad_node_t *in, int n_flt, int k_rows, int k_cols, int stride_r, int stride_c, int pad_r, int pad_c);
203 | kad_node_t *kann_layer_conv1d(kad_node_t *in, int n_flt, int k_size, int stride, int pad);
204 | kad_node_t *kann_layer_cost(kad_node_t *t, int n_out, int cost_type);
205 | 
206 | kad_node_t *kann_new_leaf(uint8_t flag, float x0_01, int n_d, ...); /* flag can be KAD_CONST or KAD_VAR */
207 | kad_node_t *kann_new_scalar(uint8_t flag, float x);
208 | kad_node_t *kann_new_weight(int n_row, int n_col);
209 | kad_node_t *kann_new_bias(int n);
210 | kad_node_t *kann_new_weight_conv2d(int n_out, int n_in, int k_row, int k_col);
211 | kad_node_t *kann_new_weight_conv1d(int n_out, int n_in, int kernel_len);
212 | 
213 | kad_node_t *kann_new_leaf2(int *offset, kad_node_p *par, uint8_t flag, float x0_01, int n_d, ...);
214 | kad_node_t *kann_layer_dense2(int *offset, kad_node_p *par, kad_node_t *in, int n1);
215 | kad_node_t *kann_layer_dropout2(int *offset, kad_node_p *par, kad_node_t *t, float r);
216 | kad_node_t *kann_layer_layernorm2(int *offset, kad_node_t **par, kad_node_t *in);
217 | kad_node_t *kann_layer_rnn2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag);
218 | kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag);
219 | 
220 | /* operations on network with a single input node and a single output node */
221 | int kann_train_fnn1(kann_t *ann, float lr, int mini_size, int max_epoch, int max_drop_streak, float frac_val, int n, float **_x, float **_y);
222 | float kann_cost_fnn1(kann_t *a, int n, float **x, float **y);
223 | const float *kann_apply1(kann_t *a, float *x);
224 | 
225 | /* model I/O */
226 | void kann_save_fp(FILE *fp, kann_t *ann);
227 | void kann_save(const char *fn, kann_t *ann);
228 | kann_t *kann_load_fp(FILE *fp);
229 | kann_t *kann_load(const char *fn);
230 | 
231 | /* extra */
232 | 
233 | int kann_is_rnn(kann_t *ann);
234 | 
235 | #ifdef __cplusplus
236 | }
237 | #endif
238 | 
239 | #endif
240 | 


--------------------------------------------------------------------------------
/lib/kann-master/kann_extra/kann_data.c:
--------------------------------------------------------------------------------
  1 | #include <string.h>
  2 | #include <assert.h>
  3 | #include <stdio.h>
  4 | #include "kseq.h"
  5 | #include "kann_data.h"
  6 | #ifdef HAVE_ZLIB
  7 | #include <zlib.h>
  8 | KSTREAM_INIT(gzFile, gzread, 16384)
  9 | #else
 10 | #include <unistd.h>
 11 | #include <fcntl.h>
 12 | KSTREAM_INIT(int, read, 16384)
 13 | #endif
 14 | 
 15 | kann_data_t *kann_data_read(const char *fn)
 16 | {
 17 | 	kstream_t *ks;
 18 | 	kann_data_t *d;
 19 | 	int m_row = 0, dret, m_grp = 0, grp_size = 0;
 20 | 	kstring_t str = {0,0,0};
 21 | 
 22 | #ifdef HAVE_ZLIB
 23 | 	gzFile fp;
 24 | 	fp = fn && strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
 25 | #else
 26 | 	int fp;
 27 | 	fp = fn && strcmp(fn, "-")? open(fn, O_RDONLY) : fileno(stdin);
 28 | #endif
 29 | 	ks = ks_init(fp);
 30 | 
 31 | 	d = (kann_data_t*)calloc(1, sizeof(kann_data_t));
 32 | 	while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) {
 33 | 		int st, k;
 34 | 		size_t i;
 35 | 		if (str.s[0] == '#') {
 36 | 			for (i = 0, k = 0; i < str.l; ++i)
 37 | 				if (str.s[i] == '\t') ++k;
 38 | 			if (k > 0) {
 39 | 				d->n_col = k;
 40 | 				d->cname = (char**)malloc(d->n_col * sizeof(char*));
 41 | 				for (i = 0, k = st = 0; i <= str.l; ++i) {
 42 | 					if (i == str.l || str.s[i] == '\t') {
 43 | 						if (k > 0) str.s[i] = 0, d->cname[k-1] = strdup(&str.s[st]);
 44 | 						++k, st = i + 1;
 45 | 					}
 46 | 				}
 47 | 			}
 48 | 			continue;
 49 | 		}
 50 | 		if (str.s[0] == 0) {
 51 | 			if (d->n_grp == m_grp) {
 52 | 				m_grp = m_grp? m_grp<<1 : 8;
 53 | 				d->grp = (int*)realloc(d->grp, m_grp * sizeof(int));
 54 | 			}
 55 | 			d->grp[d->n_grp++] = grp_size;
 56 | 			grp_size = 0;
 57 | 			continue;
 58 | 		}
 59 | 		for (i = 0, k = 0; i < str.l; ++i)
 60 | 			if (str.s[i] == '\t') ++k;
 61 | 		if (d->n_col == 0) d->n_col = k;
 62 | 		if (k != d->n_col) continue; // TODO: throw a warning/error
 63 | 		if (d->n_row == m_row) {
 64 | 			m_row = m_row? m_row<<1 : 8;
 65 | 			d->x = (float**)realloc(d->x, m_row * sizeof(float*));
 66 | 			d->rname = (char**)realloc(d->rname, m_row * sizeof(char*));
 67 | 		}
 68 | 		d->x[d->n_row] = (float*)malloc(d->n_col * sizeof(float));
 69 | 		for (i = 0, k = st = 0; i <= str.l; ++i) {
 70 | 			if (i == str.l || str.s[i] == '\t') {
 71 | 				char *p;
 72 | 				if (k == 0) {
 73 | 					str.s[i] = 0;
 74 | 					d->rname[d->n_row] = strdup(&str.s[st]);
 75 | 				} else d->x[d->n_row][k-1] = strtod(&str.s[st], &p);
 76 | 				++k, st = i + 1;
 77 | 			}
 78 | 		}
 79 | 		++d->n_row, ++grp_size;
 80 | 	}
 81 | 	if (d->n_grp == m_grp) {
 82 | 		m_grp = m_grp? m_grp<<1 : 8;
 83 | 		d->grp = (int*)realloc(d->grp, m_grp * sizeof(int));
 84 | 	}
 85 | 	d->grp[d->n_grp++] = grp_size;
 86 | 	free(str.s);
 87 | 	ks_destroy(ks);
 88 | 
 89 | 	d->x = (float**)realloc(d->x, d->n_row * sizeof(float*));
 90 | 	d->rname = (char**)realloc(d->rname, d->n_row * sizeof(char*));
 91 | 	d->grp = (int*)realloc(d->grp, d->n_grp * sizeof(int));
 92 | #ifdef HAVE_ZLIB
 93 | 	gzclose(fp);
 94 | #else
 95 | 	close(fp);
 96 | #endif
 97 | 	return d;
 98 | }
 99 | 
100 | void kann_data_free(kann_data_t *d)
101 | {
102 | 	int i;
103 | 	if (d == 0) return;
104 | 	for (i = 0; i < d->n_row; ++i) {
105 | 		if (d->rname) free(d->rname[i]);
106 | 		free(d->x[i]);
107 | 	}
108 | 	if (d->cname) for (i = 0; i < d->n_col; ++i) free(d->cname[i]);
109 | 	free(d->x); free(d->cname); free(d->rname); free(d->grp); free(d);
110 | }
111 | 


--------------------------------------------------------------------------------
/lib/kann-master/kann_extra/kann_data.h:
--------------------------------------------------------------------------------
 1 | #ifndef KANN_DATA_H
 2 | #define KANN_DATA_H
 3 | 
 4 | typedef struct kann_data_t { /* a table of floats as filled in by kann_data_read() */
 5 | 	int n_row, n_col, n_grp; /* number of rows, of values per row, and of row groups */
 6 | 	float **x; /* x[i] points to the values of row i */
 7 | 	char **rname, **cname; /* row names (one per row) and column names (NULL when no header with tab-separated names was read) */
 8 | 	int *grp; /* grp[j] is the number of rows in the j-th group; groups are separated by empty lines in the input */
 9 | } kann_data_t;
10 | 
11 | #ifdef __cplusplus
12 | extern "C" {
13 | #endif
14 | 
15 | kann_data_t *kann_data_read(const char *fn);
16 | void kann_data_free(kann_data_t *d);
17 | 
18 | #ifdef __cplusplus
19 | }
20 | #endif
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/lib/kann-master/kann_extra/kseq.h:
--------------------------------------------------------------------------------
  1 | /* The MIT License
  2 | 
  3 |    Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor@live.co.uk>
  4 | 
  5 |    Permission is hereby granted, free of charge, to any person obtaining
  6 |    a copy of this software and associated documentation files (the
  7 |    "Software"), to deal in the Software without restriction, including
  8 |    without limitation the rights to use, copy, modify, merge, publish,
  9 |    distribute, sublicense, and/or sell copies of the Software, and to
 10 |    permit persons to whom the Software is furnished to do so, subject to
 11 |    the following conditions:
 12 | 
 13 |    The above copyright notice and this permission notice shall be
 14 |    included in all copies or substantial portions of the Software.
 15 | 
 16 |    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 17 |    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 18 |    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 19 |    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 20 |    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 21 |    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 22 |    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 23 |    SOFTWARE.
 24 | */
 25 | 
 26 | /* Last Modified: 05MAR2012 */
 27 | 
 28 | #ifndef AC_KSEQ_H
 29 | #define AC_KSEQ_H
 30 | 
 31 | #include <ctype.h>
 32 | #include <string.h>
 33 | #include <stdlib.h>
 34 | 
 35 | #ifndef klib_unused
 36 | #if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
 37 | #define klib_unused __attribute__ ((__unused__))
 38 | #else
 39 | #define klib_unused
 40 | #endif
 41 | #endif /* klib_unused */
 42 | 
 43 | #define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
 44 | #define KS_SEP_TAB   1 // isspace() && !' '
 45 | #define KS_SEP_LINE  2 // line separator: "\n" (Unix) or "\r\n" (Windows)
 46 | #define KS_SEP_MAX   2
 47 | 
 48 | #define __KS_TYPE(type_t) \
 49 | 	typedef struct __kstream_t { \
 50 | 		int begin, end; \
 51 | 		int is_eof:2, bufsize:30; \
 52 | 		type_t f; \
 53 | 		unsigned char *buf; \
 54 | 	} kstream_t;
 55 | 
 56 | #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
 57 | #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
 58 | 
 59 | #define __KS_BASIC(SCOPE, type_t, __bufsize) \
 60 | 	SCOPE kstream_t *ks_init(type_t f) \
 61 | 	{ \
 62 | 		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
 63 | 		ks->f = f; ks->bufsize = __bufsize; \
 64 | 		ks->buf = (unsigned char*)malloc(__bufsize); \
 65 | 		return ks; \
 66 | 	} \
 67 | 	SCOPE void ks_destroy(kstream_t *ks) \
 68 | 	{ \
 69 | 		if (!ks) return; \
 70 | 		free(ks->buf); \
 71 | 		free(ks); \
 72 | 	}
 73 | 
 74 | #define __KS_INLINED(__read) \
 75 | 	static inline klib_unused int ks_getc(kstream_t *ks) \
 76 | 	{ \
 77 | 		if (ks->is_eof && ks->begin >= ks->end) return -1; \
 78 | 		if (ks->begin >= ks->end) { \
 79 | 			ks->begin = 0; \
 80 | 			ks->end = __read(ks->f, ks->buf, ks->bufsize); \
 81 | 			if (ks->end < ks->bufsize) ks->is_eof = 1; \
 82 | 			if (ks->end == 0) return -1; \
 83 | 		} \
 84 | 		return (int)ks->buf[ks->begin++]; \
 85 | 	} \
 86 | 	static inline klib_unused int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
 87 | 	{ return ks_getuntil2(ks, delimiter, str, dret, 0); }
 88 | 
 89 | #ifndef KSTRING_T
 90 | #define KSTRING_T kstring_t
 91 | typedef struct __kstring_t {
 92 | 	size_t l, m;
 93 | 	char *s;
 94 | } kstring_t;
 95 | #endif
 96 | 
 97 | #ifndef kroundup32
 98 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
 99 | #endif
100 | 
101 | #define __KS_GETUNTIL(SCOPE, __read) \
102 | 	SCOPE int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
103 | 	{ \
104 | 		if (dret) *dret = 0; \
105 | 		str->l = append? str->l : 0; \
106 | 		if (ks->begin >= ks->end && ks->is_eof) return -1; \
107 | 		for (;;) { \
108 | 			int i; \
109 | 			if (ks->begin >= ks->end) { \
110 | 				if (!ks->is_eof) { \
111 | 					ks->begin = 0; \
112 | 					ks->end = __read(ks->f, ks->buf, ks->bufsize); \
113 | 					if (ks->end < ks->bufsize) ks->is_eof = 1; \
114 | 					if (ks->end == 0) break; \
115 | 				} else break; \
116 | 			} \
117 | 			if (delimiter == KS_SEP_LINE) { \
118 | 				for (i = ks->begin; i < ks->end; ++i) \
119 | 					if (ks->buf[i] == '\n') break; \
120 | 			} else if (delimiter > KS_SEP_MAX) { \
121 | 				for (i = ks->begin; i < ks->end; ++i) \
122 | 					if (ks->buf[i] == delimiter) break; \
123 | 			} else if (delimiter == KS_SEP_SPACE) { \
124 | 				for (i = ks->begin; i < ks->end; ++i) \
125 | 					if (isspace(ks->buf[i])) break; \
126 | 			} else if (delimiter == KS_SEP_TAB) { \
127 | 				for (i = ks->begin; i < ks->end; ++i) \
128 | 					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
129 | 			} else i = 0; /* never come to here! */ \
130 | 			if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
131 | 				str->m = str->l + (i - ks->begin) + 1; \
132 | 				kroundup32(str->m); \
133 | 				str->s = (char*)realloc(str->s, str->m); \
134 | 			} \
135 | 			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
136 | 			str->l = str->l + (i - ks->begin); \
137 | 			ks->begin = i + 1; \
138 | 			if (i < ks->end) { \
139 | 				if (dret) *dret = ks->buf[i]; \
140 | 				break; \
141 | 			} \
142 | 		} \
143 | 		if (str->s == 0) { \
144 | 			str->m = 1; \
145 | 			str->s = (char*)calloc(1, 1); \
146 | 		} else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
147 | 		str->s[str->l] = '\0'; \
148 | 		return str->l; \
149 | 	}
150 | 
151 | #define KSTREAM_INIT2(SCOPE, type_t, __read, __bufsize) \
152 | 	__KS_TYPE(type_t) \
153 | 	__KS_BASIC(SCOPE, type_t, __bufsize) \
154 | 	__KS_GETUNTIL(SCOPE, __read) \
155 | 	__KS_INLINED(__read)
156 | 
157 | #define KSTREAM_INIT(type_t, __read, __bufsize) KSTREAM_INIT2(static, type_t, __read, __bufsize)
158 | 
159 | #define KSTREAM_DECLARE(type_t, __read) \
160 | 	__KS_TYPE(type_t) \
161 | 	extern int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append); \
162 | 	extern kstream_t *ks_init(type_t f); \
163 | 	extern void ks_destroy(kstream_t *ks); \
164 | 	__KS_INLINED(__read)
165 | 
166 | /******************
167 |  * FASTA/Q parser *
168 |  ******************/
169 | 
170 | #define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
171 | 
172 | #define __KSEQ_BASIC(SCOPE, type_t) \
173 | 	SCOPE kseq_t *kseq_init(type_t fd) \
174 | 	{ \
175 | 		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
176 | 		s->f = ks_init(fd); \
177 | 		return s; \
178 | 	} \
179 | 	SCOPE void kseq_destroy(kseq_t *ks) \
180 | 	{ \
181 | 		if (!ks) return; \
182 | 		free(ks->name.s); free(ks->comment.s); free(ks->seq.s);	free(ks->qual.s); \
183 | 		ks_destroy(ks->f); \
184 | 		free(ks); \
185 | 	}
186 | 
187 | /* Return value:
188 |    >=0  length of the sequence (normal)
189 |    -1   end-of-file
190 |    -2   truncated quality string
191 |  */
192 | #define __KSEQ_READ(SCOPE) \
193 | 	SCOPE int kseq_read(kseq_t *seq) \
194 | 	{ \
195 | 		int c; \
196 | 		kstream_t *ks = seq->f; \
197 | 		if (seq->last_char == 0) { /* then jump to the next header line */ \
198 | 			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
199 | 			if (c == -1) return -1; /* end of file */ \
200 | 			seq->last_char = c; \
201 | 		} /* else: the first header char has been read in the previous call */ \
202 | 		seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
203 | 		if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
204 | 		if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
205 | 		if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
206 | 			seq->seq.m = 256; \
207 | 			seq->seq.s = (char*)malloc(seq->seq.m); \
208 | 		} \
209 | 		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
210 | 			if (c == '\n') continue; /* skip empty lines */ \
211 | 			seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
212 | 			ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
213 | 		} \
214 | 		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
215 | 		if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
216 | 			seq->seq.m = seq->seq.l + 2; \
217 | 			kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
218 | 			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
219 | 		} \
220 | 		seq->seq.s[seq->seq.l] = 0;	/* null terminated string */ \
221 | 		if (c != '+') return seq->seq.l; /* FASTA */ \
222 | 		if (seq->qual.m < seq->seq.m) {	/* allocate memory for qual in case insufficient */ \
223 | 			seq->qual.m = seq->seq.m; \
224 | 			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
225 | 		} \
226 | 		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
227 | 		if (c == -1) return -2; /* error: no quality string */ \
228 | 		while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
229 | 		seq->last_char = 0;	/* we have not come to the next header line */ \
230 | 		if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
231 | 		return seq->seq.l; \
232 | 	}
233 | 
234 | #define __KSEQ_TYPE(type_t) \
235 | 	typedef struct { \
236 | 		kstring_t name, comment, seq, qual; \
237 | 		int last_char; \
238 | 		kstream_t *f; \
239 | 	} kseq_t;
240 | 
241 | #define KSEQ_INIT2(SCOPE, type_t, __read) \
242 | 	KSTREAM_INIT2(SCOPE, type_t, __read, 16384) \
243 | 	__KSEQ_TYPE(type_t) \
244 | 	__KSEQ_BASIC(SCOPE, type_t) \
245 | 	__KSEQ_READ(SCOPE)
246 | 
247 | #define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)
248 | 
249 | #define KSEQ_DECLARE(type_t) \
250 | 	__KS_TYPE(type_t) \
251 | 	__KSEQ_TYPE(type_t) \
252 | 	extern kseq_t *kseq_init(type_t fd); \
253 | 	void kseq_destroy(kseq_t *ks); \
254 | 	int kseq_read(kseq_t *seq);
255 | 
256 | #endif
257 | 


--------------------------------------------------------------------------------
/lib/kann-master/kautodiff.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |   The MIT License
  3 | 
  4 |   Copyright (c) 2018-2019 Dana-Farber Cancer Institute
  5 |                 2016-2018 Broad Institute
  6 | 
  7 |   Permission is hereby granted, free of charge, to any person obtaining
  8 |   a copy of this software and associated documentation files (the
  9 |   "Software"), to deal in the Software without restriction, including
 10 |   without limitation the rights to use, copy, modify, merge, publish,
 11 |   distribute, sublicense, and/or sell copies of the Software, and to
 12 |   permit persons to whom the Software is furnished to do so, subject to
 13 |   the following conditions:
 14 | 
 15 |   The above copyright notice and this permission notice shall be
 16 |   included in all copies or substantial portions of the Software.
 17 | 
 18 |   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 19 |   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 20 |   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 21 |   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 22 |   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 23 |   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 24 |   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 25 |   SOFTWARE.
 26 | */
 27 | 
 28 | #ifndef KANN_AUTODIFF_H
 29 | #define KANN_AUTODIFF_H
 30 | 
 31 | #define KAD_VERSION "r544"
 32 | 
 33 | #include <stdio.h>
 34 | #include <stdint.h>
 35 | 
 36 | #ifdef __STRICT_ANSI__
 37 | #define inline
 38 | #endif
 39 | 
 40 | #define KAD_MAX_DIM 4     /* max dimension */
 41 | #define KAD_MAX_OP  64    /* max number of operators */
 42 | 
 43 | /* A computational graph is a directed acyclic graph. In the graph, an external
 44 |  * node represents a variable, a constant or a feed; an internal node
 45 |  * represents an operator; an edge from node v to w indicates v is an operand
 46 |  * of w.
 47 |  */
 48 | 
 49 | #define KAD_VAR        0x1
 50 | #define KAD_CONST      0x2
 51 | #define KAD_POOL       0x4
 52 | #define KAD_SHARE_RNG  0x10 /* with this flag on, different time step shares the same RNG status after unroll */
 53 | 
 54 | #define kad_is_back(p)  ((p)->flag & KAD_VAR)
 55 | #define kad_is_ext(p)   ((p)->n_child == 0)
 56 | #define kad_is_var(p)   (kad_is_ext(p) && kad_is_back(p))
 57 | #define kad_is_const(p) (kad_is_ext(p) && ((p)->flag & KAD_CONST))
 58 | #define kad_is_feed(p)  (kad_is_ext(p) && !kad_is_back(p) && !((p)->flag & KAD_CONST))
 59 | #define kad_is_pivot(p) ((p)->n_child == 1 && ((p)->flag & KAD_POOL))
 60 | #define kad_is_switch(p) ((p)->op == 12 && !((p)->flag & KAD_POOL))
 61 | #define kad_use_rng(p)  ((p)->op == 15 || (p)->op == 24)
 62 | 
 63 | #define kad_eval_enable(p) ((p)->tmp = 1)
 64 | #define kad_eval_disable(p) ((p)->tmp = -1)
 65 | 
 66 | /* a node in the computational graph */
  67 | typedef struct kad_node_t {
  68 | 	uint8_t     n_d;            /* number of dimensions; no larger than KAD_MAX_DIM */
  69 | 	uint8_t     flag;           /* type of the node; bit flags such as KAD_VAR, KAD_CONST and KAD_POOL defined above */
  70 | 	uint16_t    op;             /* operator; kad_op_list[op] is the actual function */
  71 | 	int32_t     n_child;        /* number of operands/child nodes */
  72 | 	int32_t     tmp;            /* temporary field; MUST BE zero before calling kad_compile() */
  73 | 	int32_t     ptr_size;       /* size of ptr below */
  74 | 	int32_t     d[KAD_MAX_DIM]; /* dimensions */
  75 | 	int32_t     ext_label;      /* labels for external uses (not modified by the kad_* APIs) */
  76 | 	uint32_t    ext_flag;       /* flags for external uses (not modified by the kad_* APIs) */
  77 | 	float      *x;              /* value; allocated for internal nodes */
  78 | 	float      *g;              /* gradient; allocated for internal nodes */
  79 | 	void       *ptr;            /* for special operators that need additional parameters (e.g. conv2d) */
  80 | 	void       *gtmp;           /* temporary data generated at the forward pass but used at the backward pass */
  81 | 	struct kad_node_t **child;  /* operands/child nodes */
  82 | 	struct kad_node_t  *pre;    /* usually NULL; only used for RNN */
  83 | } kad_node_t, *kad_node_p;
 84 | 
 85 | #ifdef __cplusplus
 86 | extern "C" {
 87 | #endif
 88 | 
 89 | /**
 90 |  * Compile/linearize a computational graph
 91 |  *
 92 |  * @param n_node   number of nodes (out)
 93 |  * @param n_roots  number of nodes without predecessors
 94 |  * @param roots    list of nodes without predecessors
 95 |  *
 96 |  * @return list of nodes, of size *n_node
 97 |  */
 98 | kad_node_t **kad_compile_array(int *n_node, int n_roots, kad_node_t **roots);
 99 | 
100 | kad_node_t **kad_compile(int *n_node, int n_roots, ...); /* an alternative API to above */
101 | void kad_delete(int n, kad_node_t **a); /* deallocate a compiled/linearized graph */
102 | 
103 | /**
104 |  * Compute the value at a node
105 |  * 
106 |  * @param n       number of nodes
107 |  * @param a       list of nodes
108 |  * @param from    compute the value at this node, 0<=from<n
109 |  *
110 |  * @return a pointer to the value (pointing to kad_node_t::x, so don't call
111 |  *         free() on it!)
112 |  */
113 | const float *kad_eval_at(int n, kad_node_t **a, int from);
114 | 
115 | void kad_eval_marked(int n, kad_node_t **a);
116 | int kad_sync_dim(int n, kad_node_t **v, int batch_size);
117 | 
118 | /**
119 |  * Compute gradient
120 |  *
121 |  * @param n       number of nodes
122 |  * @param a       list of nodes
123 |  * @param from    the function node; must be a scalar (compute \nabla a[from])
124 |  */
125 | void kad_grad(int n, kad_node_t **a, int from);
126 | 
127 | /**
128 |  * Unroll a recurrent computation graph
129 |  *
130 |  * @param n_v     number of nodes
131 |  * @param v       list of nodes
132 |  * @param new_n   number of nodes in the unrolled graph (out)
133 |  * @param len     how many times to unroll, one for each pivot
134 |  *
135 |  * @return list of nodes in the unrolled graph
136 |  */
137 | kad_node_t **kad_unroll(int n_v, kad_node_t **v, int *new_n, int *len);
138 | int kad_n_pivots(int n_v, kad_node_t **v);
139 | 
140 | kad_node_t **kad_clone(int n, kad_node_t **v, int batch_size);
141 | 
142 | /* define a variable, a constant or a feed (placeholder in TensorFlow) */
143 | kad_node_t *kad_var(float *x, float *g, int n_d, ...); /* a variable; gradients to be computed; not unrolled */
144 | kad_node_t *kad_const(float *x, int n_d, ...);         /* a constant; no gradients computed; not unrolled */
145 | kad_node_t *kad_feed(int n_d, ...);                    /* an input/output; no gradients computed; unrolled */
146 | 
147 | /* operators taking two operands */
 148 | kad_node_t *kad_add(kad_node_t *x, kad_node_t *y); /* f(x,y) = x + y (generalized element-wise addition; f[i*n+j]=x[i*n+j]+y[j], n=kad_len(y), 0<=j<n, 0<=i<kad_len(x)/n) */
149 | kad_node_t *kad_sub(kad_node_t *x, kad_node_t *y); /* f(x,y) = x - y (generalized element-wise subtraction) */
150 | kad_node_t *kad_mul(kad_node_t *x, kad_node_t *y); /* f(x,y) = x * y (generalized element-wise product) */
151 | 
152 | kad_node_t *kad_matmul(kad_node_t *x, kad_node_t *y);     /* f(x,y) = x * y   (general matrix product) */
153 | kad_node_t *kad_cmul(kad_node_t *x, kad_node_t *y);       /* f(x,y) = x * y^T (column-wise matrix product; i.e. y is transposed) */
154 | 
155 | /* loss functions; output scalar */
156 | kad_node_t *kad_mse(kad_node_t *x, kad_node_t *y);        /* mean square error */
 157 | kad_node_t *kad_ce_multi(kad_node_t *x, kad_node_t *y);   /* multi-class cross-entropy; x is the prediction and y is the truth */
158 | kad_node_t *kad_ce_bin(kad_node_t *x, kad_node_t *y);     /* binary cross-entropy for (0,1) */
159 | kad_node_t *kad_ce_bin_neg(kad_node_t *x, kad_node_t *y); /* binary cross-entropy for (-1,1) */
160 | kad_node_t *kad_ce_multi_weighted(kad_node_t *pred, kad_node_t *truth, kad_node_t *weight);
161 | 
162 | #define KAD_PAD_NONE  0      /* use the smallest zero-padding */
163 | #define KAD_PAD_SAME  (-2)   /* output to have the same dimension as input */
164 | 
165 | kad_node_t *kad_conv2d(kad_node_t *x, kad_node_t *w, int r_stride, int c_stride, int r_pad, int c_pad);             /* 2D convolution with weight matrix flipped */
166 | kad_node_t *kad_max2d(kad_node_t *x, int kernel_h, int kernel_w, int r_stride, int c_stride, int r_pad, int c_pad); /* 2D max pooling */
167 | kad_node_t *kad_conv1d(kad_node_t *x, kad_node_t *w, int stride, int pad);  /* 1D convolution with weight flipped */
168 | kad_node_t *kad_max1d(kad_node_t *x, int kernel_size, int stride, int pad); /* 1D max pooling */
169 | kad_node_t *kad_avg1d(kad_node_t *x, int kernel_size, int stride, int pad); /* 1D average pooling */
170 | 
171 | kad_node_t *kad_dropout(kad_node_t *x, kad_node_t *r);                      /* dropout at rate r */
172 | kad_node_t *kad_sample_normal(kad_node_t *x);                               /* f(x) = x * r, where r is drawn from a standard normal distribution */
173 | 
174 | /* operators taking one operand */
175 | kad_node_t *kad_square(kad_node_t *x); /* f(x) = x^2                         (element-wise square) */
176 | kad_node_t *kad_sigm(kad_node_t *x);   /* f(x) = 1/(1+exp(-x))               (element-wise sigmoid) */
177 | kad_node_t *kad_tanh(kad_node_t *x);   /* f(x) = (1-exp(-2x)) / (1+exp(-2x)) (element-wise tanh) */
178 | kad_node_t *kad_relu(kad_node_t *x);   /* f(x) = max{0,x}                    (element-wise rectifier, aka ReLU) */
179 | kad_node_t *kad_softmax(kad_node_t *x);/* f_i(x_1,...,x_n) = exp(x_i) / \sum_j exp(x_j) (softmax: tf.nn.softmax(x,dim=-1)) */
180 | kad_node_t *kad_1minus(kad_node_t *x); /* f(x) = 1 - x */
181 | kad_node_t *kad_exp(kad_node_t *x);    /* f(x) = exp(x) */
182 | kad_node_t *kad_log(kad_node_t *x);    /* f(x) = log(x) */
183 | kad_node_t *kad_sin(kad_node_t *x);    /* f(x) = sin(x) */
184 | 
185 | kad_node_t *kad_stdnorm(kad_node_t *x); /* layer normalization; applied to the last dimension */
186 | 
187 | /* operators taking an indefinite number of operands (e.g. pooling) */
188 | kad_node_t *kad_avg(int n, kad_node_t **x);   /* f(x_1,...,x_n) = \sum_i x_i/n      (mean pooling) */
189 | kad_node_t *kad_max(int n, kad_node_t **x);   /* f(x_1,...,x_n) = max{x_1,...,x_n}  (max pooling) */
190 | kad_node_t *kad_stack(int n, kad_node_t **x); /* f(x_1,...,x_n) = [x_1,...,x_n]     (stack pooling) */
191 | kad_node_t *kad_select(int n, kad_node_t **x, int which); /* f(x_1,...,x_n;i) = x_i (select pooling; -1 for the last) */
192 | 
193 | /* dimension reduction */
194 | kad_node_t *kad_reduce_sum(kad_node_t *x, int axis);  /* tf.reduce_sum(x, axis) */
195 | kad_node_t *kad_reduce_mean(kad_node_t *x, int axis); /* tf.reduce_mean(x, axis) */
196 | 
197 | /* special operators */
198 | kad_node_t *kad_slice(kad_node_t *x, int axis, int start, int end); /* take a slice on the axis-th dimension */
199 | kad_node_t *kad_concat(int axis, int n, ...);                       /* concatenate on the axis-th dimension */
200 | kad_node_t *kad_concat_array(int axis, int n, kad_node_t **p);      /* the array version of concat */
201 | kad_node_t *kad_reshape(kad_node_t *x, int n_d, int *d);            /* reshape; similar behavior to TensorFlow's reshape() */
202 | kad_node_t *kad_reverse(kad_node_t *x, int axis);
203 | kad_node_t *kad_switch(int n, kad_node_t **p);                      /* manually (as a hyperparameter) choose one input, default to 0 */
204 | 
205 | /* miscellaneous operations on a compiled graph */
206 | int kad_size_var(int n, kad_node_t *const* v);   /* total size of all variables */
207 | int kad_size_const(int n, kad_node_t *const* v); /* total size of all constants */
208 | 
209 | /* graph I/O */
210 | int kad_save(FILE *fp, int n_node, kad_node_t **node);
211 | kad_node_t **kad_load(FILE *fp, int *_n_node);
212 | 
213 | /* random number generator */
214 | void *kad_rng(void);
215 | void kad_srand(void *d, uint64_t seed);
216 | uint64_t kad_rand(void *d);
217 | double kad_drand(void *d);
218 | double kad_drand_normal(void *d);
219 | void kad_saxpy(int n, float a, const float *x, float *y);
220 | 
221 | /* debugging routines */
222 | void kad_trap_fe(void); /* abort on divide-by-zero and NaN */
223 | void kad_print_graph(FILE *fp, int n, kad_node_t **v);
224 | void kad_check_grad(int n, kad_node_t **a, int from);
225 | 
226 | #ifdef __cplusplus
227 | }
228 | #endif
229 | 
230 | #define KAD_ALLOC      1
231 | #define KAD_FORWARD    2
232 | #define KAD_BACKWARD   3
233 | #define KAD_SYNC_DIM   4
234 | 
235 | typedef int (*kad_op_f)(kad_node_t*, int);
236 | extern kad_op_f kad_op_list[KAD_MAX_OP];
237 | extern char *kad_op_name[KAD_MAX_OP];
238 | 
239 | static inline int kad_len(const kad_node_t *p) /* calculate the size of p->x */
240 | {
241 | 	int n = 1, i;
242 | 	for (i = 0; i < p->n_d; ++i) n *= p->d[i];
243 | 	return n;
244 | }
245 | 
246 | #endif
247 | 


--------------------------------------------------------------------------------
/models/cnn.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/42io/c_keyword_spotting/255a2aa920f18e05f113b4870124d291140491c0/models/cnn.model


--------------------------------------------------------------------------------
/models/mlp.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/42io/c_keyword_spotting/255a2aa920f18e05f113b4870124d291140491c0/models/mlp.model


--------------------------------------------------------------------------------
/models/rnn.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/42io/c_keyword_spotting/255a2aa920f18e05f113b4870124d291140491c0/models/rnn.model


--------------------------------------------------------------------------------
/src/brain/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | cd "`dirname "${BASH_SOURCE[0]}"`"
 6 | 
 7 | mkdir -p ../../bin
 8 | 
 9 | gcc -Werror -Wall -Wextra -Wpedantic -Wno-sign-compare \
10 |   -I. -I../../lib/kann-master \
11 |   -DHAVE_PTHREAD \
12 |   ../../lib/kann-master/kann.c \
13 |   ../../lib/kann-master/kautodiff.c \
14 |   -o ../../bin/mlp_train mlp_train.c dataset.c norm.c -lm -lpthread
15 | 
16 | echo "MLP train build OK!"
17 | 
18 | gcc -Werror -Wall -Wextra -Wpedantic -Wno-sign-compare \
19 |   -I. -I../../lib/kann-master \
20 |   -DHAVE_PTHREAD \
21 |   ../../lib/kann-master/kann.c \
22 |   ../../lib/kann-master/kautodiff.c \
23 |   -o ../../bin/cnn_train cnn_train.c dataset.c norm.c -lm -lpthread
24 | 
25 | echo "CNN train build OK!"
26 | 
27 | gcc -Werror -Wall -Wextra -Wpedantic -Wno-sign-compare \
28 |   -I. -I../../lib/kann-master \
29 |   -DHAVE_PTHREAD \
30 |   ../../lib/kann-master/kann.c \
31 |   ../../lib/kann-master/kautodiff.c \
32 |   -o ../../bin/rnn_train rnn_train.c dataset.c -lm -lpthread
33 | 
34 | echo "RNN train build OK!"
35 | 
36 | gcc -Werror -Wall -Wextra -Wpedantic -Wno-sign-compare \
37 |   -I. -I../../lib/kann-master \
38 |   ../../lib/kann-master/kann.c \
39 |   ../../lib/kann-master/kautodiff.c \
40 |   -o ../../bin/guess guess.c norm.c -lm
41 | 
42 | echo "Guess build OK!"


--------------------------------------------------------------------------------
/src/brain/cnn_train.c:
--------------------------------------------------------------------------------
 1 | #include "kann.h"
 2 | #include "dataset.h"
 3 | #include <assert.h>
 4 | #include "norm.h"
 5 | 
 6 | /*********************************************************************/
 7 | 
 8 | static kann_t *model_gen(int height, int width, int n_out, int n_h_fc, float dropout)
 9 | {
10 |   assert(height == 49);
11 |   assert(width == 13);
12 |   kad_node_t *t;
13 |   t = kad_feed(4, 1, 1, height, width), t->ext_flag |= KANN_F_IN;
14 |   t = kad_relu(kann_layer_conv2d(t, 32, 13, 8, 1, 1, 0, 0)); // 13x8 kernel; 1x1 stride; 0x0 padding
15 |   // output height = ((H-F+2*P)/S)+1
16 |   // output height = H(input height), F(filter height), P(padding height), S(stride height)
17 |   // output height = 49 - 13 + 1 = 37
18 |   // output width  = 13 - 8 + 1 = 6
19 |   t = kad_relu(kann_layer_conv2d(t, 64, 8, 6, 1, 1, 0, 0));
20 |   // output height = 37 - 8 + 1 = 30
21 |   // output width  = 6 - 6 + 1 = 1
22 |   t = kann_layer_dropout(t, dropout);
23 |   t = kad_max2d(t, 2, 1, 2, 1, 0, 0); // 2x1 kernel; 2x1 stride; 0x0 padding
24 |   // output height = 30/2 = 15
25 |   // output width  = 1/1 = 1
26 |   t = kad_relu(kann_layer_dense(t, n_h_fc));
27 |   t = kann_layer_dropout(t, dropout);
28 |   return kann_new(kann_layer_cost(t, n_out, KANN_C_CEB), 0);
29 | }
30 | 
31 | /*********************************************************************/
32 | 
33 | static void train(kann_t *ann, dataset_t *ds)
34 | {
35 |   assert(kann_dim_in(ann) == ds->num_input);
36 |   assert(kann_dim_out(ann) == ds->num_output);
37 |   for(int i = 0; i < ds->train.len; i++)
38 |   {
39 |     norm_min_max(ds->train.input[i], ds->num_input);
40 |   }
41 |   kann_train_fnn1(ann, 0.001f, 64, 100, 10, 0.1f,
42 |                   ds->train.len, ds->train.input, ds->train.output);
43 | }
44 | 
45 | /*********************************************************************/
46 | 
47 | static void save(kann_t *ann)
48 | {
49 |   kann_save("./../models/cnn.model", ann);
50 | }
51 | 
52 | /*********************************************************************/
53 | 
54 | int main(int argc, const char *argv[])
55 | {
56 |   assert(argc == 5);
57 |   dataset_t *ds = dataset_load(argv[1], atol(argv[2]), atol(argv[3]), atol(argv[4]));
58 |   kann_srand(131 /*seed, each train results are reproducible*/);
59 |   kann_t *ann = model_gen(ds->input_height, ds->input_width, ds->num_output, 128, 0.2f);
60 |   assert(!kann_is_rnn(ann));
61 |   train(ann, ds);
62 |   save(ann);
63 |   kann_delete(ann);
64 | 
65 |   return 0;
66 | }
67 | 
68 | /*********************************************************************/
69 | 


--------------------------------------------------------------------------------
/src/brain/dataset.c:
--------------------------------------------------------------------------------
  1 | #include "dataset.h"
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | #include <string.h>
  5 | #include <assert.h>
  6 | 
  7 | /*********************************************************************/
  8 | 
  9 | static dataset_t* dataset_from_fd(
 10 |   FILE *fd,
 11 |   const uint32_t input_height,
 12 |   const uint32_t input_width,
 13 |   const uint32_t num_output)
 14 | {
 15 |   dataset_t *ds = malloc(sizeof(*ds));
 16 |   assert(ds);
 17 | 
 18 |   ds->train.input  = ds->valid.input  = ds->test.input  = NULL;
 19 |   ds->train.output = ds->valid.output = ds->test.output = NULL;
 20 |   ds->train.len    = ds->valid.len    = ds->test.len    = 0;
 21 |   ds->input_width  = input_width;
 22 |   ds->input_height = input_height;
 23 |   ds->num_output   = num_output;
 24 |   ds->num_input    = input_width * input_height;
 25 | 
 26 |   int32_t output_val = 0;
 27 |   while(fscanf(fd, "%d", &output_val) == 1)
 28 |   {
 29 |     dataset_array_t *dest;
 30 |     if(output_val < num_output)
 31 |     {
 32 |       dest = &ds->train;
 33 |     }
 34 |     else if(output_val < 2 * num_output)
 35 |     {
 36 |       output_val -= num_output;
 37 |       dest = &ds->valid;
 38 |     }
 39 |     else
 40 |     {
 41 |       output_val -= 2 * num_output;
 42 |       dest = &ds->test;
 43 |     }
 44 | 
 45 |     assert(output_val < num_output);
 46 | 
 47 |     uint32_t i = dest->len++;
 48 | 
 49 |     dest->output = realloc(dest->output, dest->len * sizeof(float*));
 50 |     assert(dest->output);
 51 |     dest->output[i] = malloc(num_output * sizeof(float));
 52 |     for(int j = 0; j < num_output; j++)
 53 |     {
 54 |       dest->output[i][j] = j == output_val ? 1 : 0;
 55 |     }
 56 | 
 57 |     dest->input = realloc(dest->input, dest->len * sizeof(float*));
 58 |     assert(dest->input);
 59 |     dest->input[i] = malloc(ds->num_input * sizeof(float));
 60 |     for(int j = 0; j < ds->num_input; j++)
 61 |     {
 62 |       assert(fscanf(fd, "%f", &dest->input[i][j]) == 1);
 63 |     }
 64 |     assert(fgetc(fd) == '\n');
 65 |   }
 66 | 
 67 |   assert(fgetc(fd) == EOF);
 68 | 
 69 |   return ds;
 70 | }
 71 | 
 72 | /*********************************************************************/
 73 | 
 74 | dataset_t* dataset_load(
 75 |   const char* const path,
 76 |   const uint32_t input_height,
 77 |   const uint32_t input_width,
 78 |   const uint32_t num_output)
 79 | {
 80 |   FILE *fd = path && strcmp(path, "-") ? fopen(path, "r") : stdin;
 81 |   assert(fd);
 82 | 
 83 |   dataset_t* ds = dataset_from_fd(fd, input_height, input_width, num_output);
 84 | 
 85 |   if(fd != stdin)
 86 |   {
 87 |     fclose(fd);
 88 |   }
 89 | 
 90 |   printf("Dataset train %u, valid %u, test %u\n",
 91 |           ds->train.len, ds->valid.len, ds->test.len);
 92 | 
 93 |   assert(ds->train.output[0][0] == 1);
 94 |   assert(ds->valid.output[0][0] == 1);
 95 |   assert(ds->test.output[0][0]  == 1);
 96 |   assert(ds->train.output[0][num_output - 1] == 0);
 97 |   assert(ds->valid.output[0][num_output - 1] == 0);
 98 |   assert(ds->test.output[0][num_output  - 1] == 0);
 99 | 
100 |   assert(ds->train.output[ds->train.len - 1][num_output - 1] == 1);
101 |   assert(ds->valid.output[ds->valid.len - 1][num_output - 1] == 1);
102 |   assert(ds->test.output[ds->test.len   - 1][num_output - 1] == 1);
103 |   assert(ds->train.output[ds->train.len - 1][0] == 0);
104 |   assert(ds->valid.output[ds->valid.len - 1][0] == 0);
105 |   assert(ds->test.output[ds->test.len   - 1][0] == 0);
106 | 
107 |   return ds;
108 | }
109 | 
110 | /*********************************************************************/


--------------------------------------------------------------------------------
/src/brain/dataset.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <stdint.h>
 4 | 
 5 | /*********************************************************************/
 6 | 
 7 | typedef struct
 8 | {
 9 |   uint32_t len;
10 |   float **input;
11 |   float **output;
12 | } dataset_array_t;
13 | 
14 | /*********************************************************************/
15 | 
16 | typedef struct
17 | {
18 |   uint32_t num_input;
19 |   uint32_t num_output;
20 |   uint32_t input_width;
21 |   uint32_t input_height;
22 |   dataset_array_t train, valid, test;
23 | } dataset_t;
24 | 
25 | /*********************************************************************/
26 | 
27 | dataset_t* dataset_load(
28 |   const char* const path,
29 |   const uint32_t input_height,
30 |   const uint32_t input_width,
31 |   const uint32_t num_output);
32 | 
33 | /*********************************************************************/


--------------------------------------------------------------------------------
/src/brain/guess.c:
--------------------------------------------------------------------------------
 1 | #include "kann.h"
 2 | #include <assert.h>
 3 | #include "norm.h"
 4 | 
 5 | /*********************************************************************/
 6 | 
 7 | int main(int argc, const char *argv[])
 8 | {
 9 |   assert(argc == 2);
10 |   kann_t *ann = kann_load(argv[1]);
11 |   assert(ann);
12 | 
13 |   const int in_num = kann_dim_in(ann);
14 |   float* in = malloc(in_num * sizeof(float*));
15 |   assert(in);
16 |   const float* out = NULL;
17 | 
18 | loop:
19 | 
20 |   if(kann_is_rnn(ann))
21 |   {
22 |     assert(in_num == 13);
23 |     kann_rnn_start(ann);
24 |     for(int k = 0; k < 49; k++)
25 |     {
26 |       for(int i = 0; i < in_num; i++)
27 |       {
28 |         assert(scanf("%f", &in[i]) == 1);
29 |       }
30 |       out = kann_apply1(ann, in);
31 |     }
32 |     kann_rnn_end(ann);
33 |   }
34 |   else
35 |   {
36 |     for(int i = 0; i < in_num; i++)
37 |     {
38 |       assert(scanf("%f", &in[i]) == 1);
39 |     }
40 |     norm_min_max(in, in_num);
41 |     out = kann_apply1(ann, in);
42 |   }
43 | 
44 |   assert(getchar() == '\n');
45 |   assert(out);
46 | 
47 |   for (int i = 0; i < kann_dim_out(ann); i++)
48 |   {
49 |     if (i)
50 |     {
51 |       putchar(' ');
52 |     }
53 |     printf("%f", out[i]);
54 |   }
55 |   putchar('\n');
56 | 
57 |   const int ch = getchar();
58 |   if(ch != EOF)
59 |   {
60 |     assert(ch == ungetc(ch, stdin));
61 |     goto loop;
62 |   }
63 | 
64 |   kann_delete(ann);
65 |   free(in);
66 | 
67 |   return 0;
68 | }
69 | 
70 | /*********************************************************************/


--------------------------------------------------------------------------------
/src/brain/mlp_train.c:
--------------------------------------------------------------------------------
 1 | #include "kann.h"
 2 | #include "dataset.h"
 3 | #include <assert.h>
 4 | #include "norm.h"
 5 | 
 6 | /*********************************************************************/
 7 | 
 8 | static kann_t *model_gen(int n_in, int n_out, int loss_type, int n_h_layers, int n_h_neurons)
 9 | {
10 |   int i;
11 |   kad_node_t *t;
12 |   t = kann_layer_input(n_in);
13 |   for (i = 0; i < n_h_layers; ++i)
14 |     t = kad_relu(kann_layer_dense(t, n_h_neurons));
15 |   return kann_new(kann_layer_cost(t, n_out, loss_type), 0);
16 | }
17 | 
18 | /*********************************************************************/
19 | 
20 | static void train(kann_t *ann, dataset_t *ds)
21 | {
22 |   assert(kann_dim_in(ann) == ds->num_input);
23 |   assert(kann_dim_out(ann) == ds->num_output);
24 |   for(int i = 0; i < ds->train.len; i++)
25 |   {
26 |     norm_min_max(ds->train.input[i], ds->num_input);
27 |   }
28 |   kann_train_fnn1(ann, 0.001f, 64, 100, 10, 0.1f,
29 |                   ds->train.len, ds->train.input, ds->train.output);
30 | }
31 | 
32 | /*********************************************************************/
33 | 
34 | static void save(kann_t *ann)
35 | {
36 |   kann_save("./../models/mlp.model", ann);
37 | }
38 | 
39 | /*********************************************************************/
40 | 
41 | int main(int argc, const char *argv[])
42 | {
43 |   assert(argc == 5);
44 |   dataset_t *ds = dataset_load(argv[1], atol(argv[2]), atol(argv[3]), atol(argv[4]));
45 |   kann_srand(11 /*seed, each train results are reproducible*/);
46 |   kann_t *ann = model_gen(ds->num_input, ds->num_output, KANN_C_CEB, 2, 100);
47 |   assert(!kann_is_rnn(ann));
48 |   train(ann, ds);
49 |   save(ann);
50 |   kann_delete(ann);
51 | 
52 |   return 0;
53 | }
54 | 
55 | /*********************************************************************/


--------------------------------------------------------------------------------
/src/brain/norm.c:
--------------------------------------------------------------------------------
 1 | #include "norm.h"
 2 | 
 3 | /*********************************************************************/
 4 | 
 5 | void norm_min_max(float* const samples, const size_t n_samples)
 6 | {
 7 |   float max, min;
 8 | 
 9 |   if(n_samples < 1)
10 |   {
11 |     return;
12 |   }
13 | 
14 |   for(size_t i = 0; i < n_samples; i++)
15 |   {
16 |     const float sample = samples[i];
17 |     if(i)
18 |     {
19 |       if(sample > max)
20 |       {
21 |          max = sample;
22 |       }
23 |       if(sample < min)
24 |       {
25 |         min = sample;
26 |       }
27 |     }
28 |     else
29 |     {
30 |       max = min = sample;
31 |     }
32 |   }
33 | 
34 |   if(max - min == 0)
35 |   {
36 |     for(size_t i = 0; i < n_samples; i++)
37 |     {
38 |       samples[i] = 1;
39 |     }
40 |   }
41 |   else
42 |   {
43 |     for(size_t i = 0; i < n_samples; i++)
44 |     {
45 |       samples[i] = (samples[i] - min) / (max - min);
46 |     }
47 |   }
48 | }
49 | 
50 | /*********************************************************************/


--------------------------------------------------------------------------------
/src/brain/norm.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <stdlib.h>
4 | 
5 | /*********************************************************************/
6 | 
7 | void norm_min_max(float* const samples, const size_t n_samples);
8 | 
9 | /*********************************************************************/


--------------------------------------------------------------------------------
/src/brain/rnn_train.c:
--------------------------------------------------------------------------------
  1 | #include "kann.h"
  2 | #include "dataset.h"
  3 | #include <assert.h>
  4 | #include <stdlib.h>
  5 | #include <string.h>
  6 | 
  7 | /*********************************************************************/
  8 | 
  9 | static kann_t *model_gen(int n_in, int n_out, int n_h_layers, int n_h_neurons, float dropout)
 10 | {
 11 |   int i;
 12 |   kad_node_t *t;
 13 |   int rnn_flag = KANN_RNN_VAR_H0 | KANN_RNN_NORM;
 14 |   t = kann_layer_input(n_in);
 15 |   for (i = 0; i < n_h_layers; ++i) {
 16 |     t = kann_layer_gru(t, n_h_neurons, rnn_flag);
 17 |     t = kann_layer_dropout(t, dropout);
 18 |   }
 19 |   t = kad_select(1, &t, -1);
 20 |   return kann_new(kann_layer_cost(t, n_out, KANN_C_CEB), 0);
 21 | }
 22 | 
 23 | /*********************************************************************/
 24 | 
 25 | static void train(kann_t *ann, dataset_t *ds, float lr, int mini_size, int max_epoch, const char *fn, int n_threads)
 26 | {
 27 |   float **x, **y, *r, best_cost = 1e30f;
 28 |   int epoch, j, n_var, *shuf, ulen = ds->input_height, n_in = ds->input_width, n_out = ds->num_output;
 29 |   dataset_array_t *d = &ds->train;
 30 |   kann_t *ua;
 31 | 
 32 |   assert(kann_dim_in(ann) == n_in);
 33 |   assert(kann_dim_out(ann) == n_out);
 34 | 
 35 |   assert(ulen == 49);
 36 |   assert(n_in == 13);
 37 | 
 38 |   n_var = kann_size_var(ann);
 39 |   r = (float*)calloc(n_var, sizeof(float));
 40 |   x = (float**)malloc(ulen * sizeof(float*));
 41 |   y = (float**)malloc(1 * sizeof(float*));
 42 |   for (j = 0; j < ulen; ++j) {
 43 |     x[j] = (float*)calloc(mini_size * n_in, sizeof(float));
 44 |   }
 45 |   y[0] = (float*)calloc(mini_size * n_out, sizeof(float));
 46 |   shuf = (int*)calloc(d->len, sizeof(int));
 47 | 
 48 |   ua = kann_unroll(ann, ulen);
 49 |   kann_set_batch_size(ua, mini_size);
 50 |   kann_mt(ua, n_threads, mini_size);
 51 |   kann_feed_bind(ua, KANN_F_IN,    0, x);
 52 |   kann_feed_bind(ua, KANN_F_TRUTH, 0, y);
 53 |   kann_switch(ua, 1);
 54 |   for (epoch = 0; epoch < max_epoch; ++epoch) {
 55 |     kann_shuffle(d->len, shuf);
 56 |     double cost = 0.0;
 57 |     int tot = 0, tot_base = 0, n_cerr = 0;
 58 |     for (j = 0; j < d->len - mini_size; j += mini_size) {
 59 |       int b, k;
 60 |       for (b = 0; b < mini_size; ++b) {
 61 |         int s = shuf[j + b];
 62 |         for (k = 0; k < ulen; ++k) {
 63 |           memcpy(&x[k][b * n_in], &d->input[s][k * n_in], n_in * sizeof(float));
 64 |         }
 65 |         memcpy(&y[0][b * n_out], d->output[s], n_out * sizeof(float));
 66 |       }
 67 |       cost += kann_cost(ua, 0, 1) * ulen * mini_size;
 68 |       n_cerr += kann_class_error(ua, &k);
 69 |       tot_base += k;
 70 |       //kad_check_grad(ua->n, ua->v, ua->n-1);
 71 |       kann_RMSprop(n_var, lr, 0, 0.9f, ua->g, ua->x, r);
 72 |       tot += ulen * mini_size;
 73 |     }
 74 |     if (cost < best_cost) {
 75 |       best_cost = cost;
 76 |       if (fn) {
 77 |         const size_t len = snprintf(NULL, 0, fn, epoch+1);
 78 |         assert(len > 0);
 79 |         char *fn_ws_epoch = malloc((len+1) * sizeof(char));
 80 |         assert(fn_ws_epoch);
 81 |         assert(snprintf(fn_ws_epoch, len+1, fn, epoch+1) == len);
 82 |         kann_save(fn_ws_epoch, ann);
 83 |         free(fn_ws_epoch);
 84 |       }
 85 |     }
 86 |     fprintf(stderr, "epoch: %d; cost: %g (class error: %.2f%%)\n", epoch+1, cost / tot, 100.0f * n_cerr / tot_base);
 87 |   }
 88 | 
 89 |   kann_delete_unrolled(ua);
 90 | 
 91 |   for (j = 0; j < ulen; ++j) {
 92 |     free(x[j]);
 93 |   }
 94 |   free(y[0]); free(y); free(x); free(r); free(shuf);
 95 | }
 96 | 
 97 | /*********************************************************************/
 98 | 
 99 | int main(int argc, const char *argv[])
100 | {
101 |   assert(argc == 5);
102 |   dataset_t *ds = dataset_load(argv[1], atol(argv[2]), atol(argv[3]), atol(argv[4]));
103 | 
104 |   int mini_size = 64, max_epoch = 500, seed = 84, n_h_layers = 3, n_h_neurons = 32, n_threads = 1;
105 |   float lr = 0.001f, dropout = 0.2f;
106 | 
107 |   kann_srand(seed /*seed, each train results are reproducible*/);
108 |   kann_t *ann = model_gen(ds->input_width, ds->num_output, n_h_layers, n_h_neurons, dropout);
109 |   assert(kann_is_rnn(ann));
110 | 
111 |   train(ann, ds, lr, mini_size, max_epoch, "./../models/rnn-epoch-%d.model", n_threads);
112 |   kann_delete(ann);
113 | 
114 |   return 0;
115 | }
116 | 
117 | /*********************************************************************/
118 | 


--------------------------------------------------------------------------------