├── layer
│   ├── Layer.cpp
│   ├── Layer.h
│   ├── FlattenLayer.h
│   ├── FlattenLayer.cpp
│   ├── DropoutLayer.h
│   ├── ActivationLayer.cpp
│   ├── FullyConnectedLayer.h
│   ├── ActivationLayer.h
│   ├── ConvolutionalLayer.h
│   ├── DropoutLayer.cpp
│   ├── FullyConnectedLayer.cpp
│   └── ConvolutionalLayer.cpp
├── loss
│   ├── LossFunction.cpp
│   ├── LossFunction.h
│   ├── MeanSquaredError.h
│   └── MeanSquaredError.cpp
├── activation
│   ├── ActivationFunction.cpp
│   ├── ReLu.h
│   ├── Tanh.h
│   ├── Sigmoid.h
│   ├── ActivationFunction.h
│   ├── Tanh.cpp
│   ├── ReLu.cpp
│   └── Sigmoid.cpp
├── images
│   ├── Neural-Net-cpp-circle_data.png
│   ├── Neural-Net-cpp-circle_mesh.png
│   ├── Neural-Net-cpp-window_data.png
│   ├── Neural-Net-cpp-window_mesh.png
│   ├── Neural-Net-cpp-cluster_data.png
│   ├── Neural-Net-cpp-cluster_mesh.png
│   ├── Neural-Net-cpp-whirlpool_data.png
│   └── Neural-Net-cpp-whirlpool_mesh.png
├── test
│   ├── LoadPlane.h
│   ├── LoadPolynomial.h
│   ├── TestCaseFile.h
│   ├── PlotPlaneData.py
│   ├── SavePlaneMesh.h
│   ├── LoadPolynomial.cpp
│   ├── PlotPlaneMesh.py
│   ├── ConvertMNIST.py
│   ├── SavePlaneMesh.cpp
│   ├── LoadPlane.cpp
│   └── TestCaseFile.cpp
├── ANN.h
├── main.cpp
├── network
│   ├── Network.h
│   └── Network.cpp
├── CMakeLists.txt
├── matrix
│   ├── Matrix.h
│   └── Matrix.cpp
├── .gitignore
└── README.md

--------------------------------------------------------------------------------
/layer/Layer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "Layer.h"

--------------------------------------------------------------------------------
/loss/LossFunction.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "LossFunction.h"

--------------------------------------------------------------------------------
/activation/ActivationFunction.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "ActivationFunction.h"
--------------------------------------------------------------------------------
/images/Neural-Net-cpp-circle_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-circle_data.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-circle_mesh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-circle_mesh.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-window_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-window_data.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-window_mesh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-window_mesh.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-cluster_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-cluster_data.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-cluster_mesh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-cluster_mesh.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-whirlpool_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-whirlpool_data.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-whirlpool_mesh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-whirlpool_mesh.png

--------------------------------------------------------------------------------
/layer/Layer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_LAYER_H
#define ANN_LAYER_H

#include "../matrix/Matrix.h"

struct Layer {
    Matrix input;

    virtual Matrix forward(const Matrix &input, bool is_eval) = 0;

    virtual Matrix backward(const Matrix &d_output, double learning_rate) = 0;
};


#endif //ANN_LAYER_H
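A note on the interface above: `forward` caches the layer input and returns the layer output (the `is_eval` flag distinguishes inference from training and is only acted on by `DropoutLayer`), while `backward` receives the gradient of the loss with respect to the layer's output, applies any parameter update using `learning_rate`, and returns the gradient with respect to the layer's input. A minimal sketch of a hypothetical pass-through layer (not part of this repository) showing the contract; `FlattenLayer` below is the simplest real implementation:

```cpp
#include "Layer.h"

// Hypothetical illustration only: a parameter-free layer that forwards
// its input unchanged and passes gradients straight through.
struct IdentityLayer : public Layer {
    Matrix forward(const Matrix &input, bool is_eval) override {
        this->input = input;  // cache the input, as the concrete layers do
        return input;         // output equals input
    }

    Matrix backward(const Matrix &d_output, double learning_rate) override {
        return d_output;      // dL/dinput == dL/doutput; nothing to update
    }
};
```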
--------------------------------------------------------------------------------
/layer/FlattenLayer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-29.
//

#ifndef ANN_FLATTENLAYER_H
#define ANN_FLATTENLAYER_H


#include "Layer.h"

struct FlattenLayer : public Layer {
    Matrix forward(const Matrix &input, bool is_eval) override;

    Matrix backward(const Matrix &d_output, double learning_rate) override;
};


#endif //ANN_FLATTENLAYER_H

--------------------------------------------------------------------------------
/test/LoadPlane.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/8.
//

#ifndef ANN_LOADPLANE_H
#define ANN_LOADPLANE_H

#include <iostream>
#include <vector>
#include <string>
#include <random>
#include <chrono>
#include <cmath>
#include "../matrix/Matrix.h"

std::pair<std::vector<Matrix>, std::vector<Matrix>> load_plane(int size, const std::string &type);

#endif //ANN_LOADPLANE_H

--------------------------------------------------------------------------------
/layer/FlattenLayer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-29.
//

#include "FlattenLayer.h"

Matrix FlattenLayer::forward(const Matrix &input, bool is_eval) {
    this->input = input;
    return input.reshape(input.row() * input.col(), 1);
}

Matrix FlattenLayer::backward(const Matrix &d_output, double learning_rate) {
    return d_output.reshape(input.row(), input.col());
}

--------------------------------------------------------------------------------
/loss/LossFunction.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_LOSSFUNCTION_H
#define ANN_LOSSFUNCTION_H


#include "../matrix/Matrix.h"

struct LossFunction {
    virtual double loss(const Matrix &predict, const Matrix &ground_truth) = 0;

    virtual Matrix derivative(const Matrix &predict, const Matrix &ground_truth) = 0;
};


#endif //ANN_LOSSFUNCTION_H

--------------------------------------------------------------------------------
/test/LoadPolynomial.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/6.
//

#ifndef ANN_LOADPOLYNOMIAL_H
#define ANN_LOADPOLYNOMIAL_H

#include <vector>
#include <utility>
#include <random>
#include <chrono>
#include "../matrix/Matrix.h"

std::pair<std::vector<Matrix>, std::vector<Matrix>>
load_polynomial(int size, double(*f)(double), double min = -1.0, double max = 1.0);


#endif //ANN_LOADPOLYNOMIAL_H

--------------------------------------------------------------------------------
/activation/ReLu.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_RELU_H
#define ANN_RELU_H


#include "ActivationFunction.h"

struct ReLu : public ActivationFunction {
    double activate(double x) override;

    Matrix activate(const Matrix &m) override;

    double derivative(double x) override;

    Matrix derivative(const Matrix &m) override;
};


#endif //ANN_RELU_H

--------------------------------------------------------------------------------
/activation/Tanh.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_TANH_H
#define ANN_TANH_H


#include "ActivationFunction.h"

struct Tanh : public ActivationFunction {
    double activate(double x) override;

    Matrix activate(const Matrix &m) override;

    double derivative(double x) override;

    Matrix derivative(const Matrix &m) override;
};


#endif //ANN_TANH_H
--------------------------------------------------------------------------------
/loss/MeanSquaredError.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_MEANSQUAREDERROR_H
#define ANN_MEANSQUAREDERROR_H


#include "LossFunction.h"

struct MeanSquaredError : public LossFunction {
    double loss(const Matrix &predict, const Matrix &ground_truth) override;

    Matrix derivative(const Matrix &predict, const Matrix &ground_truth) override;
};


#endif //ANN_MEANSQUAREDERROR_H

--------------------------------------------------------------------------------
/activation/Sigmoid.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_SIGMOID_H
#define ANN_SIGMOID_H


#include "ActivationFunction.h"

struct Sigmoid : public ActivationFunction {
    double activate(double x) override;

    Matrix activate(const Matrix &m) override;

    double derivative(double x) override;

    Matrix derivative(const Matrix &m) override;
};


#endif //ANN_SIGMOID_H

--------------------------------------------------------------------------------
/activation/ActivationFunction.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_ACTIVATIONFUNCTION_H
#define ANN_ACTIVATIONFUNCTION_H


#include "../matrix/Matrix.h"

struct ActivationFunction {
public:
    virtual double activate(double x) = 0;

    virtual Matrix activate(const Matrix &m) = 0;

    virtual double derivative(double x) = 0;

    virtual Matrix derivative(const Matrix &m) = 0;
};


#endif //ANN_ACTIVATIONFUNCTION_H

--------------------------------------------------------------------------------
/layer/DropoutLayer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-5-29.
//

#ifndef ANN_DROPOUTLAYER_H
#define ANN_DROPOUTLAYER_H


#include "Layer.h"

struct DropoutLayer : public Layer {
    double dropout_rate;
    Matrix mask;

    explicit DropoutLayer(double dropout_rate);

    Matrix forward(const Matrix &input, bool is_eval) override;

    Matrix backward(const Matrix &d_output, double learning_rate) override;
};


#endif //ANN_DROPOUTLAYER_H
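The declaration above is inverted dropout (implemented in /layer/DropoutLayer.cpp below): during training each entry of `mask` is zeroed with probability p = `dropout_rate` and the survivors are scaled by 1/(1 - p), so that in expectation the training-time output equals the untouched input,

$$\mathbb{E}\!\left[\frac{m_{ij}\,x_{ij}}{1-p}\right] = \frac{(1-p)\,x_{ij}}{1-p} = x_{ij},$$

which is why `forward` can simply return the input unchanged when `is_eval` is true.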
--------------------------------------------------------------------------------
/loss/MeanSquaredError.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "MeanSquaredError.h"
#include <cmath>

double MeanSquaredError::loss(const Matrix &predict, const Matrix &ground_truth) {
    double result = 0;
    for (int i = 0; i < predict.row(); i++) {
        result += pow(predict(i, 0) - ground_truth(i, 0), 2);
    }
    return result / predict.row();
}

Matrix MeanSquaredError::derivative(const Matrix &predict, const Matrix &ground_truth) {
    return (predict - ground_truth) * 2 / predict.row();
}
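For an n-by-1 prediction p and ground truth g, the implementation above computes

$$L = \frac{1}{n}\sum_{i=1}^{n}(p_i - g_i)^2, \qquad \frac{\partial L}{\partial p_i} = \frac{2}{n}(p_i - g_i),$$

and the gradient is exactly the `(predict - ground_truth) * 2 / predict.row()` returned by `derivative`.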
--------------------------------------------------------------------------------
/layer/ActivationLayer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "ActivationLayer.h"

ActivationLayer::ActivationLayer(ActivationFunction *activation_function) {
    this->activation_function = activation_function;
}

Matrix ActivationLayer::forward(const Matrix &input, bool is_eval) {
    this->input = input;
    return activation_function->activate(input);
}

Matrix ActivationLayer::backward(const Matrix &d_output, double learning_rate) {
    return activation_function->derivative(input).hadamard(d_output);
}

--------------------------------------------------------------------------------
/layer/FullyConnectedLayer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_FULLYCONNECTEDLAYER_H
#define ANN_FULLYCONNECTEDLAYER_H


#include "Layer.h"

struct FullyConnectedLayer : public Layer {
    int input_size, output_size;
    Matrix weights, biases;

    FullyConnectedLayer(int input_size, int output_size);

    Matrix forward(const Matrix &input, bool is_eval) override;

    Matrix backward(const Matrix &d_output, double learning_rate) override;
};


#endif //ANN_FULLYCONNECTEDLAYER_H

--------------------------------------------------------------------------------
/activation/Tanh.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "Tanh.h"
#include <cmath>

double Tanh::activate(double x) {
    return tanh(x);
}

Matrix Tanh::activate(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return tanh(x); });
    return result;
}

double Tanh::derivative(double x) {
    return 1 - tanh(x) * tanh(x);
}

Matrix Tanh::derivative(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return 1 - tanh(x) * tanh(x); });
    return result;
}

--------------------------------------------------------------------------------
/activation/ReLu.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "ReLu.h"

double ReLu::activate(double x) {
    return x > 0.0 ? x : 0.0;
}

Matrix ReLu::activate(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return x > 0.0 ? x : 0.0; });
    return result;
}

double ReLu::derivative(double x) {
    return x > 0.0 ? 1.0 : 0.0;
}

Matrix ReLu::derivative(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return x > 0.0 ? 1.0 : 0.0; });
    return result;
}

--------------------------------------------------------------------------------
/layer/ActivationLayer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_ACTIVATIONLAYER_H
#define ANN_ACTIVATIONLAYER_H


#include "Layer.h"
#include "../activation/ActivationFunction.h"

struct ActivationLayer : public Layer {
    ActivationFunction *activation_function;

    explicit ActivationLayer(ActivationFunction *activation_function);

    Matrix forward(const Matrix &input, bool is_eval) override;

    Matrix backward(const Matrix &d_output, double learning_rate) override;
};


#endif //ANN_ACTIVATIONLAYER_H

--------------------------------------------------------------------------------
/activation/Sigmoid.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "Sigmoid.h"
#include <cmath>

double Sigmoid::activate(double x) {
    return 1.0 / (1.0 + exp(-x));
}

Matrix Sigmoid::activate(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return 1.0 / (1.0 + exp(-x)); });
    return result;
}

double Sigmoid::derivative(double x) {
    return 1.0 / (1.0 + exp(-x)) * (1.0 - 1.0 / (1.0 + exp(-x)));
}

Matrix Sigmoid::derivative(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return 1.0 / (1.0 + exp(-x)) * (1.0 - 1.0 / (1.0 + exp(-x))); });
    return result;
}
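Both element-wise derivatives above are the usual closed forms: for the sigmoid,

$$\sigma(x) = \frac{1}{1+e^{-x}}, \qquad \sigma'(x) = \sigma(x)\,\bigl(1 - \sigma(x)\bigr),$$

written out with `exp` evaluated twice rather than reusing a cached activation, and Tanh analogously uses 1 - tanh^2(x).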
--------------------------------------------------------------------------------
/test/TestCaseFile.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/8.
//

#ifndef ANN_TESTCASEFILE_H
#define ANN_TESTCASEFILE_H

#include <iostream>
#include <fstream>
#include <iomanip>
#include <vector>
#include <string>
#include "../matrix/Matrix.h"

std::pair<std::vector<Matrix>, std::vector<Matrix>> load_testcase(const std::string &filename);

void save_testcase(const std::string &filename, std::vector<Matrix> &x, std::vector<Matrix> &y);

std::pair<std::vector<Matrix>, std::vector<Matrix>> load_binary_testcase(const std::string &filename);

void save_binary_testcase(const std::string &filename, std::vector<Matrix> &x, std::vector<Matrix> &y);

#endif //ANN_TESTCASEFILE_H

--------------------------------------------------------------------------------
/test/PlotPlaneData.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

with open("../test.txt", "r") as f:
    num_samples = int(f.readline().strip())

    input_shape = tuple(map(int, f.readline().strip().split()))
    label_shape = tuple(map(int, f.readline().strip().split()))

    x = []
    y = []
    for i in range(num_samples):
        data = list(map(float, f.readline().strip().split()))
        x_vec = np.array(data[:2])
        y_vec = np.array(data[2:])
        x.append(x_vec)
        y.append(y_vec)
    x = np.array(x)
    y = np.array(y)

plt.xlabel("x")
plt.ylabel("y")
plt.axis("equal")
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()

--------------------------------------------------------------------------------
/test/SavePlaneMesh.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/8.
//

#ifndef ANN_SAVEPLANEMESH_H
#define ANN_SAVEPLANEMESH_H

#include <iostream>
#include <fstream>
#include <iomanip>
#include <cmath>
#include <algorithm>
#include "../matrix/Matrix.h"
#include "../network/Network.h"

void save_plane_mesh(double x_start, double x_end, double x_step,
                     double y_start, double y_end, double y_step,
                     Network &network, const std::string &filename);

void save_plane_mesh_with_data(const std::string &data_file, const std::string &mesh_file,
                               std::vector<Matrix> &x, std::vector<Matrix> &y, Network &network);

#endif //ANN_SAVEPLANEMESH_H

--------------------------------------------------------------------------------
/ANN.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-29.
//

#ifndef ANN_ANN_H
#define ANN_ANN_H

#include "matrix/Matrix.h"

#include "network/Network.h"

#include "layer/Layer.h"
#include "layer/FlattenLayer.h"
#include "layer/ConvolutionalLayer.h"
#include "layer/FullyConnectedLayer.h"
#include "layer/ActivationLayer.h"
#include "layer/DropoutLayer.h"

#include "loss/LossFunction.h"
#include "loss/MeanSquaredError.h"

#include "activation/ActivationFunction.h"
#include "activation/Sigmoid.h"
#include "activation/ReLu.h"
#include "activation/Tanh.h"

#include "test/TestCaseFile.h"
#include "test/LoadPlane.h"
#include "test/LoadPolynomial.h"
#include "test/SavePlaneMesh.h"

#endif //ANN_ANN_H
--------------------------------------------------------------------------------
/layer/ConvolutionalLayer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-29.
//

#ifndef ANN_CONVOLUTIONALLAYER_H
#define ANN_CONVOLUTIONALLAYER_H


#include <tuple>
#include "Layer.h"

struct ConvolutionalLayer : public Layer {
    int input_h, input_w, input_c;    // input channel not implemented
    int filter_h, filter_w;
    int output_h, output_w, output_c; // output channel not implemented
    Matrix weights;                   // (filter_h, filter_w)
    double bias;

    Matrix input;

    ConvolutionalLayer(std::tuple<int, int> input_shape, std::tuple<int, int> filter_shape);

    Matrix forward(const Matrix &input, bool is_eval) override;

    Matrix backward(const Matrix &d_output, double learning_rate) override;
};


#endif //ANN_CONVOLUTIONALLAYER_H

--------------------------------------------------------------------------------
/layer/DropoutLayer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-5-29.
//

#include "DropoutLayer.h"

DropoutLayer::DropoutLayer(double dropout_rate) {
    this->dropout_rate = dropout_rate;
}

Matrix DropoutLayer::forward(const Matrix &input, bool is_eval) {
    if (is_eval) {
        return input;
    }
    mask = Matrix(input.row(), input.col());
    mask.randomize(0, 1);
    for (int i = 0; i < input.row(); i++) {
        for (int j = 0; j < input.col(); j++) {
            mask(i, j) = mask(i, j) < dropout_rate ? 0 : 1;
        }
    }
    return input.hadamard(mask) / (1 - dropout_rate);
}

Matrix DropoutLayer::backward(const Matrix &d_output, double learning_rate) {
    return d_output.hadamard(mask) / (1 - dropout_rate);
}
--------------------------------------------------------------------------------
/test/LoadPolynomial.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/6.
//

#include "LoadPolynomial.h"


std::pair<std::vector<Matrix>, std::vector<Matrix>>
load_polynomial(int size, double(*f)(double), double min, double max) {
    std::vector<Matrix> input, output;
    auto now = std::chrono::system_clock::now();
    auto now_sec = std::chrono::time_point_cast<std::chrono::seconds>(now);
    auto timestamp = now_sec.time_since_epoch().count();
    std::default_random_engine generator(timestamp);
    std::uniform_real_distribution<double> distribution(min, max);
    for (int i = 0; i < size; i++) {
        double x = distribution(generator);
        double y = f(x);
        input.push_back(make_matrix({{x}}));
        output.push_back(make_matrix({{y}}));
    }
    return {input, output};
}

--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
#include <iostream>
#include "ANN.h"

int main() {
    std::ios::sync_with_stdio(false);

    Network network({
            new ConvolutionalLayer({28, 28}, {3, 3}),
            new ActivationLayer(new Sigmoid),
            new ConvolutionalLayer({26, 26}, {3, 3}),
            new ActivationLayer(new Sigmoid),
            new FlattenLayer,
            new FullyConnectedLayer(24 * 24, 100),
            new ActivationLayer(new Sigmoid),
            new FullyConnectedLayer(100, 10),
            new ActivationLayer(new Sigmoid)
    });
    auto [train_input, train_output] = load_binary_testcase("../mnist_train.bin");
    auto [test_input, test_output] = load_binary_testcase("../mnist_test.bin");

    network.train(train_input, train_output, new MeanSquaredError, 80, 0.1);
    network.evaluate(test_input, test_output, new MeanSquaredError, true);

    return 0;
}

--------------------------------------------------------------------------------
/layer/FullyConnectedLayer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "FullyConnectedLayer.h"

FullyConnectedLayer::FullyConnectedLayer(int input_size, int output_size) :
        input_size(input_size), output_size(output_size) {
    weights = Matrix(output_size, input_size);
    biases = Matrix(output_size, 1);
    weights.randomize();
    biases.randomize();
}

Matrix FullyConnectedLayer::forward(const Matrix &input, bool is_eval) {
    this->input = input;
    return weights * input + biases;
}

Matrix FullyConnectedLayer::backward(const Matrix &d_output, double learning_rate) {
    Matrix d_input = weights.transpose() * d_output;
    Matrix d_weights = d_output * input.transpose();
    const Matrix &d_biases = d_output;
    weights -= d_weights * learning_rate;
    biases -= d_biases * learning_rate;
    return d_input;
}
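The backward pass above is the standard result for y = Wx + b: with delta = dL/dy arriving from the layer after this one,

$$\frac{\partial L}{\partial x} = W^{\top}\delta, \qquad \frac{\partial L}{\partial W} = \delta\,x^{\top}, \qquad \frac{\partial L}{\partial b} = \delta,$$

followed by a plain SGD step, theta <- theta - eta * dL/dtheta with eta = `learning_rate`, matching the README's note that optimization is naive per-sample gradient descent.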
--------------------------------------------------------------------------------
/network/Network.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_NETWORK_H
#define ANN_NETWORK_H


#include <vector>
#include <utility>
#include "../layer/Layer.h"
#include "../loss/LossFunction.h"

class Network {
private:
    std::vector<Layer *> layers;

public:
    explicit Network() = default;

    explicit Network(std::vector<Layer *> layers) : layers(std::move(layers)) {};

    void add_layer(Layer *layer);

    double train(const std::vector<Matrix> &input, const std::vector<Matrix> &target,
                 LossFunction *loss_function, int epochs, double learning_rate);

    Matrix predict(const Matrix &input, bool is_eval = true);

    double evaluate(const std::vector<Matrix> &input, const std::vector<Matrix> &target,
                    LossFunction *loss_function, bool one_hot_encoding = false);
};


#endif //ANN_NETWORK_H

--------------------------------------------------------------------------------
/test/PlotPlaneMesh.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

with open("../test.txt", "r") as f:
    num_samples = int(f.readline().strip())

    input_shape = tuple(map(int, f.readline().strip().split()))
    label_shape = tuple(map(int, f.readline().strip().split()))

    x = []
    y = []
    for i in range(num_samples):
        data = list(map(float, f.readline().strip().split()))
        x_vec = np.array(data[:2])
        y_vec = np.array(list(map(int, data[2:])))
        x.append(x_vec)
        y.append(y_vec)
    x = np.array(x)
    y = np.array(y)

with open("../mesh.txt", "r") as f:
    x_coord = list(map(float, f.readline().strip().split()))
    y_coord = list(map(float, f.readline().strip().split()))
    x_coord = np.array(x_coord)
    y_coord = np.array(y_coord)
    gx, gy = np.meshgrid(x_coord, y_coord)

    mesh = []
    for i in range(y_coord.shape[0]):
        data = list(map(lambda val: 1 if float(val) >= 0 else -1, f.readline().strip().split()))
        data = np.array(data)
        mesh.append(data)

    mesh = np.array(mesh)

plt.xlabel("x")
plt.ylabel("y")
plt.axis("equal")
plt.contourf(gy, gx, mesh, alpha=0.9)
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()
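The constructor in the next file sizes its output for a stride-1 "valid" convolution: an i_h by i_w input and an f_h by f_w filter yield an output of (i_h - f_h + 1) by (i_w - f_w + 1), which is why main.cpp's two 3x3 convolutions take 28x28 to 26x26 and then to 24x24 before flattening into the 24 * 24 fully connected layer.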
--------------------------------------------------------------------------------
/layer/ConvolutionalLayer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-29.
//

#include "ConvolutionalLayer.h"

ConvolutionalLayer::ConvolutionalLayer(std::tuple<int, int> input_shape, std::tuple<int, int> filter_shape) {
    auto &[_input_h, _input_w] = input_shape;
    auto &[_filter_h, _filter_w] = filter_shape;
    input_h = _input_h;
    input_w = _input_w;
    filter_h = _filter_h;
    filter_w = _filter_w;
    output_h = input_h - filter_h + 1; // stride = 1
    output_w = input_w - filter_w + 1; // stride = 1

    weights = Matrix(filter_h, filter_w);
    weights.randomize();
    Matrix random = Matrix(1, 1);
    random.randomize();
    bias = random(0, 0);
}

Matrix ConvolutionalLayer::forward(const Matrix &input, bool is_eval) {
    this->input = input;
    Matrix output = input.convolution(weights);
    output += bias;
    return output;
}

Matrix ConvolutionalLayer::backward(const Matrix &d_output, double learning_rate) {
    Matrix d_input = d_output.convolution(weights.transpose(), false);
    Matrix d_filters = input.convolution(d_output.transpose(), true);
    double d_bias = d_output.sum();

    weights -= d_filters * learning_rate;
    bias -= d_bias * learning_rate;

    return d_input;
}

--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.25)
project(ANN)

set(CMAKE_CXX_STANDARD 17)

add_executable(
        ANN
        ANN.h
        main.cpp

        matrix/Matrix.cpp
        matrix/Matrix.h

        network/Network.cpp
        network/Network.h

        layer/Layer.cpp
        layer/Layer.h
        layer/FullyConnectedLayer.cpp
        layer/FullyConnectedLayer.h
        layer/ActivationLayer.cpp
        layer/ActivationLayer.h
        layer/ConvolutionalLayer.cpp
        layer/ConvolutionalLayer.h
        layer/FlattenLayer.cpp
        layer/FlattenLayer.h
        layer/DropoutLayer.cpp
        layer/DropoutLayer.h

        loss/LossFunction.cpp
        loss/LossFunction.h
        loss/MeanSquaredError.cpp
        loss/MeanSquaredError.h

        activation/ActivationFunction.cpp
        activation/ActivationFunction.h
        activation/Sigmoid.cpp
        activation/Sigmoid.h
        activation/Tanh.cpp
        activation/Tanh.h
        activation/ReLu.cpp
        activation/ReLu.h

        test/LoadPolynomial.cpp
        test/LoadPolynomial.h
        test/LoadPlane.cpp
        test/LoadPlane.h
        test/TestCaseFile.cpp
        test/TestCaseFile.h
        test/SavePlaneMesh.cpp
        test/SavePlaneMesh.h
)

set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++")

if (CMAKE_BUILD_TYPE MATCHES Release)
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -O3")
endif ()
--------------------------------------------------------------------------------
/matrix/Matrix.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/5.
//

#ifndef ANN_MATRIX_H
#define ANN_MATRIX_H

#include <vector>
#include <utility>
#include <iostream>
#include <iomanip>
#include <cmath>

class Matrix {
protected:
    int _row{}, _col{};
    std::vector<double> _data;

public:
    Matrix();

    Matrix(int row, int col, double val = 0);

    Matrix(const Matrix &mat);

    explicit Matrix(const std::vector<std::vector<double>> &data);

    void from_vector(const std::vector<std::vector<double>> &data);

    [[nodiscard]] int row() const;

    [[nodiscard]] int col() const;

    [[nodiscard]] std::pair<int, int> shape() const;

    [[nodiscard]] Matrix transpose() const;

    void set(double val);

    void print(int precision = 2) const;

    void randomize(double min = -0.5, double max = 0.5);

    void perform(double(*f)(double));

    Matrix &operator=(const Matrix &mat);

    Matrix operator+(const Matrix &mat) const;

    Matrix &operator+=(const Matrix &mat);

    Matrix operator+(double x) const;

    Matrix &operator+=(double x);

    Matrix operator-(const Matrix &mat) const;

    Matrix &operator-=(const Matrix &mat);

    Matrix operator-(double x) const;

    Matrix &operator-=(double x);

    Matrix operator*(const Matrix &mat) const;

    Matrix operator*(const double &val) const;

    Matrix &operator*=(const double &val);

    Matrix operator/(const double &val) const;

    Matrix &operator/=(const double &val);

    bool operator==(const Matrix &mat) const;

    double operator()(int r, int c) const;

    double &operator()(int r, int c);

    [[nodiscard]] Matrix hadamard(const Matrix &mat) const;

    [[nodiscard]] Matrix convolution(const Matrix &mat, bool valid = true) const;

    [[nodiscard]] double max() const;

    [[nodiscard]] double min() const;

    [[nodiscard]] double sum() const;

    [[nodiscard]] std::pair<int, int> argmax() const;

    [[nodiscard]] std::pair<int, int> argmin() const;

    [[nodiscard]] Matrix reshape(int row, int col) const;
};

Matrix operator*(const double &val, const Matrix &mat);

Matrix make_matrix(const std::vector<std::vector<double>> &data);

#endif //ANN_MATRIX_H

--------------------------------------------------------------------------------
/test/ConvertMNIST.py:
--------------------------------------------------------------------------------
import numpy as np
from keras import datasets, utils
from tqdm import tqdm


def convert_mnist_to_txt(sample_rate: float = 1.0):
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

    x_train = x_train[:int(x_train.shape[0] * sample_rate)]
    y_train = y_train[:int(y_train.shape[0] * sample_rate)]
    x_test = x_test[:int(x_test.shape[0] * sample_rate)]
    y_test = y_test[:int(y_test.shape[0] * sample_rate)]

    with open('../mnist_train.txt', 'w') as f:
        f.write(f"{x_train.shape[0]}\n")
        f.write("28 28\n")
        f.write("10 1\n")
        for i in tqdm(range(x_train.shape[0])):
            x = x_train[i].reshape(-1) / 255.0
            y = utils.to_categorical(y_train[i], 10)
            f.write(" ".join(map(str, x)) + " " + " ".join(map(str, y)) + "\n")

    with open('../mnist_test.txt', 'w') as f:
        f.write(f"{x_test.shape[0]}\n")
        f.write("28 28\n")
        f.write("10 1\n")
        for i in tqdm(range(x_test.shape[0])):
            x = x_test[i].reshape(-1) / 255.0
            y = utils.to_categorical(y_test[i], 10)
            f.write(" ".join(map(str, x)) + " " + " ".join(map(str, y)) + "\n")


def convert_mnist_to_bin(sample_rate: float = 1.0):
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

    x_train = x_train[:int(x_train.shape[0] * sample_rate)]
    y_train = y_train[:int(y_train.shape[0] * sample_rate)]
    x_test = x_test[:int(x_test.shape[0] * sample_rate)]
    y_test = y_test[:int(y_test.shape[0] * sample_rate)]

    with open('../mnist_train.bin', 'wb') as f:
        f.write(np.array(x_train.shape[0], dtype=np.int32).tobytes())
        f.write(np.array([28, 28], dtype=np.int32).tobytes())
        f.write(np.array([10, 1], dtype=np.int32).tobytes())
        for i in tqdm(range(x_train.shape[0])):
            x = x_train[i].reshape(-1) / 255.0
            # cast the one-hot label to float64: to_categorical defaults to
            # float32, but the C++ loader reads 8-byte doubles
            y = utils.to_categorical(y_train[i], 10).astype(np.float64)
            f.write(x.tobytes())
            f.write(y.tobytes())

    with open('../mnist_test.bin', 'wb') as f:
        f.write(np.array(x_test.shape[0], dtype=np.int32).tobytes())
        f.write(np.array([28, 28], dtype=np.int32).tobytes())
        f.write(np.array([10, 1], dtype=np.int32).tobytes())
        for i in tqdm(range(x_test.shape[0])):
            x = x_test[i].reshape(-1) / 255.0
            y = utils.to_categorical(y_test[i], 10).astype(np.float64)
            f.write(x.tobytes())
            f.write(y.tobytes())


if __name__ == '__main__':
    # convert_mnist_to_txt(0.1)
    convert_mnist_to_bin(0.1)
".join(map(str, y)) + "\n") 31 | 32 | 33 | def convert_mnist_to_bin(sample_rate: float = 1.0): 34 | (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data() 35 | 36 | x_train = x_train[:int(x_train.shape[0] * sample_rate)] 37 | y_train = y_train[:int(y_train.shape[0] * sample_rate)] 38 | x_test = x_test[:int(x_test.shape[0] * sample_rate)] 39 | y_test = y_test[:int(y_test.shape[0] * sample_rate)] 40 | 41 | with open('../mnist_train.bin', 'wb') as f: 42 | f.write(np.array(x_train.shape[0], dtype=np.int32).tobytes()) 43 | f.write(np.array([28, 28], dtype=np.int32).tobytes()) 44 | f.write(np.array([10, 1], dtype=np.int32).tobytes()) 45 | for i in tqdm(range(x_train.shape[0])): 46 | x = x_train[i].reshape(-1) / 255.0 47 | y = utils.to_categorical(y_train[i], 10) 48 | f.write(x.tobytes()) 49 | f.write(y.tobytes()) 50 | 51 | with open('../mnist_test.bin', 'wb') as f: 52 | f.write(np.array(x_test.shape[0], dtype=np.int32).tobytes()) 53 | f.write(np.array([28, 28], dtype=np.int32).tobytes()) 54 | f.write(np.array([10, 1], dtype=np.int32).tobytes()) 55 | for i in tqdm(range(x_test.shape[0])): 56 | x = x_test[i].reshape(-1) / 255.0 57 | y = utils.to_categorical(y_test[i], 10) 58 | f.write(x.tobytes()) 59 | f.write(y.tobytes()) 60 | 61 | 62 | if __name__ == '__main__': 63 | # convert_mnist_to_txt(0.1) 64 | convert_mnist_to_bin(0.1) 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.bin 3 | 4 | ### CUDA template 5 | *.i 6 | *.ii 7 | *.gpu 8 | *.ptx 9 | *.cubin 10 | *.fatbin 11 | 12 | ### C++ template 13 | # Prerequisites 14 | *.d 15 | 16 | # Compiled Object files 17 | *.slo 18 | *.lo 19 | *.o 20 | *.obj 21 | 22 | # Precompiled Headers 23 | *.gch 24 | *.pch 25 | 26 | # Compiled Dynamic libraries 27 | *.so 28 | *.dylib 29 | *.dll 30 | 31 | # Fortran module files 32 | *.mod 33 | *.smod 34 | 35 | # Compiled Static libraries 36 | *.lai 37 | *.la 38 | *.a 39 | *.lib 40 | 41 | # Executables 42 | *.exe 43 | *.out 44 | *.app 45 | 46 | ### CMake template 47 | CMakeLists.txt.user 48 | CMakeCache.txt 49 | CMakeFiles 50 | CMakeScripts 51 | Testing 52 | Makefile 53 | cmake_install.cmake 54 | install_manifest.txt 55 | compile_commands.json 56 | CTestTestfile.cmake 57 | _deps 58 | 59 | ### CLion template 60 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 61 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 62 | 63 | # User-specific stuff 64 | .idea/**/workspace.xml 65 | .idea/**/tasks.xml 66 | .idea/**/usage.statistics.xml 67 | .idea/**/dictionaries 68 | .idea/**/shelf 69 | 70 | # AWS User-specific 71 | .idea/**/aws.xml 72 | 73 | # Generated files 74 | .idea/**/contentModel.xml 75 | 76 | # Sensitive or high-churn files 77 | .idea/**/dataSources/ 78 | .idea/**/dataSources.ids 79 | .idea/**/dataSources.local.xml 80 | .idea/**/sqlDataSources.xml 81 | .idea/**/dynamic.xml 82 | .idea/**/uiDesigner.xml 83 | .idea/**/dbnavigator.xml 84 | 85 | # Gradle 86 | .idea/**/gradle.xml 87 | .idea/**/libraries 88 | 89 | # Gradle and Maven with auto-import 90 | # When using Gradle or Maven with auto-import, you should exclude module files, 91 | # since they will be recreated, and may cause churn. Uncomment if using 92 | # auto-import. 
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# SonarLint plugin
.idea/sonarlint/

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

--------------------------------------------------------------------------------
/test/SavePlaneMesh.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/8.
//

#include "SavePlaneMesh.h"
#include "TestCaseFile.h"


void save_plane_mesh(double x_start, double x_end, double x_step,
                     double y_start, double y_end, double y_step,
                     Network &network, const std::string &filename) {
    int y_size = ceil((y_end - y_start) / y_step);
    int x_size = ceil((x_end - x_start) / x_step);
    Matrix mesh(y_size, x_size);
    std::vector<double> y_coords(y_size);
    std::vector<double> x_coords(x_size);
    std::cout << "\n[Mesh Plane]" << std::endl;
    std::cout << "mesh x from " << x_start << " to " << x_end << " with step " << x_step << std::endl;
    std::cout << "mesh y from " << y_start << " to " << y_end << " with step " << y_step << std::endl;
    for (int i = 0; i < y_size; i++) {
        for (int j = 0; j < x_size; j++) {
            double y = y_start + i * y_step;
            double x = x_start + j * x_step;
            y_coords[i] = y;
            x_coords[j] = x;
            Matrix dot({std::vector<double>{y}, std::vector<double>{x}});
            mesh(i, j) = network.predict(dot)(0, 0);
            std::cout << "\r" << std::fixed << std::setprecision(0) << std::right
                      << std::setw(3) << ceil(100.0 * (i * x_size + j) / (y_size * x_size)) << "% | "
                      << "mesh[" << std::setw(3) << i + 1 << "][" << std::setw(3) << j + 1 << "] = "
                      << std::fixed << std::setprecision(9) << std::setw(12) << mesh(i, j) << std::flush;
        }
    }
    std::ofstream file(filename);
    for (int i = 0; i < x_size; i++) {
        file << std::fixed << std::setprecision(9) << std::left << std::setw(20) << x_coords[i] << " ";
    }
    file << std::endl;
    for (int i = 0; i < y_size; i++) {
        file << std::fixed << std::setprecision(9) << std::left << std::setw(20) << y_coords[i] << " ";
    }
    file << std::endl;
    for (int i = 0; i < mesh.row(); i++) {
        for (int j = 0; j < mesh.col(); j++) {
            file << std::fixed << std::setprecision(9) << std::left << std::setw(20) << mesh(i, j) << " ";
        }
        file << std::endl;
    }
    file.close();
}


void save_plane_mesh_with_data(const std::string &data_file, const std::string &mesh_file,
                               std::vector<Matrix> &x, std::vector<Matrix> &y, Network &network) {
    save_testcase(data_file, x, y);
    double x_min = 1e18, x_max = -1e18, y_min = 1e18, y_max = -1e18;
    for (auto &input: x) {
        y_min = std::min(y_min, input(0, 0));
        y_max = std::max(y_max, input(0, 0));
        x_min = std::min(x_min, input(1, 0));
        x_max = std::max(x_max, input(1, 0));
    }
    double x_padding = (x_max - x_min) * 0.05;
    double y_padding = (y_max - y_min) * 0.05;
    save_plane_mesh(x_min - x_padding, x_max + x_padding, (x_max - x_min + 2 * x_padding) / 100,
                    y_min - y_padding, y_max + y_padding, (y_max - y_min + 2 * y_padding) / 100,
                    network, mesh_file);
}

--------------------------------------------------------------------------------
/network/Network.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include <iostream>
#include <iomanip>
#include <chrono>
#include "Network.h"

void Network::add_layer(Layer *layer) {
    layers.push_back(layer);
}

double Network::train(const std::vector<Matrix> &input, const std::vector<Matrix> &target,
                      LossFunction *loss_function, int epochs, double learning_rate) {
    int data_size = int(input.size());
    std::cout << "[Train] " << data_size << " train data\n";
    double loss = 0;
    for (int e = 0; e < epochs; e++) {
        loss = 0;
        auto start = std::chrono::system_clock::now();
        for (int i = 0; i < input.size(); i++) {
            Matrix output = predict(input[i], false);
            loss += loss_function->loss(output, target[i]);
            Matrix d_output = loss_function->derivative(output, target[i]);
            for (auto it = layers.rbegin(); it != layers.rend(); ++it) {
                d_output = (*it)->backward(d_output, learning_rate);
            }
            std::cout << "\r[Epoch " << e + 1 << "/" << epochs << "] "
                      << std::right << std::setw(8) << i + 1 << "/"
                      << std::left << std::setw(8) << data_size
                      << "loss: " << std::left << std::setw(12) << std::setprecision(9) << loss / (i + 1) << std::flush;
        }
        auto end = std::chrono::system_clock::now();
        std::cout << " (" << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms)"
                  << std::endl;
    }
    return loss / data_size;
}

Matrix Network::predict(const Matrix &input, bool is_eval) {
    Matrix output = input;
    for (auto &layer: layers) {
        output = layer->forward(output, is_eval);
    }
    return output;
}

double Network::evaluate(const std::vector<Matrix> &input, const std::vector<Matrix> &target,
                         LossFunction *loss_function, bool one_hot_encoding) {
    int data_size = int(input.size());
    std::cout << "[Evaluate] " << data_size << " evaluate data\n";
    if (!one_hot_encoding) {
        double loss = 0;
        for (int i = 0; i < input.size(); i++) {
            Matrix output = predict(input[i], true);
            loss += loss_function->loss(output, target[i]);
            std::cout << "\r"
                      << std::right << std::setw(8) << i + 1 << "/"
                      << std::left << std::setw(8) << data_size
                      << "loss: " << std::left << std::setw(12) << loss / (i + 1) << std::flush;
        }
        std::cout << std::endl;
        return loss / data_size;
    } else {
        int correct = 0;
        for (int i = 0; i < input.size(); i++) {
            Matrix output = predict(input[i], true);
            if (output.argmax().first == target[i].argmax().first) {
                correct++;
            }
            std::cout << "\r"
                      << std::right << std::setw(8) << i + 1 << "/"
                      << std::left << std::setw(8) << data_size
                      << "accuracy: " << std::left << std::setw(4) << double(correct) / (i + 1)
                      << std::flush;
        }
        std::cout << std::endl;
        return double(correct) / data_size;
    }
}
--------------------------------------------------------------------------------
/test/LoadPlane.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/8.
//

#include "LoadPlane.h"

std::pair<std::vector<Matrix>, std::vector<Matrix>> circle_type(int size) {
    // random generator
    auto now = std::chrono::system_clock::now();
    auto now_sec = std::chrono::time_point_cast<std::chrono::seconds>(now);
    auto timestamp = now_sec.time_since_epoch().count();
    std::default_random_engine generator(timestamp);
    std::normal_distribution<double> distribution(0, 1);

    std::vector<Matrix> input, output;

    // inner circle
    for (int i = 0; i < size / 2; i++) {
        double degree = 2.0 * i / size * 2 * M_PI;
        double x = cos(degree) + 0.5 * distribution(generator);
        double y = sin(degree) + 0.5 * distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{1}}));
    }

    // outer ring
    for (int i = 0; i < size / 2; i++) {
        double degree = 2.0 * i / size * 2 * M_PI;
        double x = 4 * cos(degree) + 0.5 * distribution(generator);
        double y = 4 * sin(degree) + 0.5 * distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{-1}}));
    }
    return {input, output};
}

std::pair<std::vector<Matrix>, std::vector<Matrix>> cluster_type(int size) {
    // random generator
    auto now = std::chrono::system_clock::now();
    auto now_sec = std::chrono::time_point_cast<std::chrono::seconds>(now);
    auto timestamp = now_sec.time_since_epoch().count();
    std::default_random_engine generator(timestamp);
    std::normal_distribution<double> distribution(0, 1);

    std::vector<Matrix> input, output;

    // cluster 1
    for (int i = 0; i < size / 2; i++) {
        double x = distribution(generator);
        double y = distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{1}}));
    }

    // cluster 2
    for (int i = 0; i < size / 2; i++) {
        double x = 4 + distribution(generator);
        double y = 4 + distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{-1}}));
    }

    return {input, output};
}

std::pair<std::vector<Matrix>, std::vector<Matrix>> window_type(int size) {
    // random generator
    auto now = std::chrono::system_clock::now();
    auto now_sec = std::chrono::time_point_cast<std::chrono::seconds>(now);
    auto timestamp = now_sec.time_since_epoch().count();
    std::default_random_engine generator(timestamp);
    std::uniform_real_distribution<double> distribution(-1, 1);

    std::vector<Matrix> input, output;

    for (int i = 0; i < size; i++) {
        double x = distribution(generator);
        double y = distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{x * y >= 0 ? 1.0 : -1.0}}));
    }

    return {input, output};
}

std::pair<std::vector<Matrix>, std::vector<Matrix>> whirlpool_type(int size) {
    // random generator
    auto now = std::chrono::system_clock::now();
    auto now_sec = std::chrono::time_point_cast<std::chrono::seconds>(now);
    auto timestamp = now_sec.time_since_epoch().count();
    std::default_random_engine generator(timestamp);
    std::normal_distribution<double> distribution(0, 0.4);

    std::vector<Matrix> input, output;

    // type 1
    for (int i = 0; i < size / 2; i++) {
        double degree = 4.0 * i / size * 2 * M_PI;
        double x = degree * cos(degree) + distribution(generator);
        double y = degree * sin(degree) + distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{1}}));
    }

    // type -1
    for (int i = 0; i < size / 2; i++) {
        double degree = 4.0 * i / size * 2 * M_PI;
        double x = degree * cos(degree + 2 * M_PI) + distribution(generator);
        double y = degree * sin(degree + 2 * M_PI) + distribution(generator);
        input.push_back(make_matrix({{-x}, {-y}}));
        output.push_back(make_matrix({{-1}}));
    }

    return {input, output};
}

std::pair<std::vector<Matrix>, std::vector<Matrix>> load_plane(int size, const std::string &type) {
    if (type == "circle") {
        return circle_type(size);
    } else if (type == "cluster") {
        return cluster_type(size);
    } else if (type == "window") {
        return window_type(size);
    } else if (type == "whirlpool") {
        return whirlpool_type(size);
    } else {
        std::cerr << "Error: unknown type" << std::endl;
        exit(1);
    }
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Neural-Net-cpp

An artificial neural network implemented in C++, purely as a personal exercise.

Current features:

- Layers (Layer)
  - Fully connected layer: FullyConnectedLayer
  - Convolutional layer: ConvolutionalLayer (single-channel input/output only, for now)
  - Flatten layer: FlattenLayer
  - Activation layer: ActivationLayer
  - Dropout layer: DropoutLayer

- Activation functions
  - Sigmoid
  - Tanh
  - ReLU

- Loss functions
  - Mean squared error: MeanSquaredError

- Datasets
  - MNIST (handwritten digits)
  - 2D binary classification
  - Polynomial fitting


Known issues:

- No model saving/loading; every trained model is one-off.
- Optimization is naive gradient descent and is very unstable: repeated training runs can produce wildly different results.
- No Softmax, and no loss functions other than MSE.
- Mini-batching is not implemented yet; the effective batch_size is 1.
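`DropoutLayer` is listed among the features but does not appear in the examples below. A minimal sketch of where it would sit in a classifier (the layer sizes and dropout rate here are hypothetical, not taken from this repository):

```cpp
Network network({
        new FullyConnectedLayer(784, 128),
        new ActivationLayer(new ReLu),
        new DropoutLayer(0.5),  // randomly zeroes ~50% of activations, training only
        new FullyConnectedLayer(128, 10),
        new ActivationLayer(new Sigmoid)
});
// predict(input, /*is_eval=*/true) bypasses the dropout mask entirely
```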
## Examples

### MNIST

**Code**

```cpp
Network network({
        new ConvolutionalLayer({28, 28}, {3, 3}),
        new ActivationLayer(new Sigmoid),
        new ConvolutionalLayer({26, 26}, {3, 3}),
        new ActivationLayer(new Sigmoid),
        new FlattenLayer,
        new FullyConnectedLayer(24 * 24, 100),
        new ActivationLayer(new Sigmoid),
        new FullyConnectedLayer(100, 10),
        new ActivationLayer(new Sigmoid)
});

auto [train_input, train_output] = load_testcase("../mnist_train.txt");
auto [test_input, test_output] = load_testcase("../mnist_test.txt");

network.train(train_input, train_output, new MeanSquaredError, 80, 0.1);
network.evaluate(test_input, test_output, new MeanSquaredError, true);
```

**Training output**

```
[Load Dataset] 100.00%     6000/6000
[Load Dataset] 100.00%     1000/1000
[Train] 6000 train data
[Epoch 1/80]      6000/6000    loss: 0.092344669   (2994ms)
[Epoch 11/80]     6000/6000    loss: 0.090298650   (2969ms)
[Epoch 21/80]     6000/6000    loss: 0.090335344   (2876ms)
[Epoch 31/80]     6000/6000    loss: 0.031215471   (2525ms)
[Epoch 41/80]     6000/6000    loss: 0.017014869   (2448ms)
[Epoch 51/80]     6000/6000    loss: 0.013342757   (2454ms)
[Epoch 61/80]     6000/6000    loss: 0.011136877   (2538ms)
[Epoch 71/80]     6000/6000    loss: 0.009667794   (2508ms)
[Epoch 80/80]     6000/6000    loss: 0.008628765   (2468ms)
[Evaluate] 1000 evaluate data
    1000/1000    accuracy: 0.910000000
```

**Results over 10 training runs**

```
 #    loss          accuracy
-----------------------------------
 1    0.012405698   0.870000000
 2    0.012413764   0.902000000
 3    0.005940926   0.926000000
 4    0.008770140   0.912000000
 5    0.010302665   0.902000000
 6    0.022851723   0.838000000
 7    0.047779869   0.623000000
 8    0.011795792   0.874000000
 9    0.013515847   0.887000000
10    0.010445787   0.907000000
-----------------------------------
avg   0.015622221   0.864100000
```

### 2D binary classification

**Code**

```cpp
Network network({
        new FullyConnectedLayer(2, 100),
        new ActivationLayer(new Tanh),
        new FullyConnectedLayer(100, 100),
        new ActivationLayer(new Tanh),
        new FullyConnectedLayer(100, 100),
        new ActivationLayer(new Tanh),
        new FullyConnectedLayer(100, 1),
        new ActivationLayer(new Tanh)
});

auto [train_input, train_output] = load_plane(2000, "whirlpool");
auto [test_input, test_output] = load_plane(500, "whirlpool");

network.train(train_input, train_output, new MeanSquaredError, 1000, 0.001);

save_plane_mesh_with_data("../test.txt", "../mesh.txt", test_input, test_output, network);

// then run test/PlotPlaneMesh.py with Python to visualize the decision boundary
```

**Decision boundaries**

| Cluster | Window | Circle | Whirlpool |
| ------- | ------ | ------ | --------- |
| ![](./images/Neural-Net-cpp-cluster_data.png) | ![](./images/Neural-Net-cpp-window_data.png) | ![](./images/Neural-Net-cpp-circle_data.png) | ![](./images/Neural-Net-cpp-whirlpool_data.png) |
| ![](./images/Neural-Net-cpp-cluster_mesh.png) | ![](./images/Neural-Net-cpp-window_mesh.png) | ![](./images/Neural-Net-cpp-circle_mesh.png) | ![](./images/Neural-Net-cpp-whirlpool_mesh.png) |
### Polynomial fitting

**Code**

```cpp
Network network({
        new FullyConnectedLayer(1, 100),
        new ActivationLayer(new Sigmoid),
        new FullyConnectedLayer(100, 100),
        new ActivationLayer(new Sigmoid),
        new FullyConnectedLayer(100, 1),
});

auto poly = [](double x) { return 5 * pow(x, 3) + 2 * pow(x, 2) - 7 * x + 1; };
auto [train_input, train_output] = load_polynomial(1000, poly, -2, 2);
network.train(train_input, train_output, new MeanSquaredError, 100, 0.01);

for (int x = -2; x <= 2; x += 1) {
    Matrix input = make_matrix({{1.0 * x}});
    Matrix output = network.predict(input);
    double predict = output(0, 0);
    double actual = poly(x);
    std::cout << "x: "
              << std::fixed << std::setprecision(2) << std::setw(2) << std::right << x << " Predict: "
              << std::fixed << std::setprecision(2) << std::setw(7) << predict << " Actual: "
              << std::fixed << std::setprecision(2) << std::setw(7) << actual << " Error: "
              << std::fixed << std::setprecision(2) << std::setw(7) << predict - actual << std::endl;
}
```

**Results**

```
x: -2 Predict:  -16.85 Actual:  -17.00 Error:    0.15
x: -1 Predict:    5.01 Actual:    5.00 Error:    0.01
x:  0 Predict:    1.06 Actual:    1.00 Error:    0.06
x:  1 Predict:    1.01 Actual:    1.00 Error:    0.01
x:  2 Predict:   35.04 Actual:   35.00 Error:    0.04
```
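A note on the dataset files used above: the `.txt` format produced by `save_testcase` and `test/ConvertMNIST.py` and read by `load_testcase` starts with three header lines (sample count, input shape, label shape), followed by one whitespace-separated line of input values then label values per sample; the `.bin` variant stores the same header as `int32` values followed by the samples as raw 8-byte doubles.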
                  << std::right << std::fixed << std::setprecision(2)
                  << std::setw(6) << 100.0 * (k + 1) / data_size << "% "
                  << std::right << std::setw(8) << k + 1 << "/"
                  << std::left << std::setw(8) << data_size << std::flush;
        auto &input = x[k];
        auto &label = y[k];
        for (int i = 0; i < input_shape.first; i++) {
            for (int j = 0; j < input_shape.second; j++) {
                file << std::fixed << std::setprecision(9) << std::left << std::setw(20) << input(i, j);
            }
        }
        for (int i = 0; i < label_shape.first; i++) {
            for (int j = 0; j < label_shape.second; j++) {
                file << std::fixed << std::setprecision(9) << std::left << std::setw(20) << label(i, j);
            }
        }
        file << '\n';
    }
    std::cout << std::endl;
    file.close();
}

std::pair<std::vector<Matrix>, std::vector<Matrix>> load_binary_testcase(const std::string &filename) {
    std::ifstream file(filename, std::ios::binary);
    std::vector<Matrix> x, y;
    int data_size;
    std::pair<int, int> input_shape;
    std::pair<int, int> label_shape;
    file.read(reinterpret_cast<char *>(&data_size), sizeof(int));
    file.read(reinterpret_cast<char *>(&input_shape.first), sizeof(int));
    file.read(reinterpret_cast<char *>(&input_shape.second), sizeof(int));
    file.read(reinterpret_cast<char *>(&label_shape.first), sizeof(int));
    file.read(reinterpret_cast<char *>(&label_shape.second), sizeof(int));
    for (int i = 0; i < data_size; i++) {
        std::cout << "\r[Load Dataset] "
                  << std::right << std::fixed << std::setprecision(2)
                  << std::setw(6) << 100.0 * (i + 1) / data_size << "% "
                  << std::right << std::setw(8) << i + 1 << "/"
                  << std::left << std::setw(8) << data_size << std::flush;
        Matrix input(input_shape.first, input_shape.second);
        Matrix label(label_shape.first, label_shape.second);
        for (int j = 0; j < input_shape.first; j++) {
            for (int k = 0; k < input_shape.second; k++) {
                file.read(reinterpret_cast<char *>(&input(j, k)), sizeof(double));
            }
        }
        for (int j = 0; j < label_shape.first; j++) {
            for (int k = 0; k < label_shape.second; k++) {
                file.read(reinterpret_cast<char *>(&label(j, k)), sizeof(double));
            }
        }
        x.push_back(input);
        y.push_back(label);
    }
    std::cout << std::endl;
    return {x, y};
}

void save_binary_testcase(const std::string &filename, std::vector<Matrix> &x, std::vector<Matrix> &y) {
    if (x.size() != y.size()) {
        std::cerr << "Input and label size mismatch" << std::endl;
        exit(1);
    }
    std::ofstream file(filename, std::ios::binary);
    int data_size = int(x.size());
    std::pair<int, int> input_shape = x.front().shape();
    std::pair<int, int> label_shape = y.front().shape();
    file.write(reinterpret_cast<char *>(&data_size), sizeof(int));
    file.write(reinterpret_cast<char *>(&input_shape.first), sizeof(int));
    file.write(reinterpret_cast<char *>(&input_shape.second), sizeof(int));
    file.write(reinterpret_cast<char *>(&label_shape.first), sizeof(int));
    file.write(reinterpret_cast<char *>(&label_shape.second), sizeof(int));
    for (int k = 0; k < data_size; k++) {
        std::cout << "\r[Save Dataset] "
                  << std::right << std::fixed << std::setprecision(2)
                  << std::setw(6) << 100.0 * (k + 1) / data_size << "% "
                  << std::right << std::setw(8) << k + 1 << "/"
                  << std::left << std::setw(8) << data_size << std::flush;
        auto &input = x[k];
        auto &label = y[k];
        for (int i = 0; i < input_shape.first; i++) {
            for (int j = 0; j < input_shape.second; j++) {
                file.write(reinterpret_cast<char *>(&input(i, j)), sizeof(double));
            }
        }
        for (int i = 0; i < label_shape.first; i++) {
            for (int j = 0; j < label_shape.second; j++) {
                file.write(reinterpret_cast<char *>(&label(i, j)), sizeof(double));
            }
        }
    }
    std::cout << std::endl;
    file.close();
}
--------------------------------------------------------------------------------
/matrix/Matrix.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/5.
//

#include <algorithm>
#include <numeric>
#include <random>
#include "Matrix.h"

Matrix::Matrix() {
    _row = 1;
    _col = 1;
    _data = {0};
}

Matrix::Matrix(int row, int col, double val)
        : _row(row), _col(col) {
    if (row <= 0 || col <= 0) {
        std::cerr << "Matrix size must be positive." << std::endl;
        exit(1);
    }
    _data.resize(row * col, val);
}

Matrix::Matrix(const Matrix &mat)
        : _row(mat._row), _col(mat._col) {
    _data.assign(mat._data.begin(), mat._data.end());
}

Matrix::Matrix(const std::vector<std::vector<double>> &data) {
    this->from_vector(data);
}

void Matrix::from_vector(const std::vector<std::vector<double>> &data) {
    _row = int(data.size());
    _col = int(data[0].size());
    _data.resize(_row * _col);
    for (int i = 0; i < _row; i++) {
        if (int(data[i].size()) != _col) {
            std::cerr << "Invalid matrix data: " << i << "th row has different size." << std::endl;
            exit(1);
        }
    }
    for (int i = 0; i < _row; i++) {
        for (int j = 0; j < _col; j++) {
            this->operator()(i, j) = data[i][j];
        }
    }
}

int Matrix::row() const {
    return _row;
}

int Matrix::col() const {
    return _col;
}

std::pair<int, int> Matrix::shape() const {
    return {_row, _col};
}

Matrix Matrix::transpose() const {
    // Reindex element-wise so that res(j, i) == (*this)(i, j); copying the
    // row-major buffer with swapped dimensions would only be correct for
    // row/column vectors.
    Matrix res(_col, _row);
    for (int i = 0; i < _row; i++) {
        for (int j = 0; j < _col; j++) {
            res(j, i) = this->operator()(i, j);
        }
    }
    return res;
}

void Matrix::set(double val) {
    _data.assign(_data.size(), val);
}

void Matrix::print(int precision) const {
    std::cout << "[\n";
    for (int i = 0; i < _row; i++) {
        std::cout << "  [";
        for (int j = 0; j < _col; j++) {
            if (j)
                std::cout << ", ";
            std::cout << std::fixed << std::setprecision(precision) << this->operator()(i, j);
        }
        std::cout << "]\n";
    }
    std::cout << "]" << std::endl;
}

Matrix &Matrix::operator=(const Matrix &mat) {
    if (this == &mat)
        return *this;
    if (_row != mat._row || _col != mat._col) {
        _row = mat._row;
        _col = mat._col;
        _data.resize(_row * _col);
    }
    _data.assign(mat._data.begin(), mat._data.end());
    return *this;
}

Matrix Matrix::operator+(const Matrix &mat) const {
    Matrix res(*this);
    res += mat;
    return res;
}

Matrix &Matrix::operator+=(const Matrix &mat) {
    if (_row != mat._row || _col != mat._col) {
        std::cerr << "Matrix size not match: can't perform addition between "
                  << "(" << _row << ", " << _col << ") and "
                  << "(" << mat._row << ", " << mat._col << ")" << std::endl;
        exit(1);
    }
    for (int i = 0; i < _data.size(); i++) {
        _data[i] += mat._data[i];
    }
    return *this;
}

Matrix Matrix::operator+(double x) const {
    Matrix res(*this);
    res += x;
    return res;
}

Matrix &Matrix::operator+=(double x) {
    for (double &i: _data) {
        i += x;
    }
    return *this;
}

Matrix Matrix::operator-(const Matrix &mat) const {
    Matrix res(*this);
    res -= mat;
    return res;
}

Matrix &Matrix::operator-=(const Matrix &mat) {
    if (_row != mat._row || _col != mat._col) {
        std::cerr << "Matrix size not match: can't perform subtraction between "
                  << "(" << _row << ", " << _col << ") and "
                  << "(" << mat._row << ", " << mat._col << ")" << std::endl;
        exit(1);
    }
    for (int i = 0; i < _data.size(); i++) {
        _data[i] -= mat._data[i];
    }
    return *this;
}

Matrix Matrix::operator-(double x) const {
    return *this + (-x);
}

Matrix &Matrix::operator-=(double x) {
    return *this += -x;
}

Matrix Matrix::operator*(const Matrix &mat) const {
    if (_col != mat._row) {
        std::cerr << "Matrix size not match: can't perform matrix multiplication between "
                  << "(" << _row << ", " << _col << ") and "
                  << "(" << mat._row << ", " << mat._col << ")" << std::endl;
        exit(1);
    }
    Matrix res(_row, mat._col);
    for (int i = 0; i < _row; i++) {
        for (int j = 0; j < mat._col; j++) {
            for (int k = 0; k < _col; k++) {
                res(i, j) += this->operator()(i, k) * mat(k, j);
            }
        }
    }
    return res;
}

Matrix Matrix::operator*(const double &val) const {
    Matrix res(*this);
    res *= val;
    return res;
}

Matrix &Matrix::operator*=(const double &val) {
    for (double &i: _data) {
        i *= val;
    }
    return *this;
}

Matrix Matrix::operator/(const double &val) const {
    Matrix res(*this);
    res /= val;
    return res;
}

Matrix &Matrix::operator/=(const double &val) {
    if (val == 0) {
        std::cerr << "Division by zero error" << std::endl;
        exit(1);
    }
    for (double &i: _data) {
        i /= val;
    }
    return *this;
}

bool Matrix::operator==(const Matrix &mat) const {
    if (_row != mat._row || _col != mat._col) {
        return false;
    }
    for (int i = 0; i < _data.size(); i++) {
        if (_data[i] != mat._data[i]) {
            return false;
        }
    }
    return true;
}

void Matrix::randomize(double min, double max) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<double> dis(min, max);
    for (double &i: _data) {
        i = dis(gen);
    }
}

void Matrix::perform(double (*f)(double)) {
    for (double &i: _data) {
        i = f(i);
    }
}

Matrix Matrix::hadamard(const Matrix &mat) const {
    if (_row != mat._row || _col != mat._col) {
        std::cerr << "Matrix size not match: can't perform hadamard product between "
                  << "(" << _row << ", " << _col << ") and "
                  << "(" << mat._row << ", " << mat._col << ")" << std::endl;
        exit(1);
    }
    Matrix res(*this);
    for (int i = 0; i < _data.size(); i++) {
        res._data[i] *= mat._data[i];
    }
    return res;
}

Matrix Matrix::convolution(const Matrix &mat, bool valid) const {
    // correlation between this matrix and mat matrix
    // valid: if true, return valid convolution, else return full convolution
    int row = valid ? _row - mat._row + 1 : _row + mat._row - 1;
    int col = valid ? _col - mat._col + 1 : _col + mat._col - 1;
    // In full mode the kernel may hang off every edge, so anchor it
    // mat._row - 1 (resp. mat._col - 1) positions above/left of the matrix
    // and treat out-of-range elements as zero padding.
    int off_r = valid ? 0 : mat._row - 1;
    int off_c = valid ? 0 : mat._col - 1;
    Matrix res(row, col);
    for (int i = 0; i < row; i++) {
        for (int j = 0; j < col; j++) {
            // calculate convolution at (i, j)
            double sum = 0;
            for (int k = 0; k < mat._row; k++) {
                for (int l = 0; l < mat._col; l++) {
                    int r = i + k - off_r, c = j + l - off_c;
                    if (r >= 0 && r < _row && c >= 0 && c < _col) {
                        sum += this->operator()(r, c) * mat(k, l);
                    }
                }
            }
            res(i, j) = sum;
        }
    }
    return res;
}

double Matrix::max() const {
    return *std::max_element(_data.begin(), _data.end());
}

double Matrix::min() const {
    return *std::min_element(_data.begin(), _data.end());
}

double Matrix::sum() const {
    return std::accumulate(_data.begin(), _data.end(), 0.0);
}

std::pair<int, int> Matrix::argmax() const {
    int max_idx = int(std::max_element(_data.begin(), _data.end()) - _data.begin());
    return {max_idx / _col, max_idx % _col};
}

std::pair<int, int> Matrix::argmin() const {
    int min_idx = int(std::min_element(_data.begin(), _data.end()) - _data.begin());
    return {min_idx / _col, min_idx % _col};
}

Matrix Matrix::reshape(int row, int col) const {
    if (_row * _col != row * col) {
        std::cerr << "Matrix size not match: can't reshape "
                  << "(" << _row << ", " << _col << ") to "
                  << "(" << row << ", " << col << ")" << std::endl;
        exit(1);
    }
    Matrix res(*this);
    res._row = row;
    res._col = col;
    return res;
}

double Matrix::operator()(int r, int c) const {
    return _data[r * _col + c];
}

double &Matrix::operator()(int r, int c) {
    return _data[r * _col + c];
}

Matrix operator*(const double &val, const Matrix &mat) {
    return mat * val;
}

Matrix make_matrix(const std::vector<std::vector<double>> &data) {
    return Matrix(data);
}
--------------------------------------------------------------------------------
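As a quick orientation for the API above, here is a hedged sketch of standalone usage. The file name and the standalone `main` are hypothetical (in the repo the class is driven through the layer and network code), but every call matches the definitions in this file:

```cpp
// demo.cpp (hypothetical, not part of the repo): exercises matrix/Matrix.h.
#include <iostream>
#include "matrix/Matrix.h"

int main() {
    Matrix a = make_matrix({{1.0, 2.0, 3.0},
                            {4.0, 5.0, 6.0}});  // 2x3
    Matrix b = a.transpose();                   // 3x2
    Matrix c = a * b;                           // 2x2 matrix product
    c.print(2);

    Matrix kernel = make_matrix({{0.0, 1.0},
                                 {1.0, 0.0}});  // 2x2 kernel
    Matrix valid = a.convolution(kernel, true);   // (2-2+1) x (3-2+1) = 1x2
    Matrix full = a.convolution(kernel, false);   // (2+2-1) x (3+2-1) = 3x4
    std::cout << valid.row() << "x" << valid.col() << ", "
              << full.row() << "x" << full.col() << std::endl;
    return 0;
}
```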