├── layer
│   ├── Layer.cpp
│   ├── Layer.h
│   ├── FlattenLayer.h
│   ├── FlattenLayer.cpp
│   ├── DropoutLayer.h
│   ├── ActivationLayer.cpp
│   ├── FullyConnectedLayer.h
│   ├── ActivationLayer.h
│   ├── ConvolutionalLayer.h
│   ├── DropoutLayer.cpp
│   ├── FullyConnectedLayer.cpp
│   └── ConvolutionalLayer.cpp
├── loss
│   ├── LossFunction.cpp
│   ├── LossFunction.h
│   ├── MeanSquaredError.h
│   └── MeanSquaredError.cpp
├── activation
│   ├── ActivationFunction.cpp
│   ├── ReLu.h
│   ├── Tanh.h
│   ├── Sigmoid.h
│   ├── ActivationFunction.h
│   ├── Tanh.cpp
│   ├── ReLu.cpp
│   └── Sigmoid.cpp
├── images
│   ├── Neural-Net-cpp-circle_data.png
│   ├── Neural-Net-cpp-circle_mesh.png
│   ├── Neural-Net-cpp-window_data.png
│   ├── Neural-Net-cpp-window_mesh.png
│   ├── Neural-Net-cpp-cluster_data.png
│   ├── Neural-Net-cpp-cluster_mesh.png
│   ├── Neural-Net-cpp-whirlpool_data.png
│   └── Neural-Net-cpp-whirlpool_mesh.png
├── test
│   ├── LoadPlane.h
│   ├── LoadPolynomial.h
│   ├── TestCaseFile.h
│   ├── PlotPlaneData.py
│   ├── SavePlaneMesh.h
│   ├── LoadPolynomial.cpp
│   ├── PlotPlaneMesh.py
│   ├── ConvertMNIST.py
│   ├── SavePlaneMesh.cpp
│   ├── LoadPlane.cpp
│   └── TestCaseFile.cpp
├── ANN.h
├── main.cpp
├── network
│   ├── Network.h
│   └── Network.cpp
├── CMakeLists.txt
├── matrix
│   ├── Matrix.h
│   └── Matrix.cpp
├── .gitignore
└── README.md

--------------------------------------------------------------------------------
/layer/Layer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "Layer.h"

--------------------------------------------------------------------------------
/loss/LossFunction.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "LossFunction.h"

--------------------------------------------------------------------------------
/activation/ActivationFunction.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "ActivationFunction.h"
--------------------------------------------------------------------------------
/images/Neural-Net-cpp-circle_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-circle_data.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-circle_mesh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-circle_mesh.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-window_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-window_data.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-window_mesh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-window_mesh.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-cluster_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-cluster_data.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-cluster_mesh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-cluster_mesh.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-whirlpool_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-whirlpool_data.png

--------------------------------------------------------------------------------
/images/Neural-Net-cpp-whirlpool_mesh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisKimZHT/Neural-Net-cpp/master/images/Neural-Net-cpp-whirlpool_mesh.png

--------------------------------------------------------------------------------
/layer/Layer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_LAYER_H
#define ANN_LAYER_H

#include "../matrix/Matrix.h"

struct Layer {
    Matrix input;

    virtual Matrix forward(const Matrix &input, bool is_eval) = 0;

    virtual Matrix backward(const Matrix &d_output, double learning_rate) = 0;
};


#endif //ANN_LAYER_H
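A note on the interface above: `forward` caches the layer input and returns the layer output (the `is_eval` flag distinguishes inference from training and is only acted on by `DropoutLayer`), while `backward` receives the gradient of the loss with respect to the layer's output, applies any parameter update using `learning_rate`, and returns the gradient with respect to the layer's input. A minimal sketch of a hypothetical pass-through layer (not part of this repository) showing the contract; `FlattenLayer` below is the simplest real implementation:

```cpp
#include "Layer.h"

// Hypothetical illustration only: a parameter-free layer that forwards
// its input unchanged and passes gradients straight through.
struct IdentityLayer : public Layer {
    Matrix forward(const Matrix &input, bool is_eval) override {
        this->input = input;  // cache the input, as the concrete layers do
        return input;         // output equals input
    }

    Matrix backward(const Matrix &d_output, double learning_rate) override {
        return d_output;      // dL/dinput == dL/doutput; nothing to update
    }
};
```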
--------------------------------------------------------------------------------
/layer/FlattenLayer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-29.
//

#ifndef ANN_FLATTENLAYER_H
#define ANN_FLATTENLAYER_H


#include "Layer.h"

struct FlattenLayer : public Layer {
    Matrix forward(const Matrix &input, bool is_eval) override;

    Matrix backward(const Matrix &d_output, double learning_rate) override;
};


#endif //ANN_FLATTENLAYER_H

--------------------------------------------------------------------------------
/test/LoadPlane.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/8.
//

#ifndef ANN_LOADPLANE_H
#define ANN_LOADPLANE_H

#include <iostream>
#include <vector>
#include <string>
#include <random>
#include <chrono>
#include <cmath>
#include "../matrix/Matrix.h"

std::pair<std::vector<Matrix>, std::vector<Matrix>> load_plane(int size, const std::string &type);

#endif //ANN_LOADPLANE_H

--------------------------------------------------------------------------------
/layer/FlattenLayer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-29.
//

#include "FlattenLayer.h"

Matrix FlattenLayer::forward(const Matrix &input, bool is_eval) {
    this->input = input;
    return input.reshape(input.row() * input.col(), 1);
}

Matrix FlattenLayer::backward(const Matrix &d_output, double learning_rate) {
    return d_output.reshape(input.row(), input.col());
}

--------------------------------------------------------------------------------
/loss/LossFunction.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_LOSSFUNCTION_H
#define ANN_LOSSFUNCTION_H


#include "../matrix/Matrix.h"

struct LossFunction {
    virtual double loss(const Matrix &predict, const Matrix &ground_truth) = 0;

    virtual Matrix derivative(const Matrix &predict, const Matrix &ground_truth) = 0;
};


#endif //ANN_LOSSFUNCTION_H

--------------------------------------------------------------------------------
/test/LoadPolynomial.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/6.
//

#ifndef ANN_LOADPOLYNOMIAL_H
#define ANN_LOADPOLYNOMIAL_H

#include <vector>
#include <utility>
#include <random>
#include <chrono>
#include "../matrix/Matrix.h"

std::pair<std::vector<Matrix>, std::vector<Matrix>>
load_polynomial(int size, double(*f)(double), double min = -1.0, double max = 1.0);


#endif //ANN_LOADPOLYNOMIAL_H

--------------------------------------------------------------------------------
/activation/ReLu.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_RELU_H
#define ANN_RELU_H


#include "ActivationFunction.h"

struct ReLu : public ActivationFunction {
    double activate(double x) override;

    Matrix activate(const Matrix &m) override;

    double derivative(double x) override;

    Matrix derivative(const Matrix &m) override;
};


#endif //ANN_RELU_H

--------------------------------------------------------------------------------
/activation/Tanh.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_TANH_H
#define ANN_TANH_H


#include "ActivationFunction.h"

struct Tanh : public ActivationFunction {
    double activate(double x) override;

    Matrix activate(const Matrix &m) override;

    double derivative(double x) override;

    Matrix derivative(const Matrix &m) override;
};


#endif //ANN_TANH_H
--------------------------------------------------------------------------------
/loss/MeanSquaredError.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_MEANSQUAREDERROR_H
#define ANN_MEANSQUAREDERROR_H


#include "LossFunction.h"

struct MeanSquaredError : public LossFunction {
    double loss(const Matrix &predict, const Matrix &ground_truth) override;

    Matrix derivative(const Matrix &predict, const Matrix &ground_truth) override;
};


#endif //ANN_MEANSQUAREDERROR_H

--------------------------------------------------------------------------------
/activation/Sigmoid.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_SIGMOID_H
#define ANN_SIGMOID_H


#include "ActivationFunction.h"

struct Sigmoid : public ActivationFunction {
    double activate(double x) override;

    Matrix activate(const Matrix &m) override;

    double derivative(double x) override;

    Matrix derivative(const Matrix &m) override;
};


#endif //ANN_SIGMOID_H

--------------------------------------------------------------------------------
/activation/ActivationFunction.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_ACTIVATIONFUNCTION_H
#define ANN_ACTIVATIONFUNCTION_H


#include "../matrix/Matrix.h"

struct ActivationFunction {
public:
    virtual double activate(double x) = 0;

    virtual Matrix activate(const Matrix &m) = 0;

    virtual double derivative(double x) = 0;

    virtual Matrix derivative(const Matrix &m) = 0;
};


#endif //ANN_ACTIVATIONFUNCTION_H

--------------------------------------------------------------------------------
/layer/DropoutLayer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-5-29.
//

#ifndef ANN_DROPOUTLAYER_H
#define ANN_DROPOUTLAYER_H


#include "Layer.h"

struct DropoutLayer : public Layer {
    double dropout_rate;
    Matrix mask;

    explicit DropoutLayer(double dropout_rate);

    Matrix forward(const Matrix &input, bool is_eval) override;

    Matrix backward(const Matrix &d_output, double learning_rate) override;
};


#endif //ANN_DROPOUTLAYER_H
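The declaration above is inverted dropout (implemented in /layer/DropoutLayer.cpp below): during training each entry of `mask` is zeroed with probability p = `dropout_rate` and the survivors are scaled by 1/(1 - p), so that in expectation the training-time output equals the untouched input,

$$\mathbb{E}\!\left[\frac{m_{ij}\,x_{ij}}{1-p}\right] = \frac{(1-p)\,x_{ij}}{1-p} = x_{ij},$$

which is why `forward` can simply return the input unchanged when `is_eval` is true.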
--------------------------------------------------------------------------------
/loss/MeanSquaredError.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "MeanSquaredError.h"
#include <cmath>

double MeanSquaredError::loss(const Matrix &predict, const Matrix &ground_truth) {
    double result = 0;
    for (int i = 0; i < predict.row(); i++) {
        result += pow(predict(i, 0) - ground_truth(i, 0), 2);
    }
    return result / predict.row();
}

Matrix MeanSquaredError::derivative(const Matrix &predict, const Matrix &ground_truth) {
    return (predict - ground_truth) * 2 / predict.row();
}
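For an n-by-1 prediction p and ground truth g, the implementation above computes

$$L = \frac{1}{n}\sum_{i=1}^{n}(p_i - g_i)^2, \qquad \frac{\partial L}{\partial p_i} = \frac{2}{n}(p_i - g_i),$$

and the gradient is exactly the `(predict - ground_truth) * 2 / predict.row()` returned by `derivative`.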
--------------------------------------------------------------------------------
/layer/ActivationLayer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "ActivationLayer.h"

ActivationLayer::ActivationLayer(ActivationFunction *activation_function) {
    this->activation_function = activation_function;
}

Matrix ActivationLayer::forward(const Matrix &input, bool is_eval) {
    this->input = input;
    return activation_function->activate(input);
}

Matrix ActivationLayer::backward(const Matrix &d_output, double learning_rate) {
    return activation_function->derivative(input).hadamard(d_output);
}

--------------------------------------------------------------------------------
/layer/FullyConnectedLayer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_FULLYCONNECTEDLAYER_H
#define ANN_FULLYCONNECTEDLAYER_H


#include "Layer.h"

struct FullyConnectedLayer : public Layer {
    int input_size, output_size;
    Matrix weights, biases;

    FullyConnectedLayer(int input_size, int output_size);

    Matrix forward(const Matrix &input, bool is_eval) override;

    Matrix backward(const Matrix &d_output, double learning_rate) override;
};


#endif //ANN_FULLYCONNECTEDLAYER_H

--------------------------------------------------------------------------------
/activation/Tanh.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "Tanh.h"
#include <cmath>

double Tanh::activate(double x) {
    return tanh(x);
}

Matrix Tanh::activate(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return tanh(x); });
    return result;
}

double Tanh::derivative(double x) {
    return 1 - tanh(x) * tanh(x);
}

Matrix Tanh::derivative(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return 1 - tanh(x) * tanh(x); });
    return result;
}

--------------------------------------------------------------------------------
/activation/ReLu.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "ReLu.h"

double ReLu::activate(double x) {
    return x > 0.0 ? x : 0.0;
}

Matrix ReLu::activate(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return x > 0.0 ? x : 0.0; });
    return result;
}

double ReLu::derivative(double x) {
    return x > 0.0 ? 1.0 : 0.0;
}

Matrix ReLu::derivative(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return x > 0.0 ? 1.0 : 0.0; });
    return result;
}

--------------------------------------------------------------------------------
/layer/ActivationLayer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_ACTIVATIONLAYER_H
#define ANN_ACTIVATIONLAYER_H


#include "Layer.h"
#include "../activation/ActivationFunction.h"

struct ActivationLayer : public Layer {
    ActivationFunction *activation_function;

    explicit ActivationLayer(ActivationFunction *activation_function);

    Matrix forward(const Matrix &input, bool is_eval) override;

    Matrix backward(const Matrix &d_output, double learning_rate) override;
};


#endif //ANN_ACTIVATIONLAYER_H

--------------------------------------------------------------------------------
/activation/Sigmoid.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "Sigmoid.h"
#include <cmath>

double Sigmoid::activate(double x) {
    return 1.0 / (1.0 + exp(-x));
}

Matrix Sigmoid::activate(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return 1.0 / (1.0 + exp(-x)); });
    return result;
}

double Sigmoid::derivative(double x) {
    return 1.0 / (1.0 + exp(-x)) * (1.0 - 1.0 / (1.0 + exp(-x)));
}

Matrix Sigmoid::derivative(const Matrix &m) {
    Matrix result(m);
    result.perform([](double x) { return 1.0 / (1.0 + exp(-x)) * (1.0 - 1.0 / (1.0 + exp(-x))); });
    return result;
}
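Both element-wise derivatives above are the usual closed forms: for the sigmoid,

$$\sigma(x) = \frac{1}{1+e^{-x}}, \qquad \sigma'(x) = \sigma(x)\,\bigl(1 - \sigma(x)\bigr),$$

written out with `exp` evaluated twice rather than reusing a cached activation, and Tanh analogously uses 1 - tanh^2(x).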
--------------------------------------------------------------------------------
/test/TestCaseFile.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/8.
//

#ifndef ANN_TESTCASEFILE_H
#define ANN_TESTCASEFILE_H

#include <iostream>
#include <fstream>
#include <iomanip>
#include <vector>
#include <string>
#include "../matrix/Matrix.h"

std::pair<std::vector<Matrix>, std::vector<Matrix>> load_testcase(const std::string &filename);

void save_testcase(const std::string &filename, std::vector<Matrix> &x, std::vector<Matrix> &y);

std::pair<std::vector<Matrix>, std::vector<Matrix>> load_binary_testcase(const std::string &filename);

void save_binary_testcase(const std::string &filename, std::vector<Matrix> &x, std::vector<Matrix> &y);

#endif //ANN_TESTCASEFILE_H

--------------------------------------------------------------------------------
/test/PlotPlaneData.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

with open("../test.txt", "r") as f:
    num_samples = int(f.readline().strip())

    input_shape = tuple(map(int, f.readline().strip().split()))
    label_shape = tuple(map(int, f.readline().strip().split()))

    x = []
    y = []
    for i in range(num_samples):
        data = list(map(float, f.readline().strip().split()))
        x_vec = np.array(data[:2])
        y_vec = np.array(data[2:])
        x.append(x_vec)
        y.append(y_vec)
    x = np.array(x)
    y = np.array(y)

plt.xlabel("x")
plt.ylabel("y")
plt.axis("equal")
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()

--------------------------------------------------------------------------------
/test/SavePlaneMesh.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/8.
//

#ifndef ANN_SAVEPLANEMESH_H
#define ANN_SAVEPLANEMESH_H

#include <iostream>
#include <fstream>
#include <iomanip>
#include <cmath>
#include <algorithm>
#include "../matrix/Matrix.h"
#include "../network/Network.h"

void save_plane_mesh(double x_start, double x_end, double x_step,
                     double y_start, double y_end, double y_step,
                     Network &network, const std::string &filename);

void save_plane_mesh_with_data(const std::string &data_file, const std::string &mesh_file,
                               std::vector<Matrix> &x, std::vector<Matrix> &y, Network &network);

#endif //ANN_SAVEPLANEMESH_H

--------------------------------------------------------------------------------
/ANN.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-29.
//

#ifndef ANN_ANN_H
#define ANN_ANN_H

#include "matrix/Matrix.h"

#include "network/Network.h"

#include "layer/Layer.h"
#include "layer/FlattenLayer.h"
#include "layer/ConvolutionalLayer.h"
#include "layer/FullyConnectedLayer.h"
#include "layer/ActivationLayer.h"
#include "layer/DropoutLayer.h"

#include "loss/LossFunction.h"
#include "loss/MeanSquaredError.h"

#include "activation/ActivationFunction.h"
#include "activation/Sigmoid.h"
#include "activation/ReLu.h"
#include "activation/Tanh.h"

#include "test/TestCaseFile.h"
#include "test/LoadPlane.h"
#include "test/LoadPolynomial.h"
#include "test/SavePlaneMesh.h"

#endif //ANN_ANN_H
--------------------------------------------------------------------------------
/layer/ConvolutionalLayer.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-29.
//

#ifndef ANN_CONVOLUTIONALLAYER_H
#define ANN_CONVOLUTIONALLAYER_H


#include <tuple>
#include "Layer.h"

struct ConvolutionalLayer : public Layer {
    int input_h, input_w, input_c;    // input channel not implemented
    int filter_h, filter_w;
    int output_h, output_w, output_c; // output channel not implemented
    Matrix weights;                   // (filter_h, filter_w)
    double bias;

    Matrix input;

    ConvolutionalLayer(std::tuple<int, int> input_shape, std::tuple<int, int> filter_shape);

    Matrix forward(const Matrix &input, bool is_eval) override;

    Matrix backward(const Matrix &d_output, double learning_rate) override;
};


#endif //ANN_CONVOLUTIONALLAYER_H

--------------------------------------------------------------------------------
/layer/DropoutLayer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-5-29.
//

#include "DropoutLayer.h"

DropoutLayer::DropoutLayer(double dropout_rate) {
    this->dropout_rate = dropout_rate;
}

Matrix DropoutLayer::forward(const Matrix &input, bool is_eval) {
    if (is_eval) {
        return input;
    }
    mask = Matrix(input.row(), input.col());
    mask.randomize(0, 1);
    for (int i = 0; i < input.row(); i++) {
        for (int j = 0; j < input.col(); j++) {
            mask(i, j) = mask(i, j) < dropout_rate ? 0 : 1;
        }
    }
    return input.hadamard(mask) / (1 - dropout_rate);
}

Matrix DropoutLayer::backward(const Matrix &d_output, double learning_rate) {
    return d_output.hadamard(mask) / (1 - dropout_rate);
}
--------------------------------------------------------------------------------
/test/LoadPolynomial.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/6.
//

#include "LoadPolynomial.h"


std::pair<std::vector<Matrix>, std::vector<Matrix>>
load_polynomial(int size, double(*f)(double), double min, double max) {
    std::vector<Matrix> input, output;
    auto now = std::chrono::system_clock::now();
    auto now_sec = std::chrono::time_point_cast<std::chrono::seconds>(now);
    auto timestamp = now_sec.time_since_epoch().count();
    std::default_random_engine generator(timestamp);
    std::uniform_real_distribution<double> distribution(min, max);
    for (int i = 0; i < size; i++) {
        double x = distribution(generator);
        double y = f(x);
        input.push_back(make_matrix({{x}}));
        output.push_back(make_matrix({{y}}));
    }
    return {input, output};
}

--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
#include <iostream>
#include "ANN.h"

int main() {
    std::ios::sync_with_stdio(false);

    Network network({
            new ConvolutionalLayer({28, 28}, {3, 3}),
            new ActivationLayer(new Sigmoid),
            new ConvolutionalLayer({26, 26}, {3, 3}),
            new ActivationLayer(new Sigmoid),
            new FlattenLayer,
            new FullyConnectedLayer(24 * 24, 100),
            new ActivationLayer(new Sigmoid),
            new FullyConnectedLayer(100, 10),
            new ActivationLayer(new Sigmoid)
    });
    auto [train_input, train_output] = load_binary_testcase("../mnist_train.bin");
    auto [test_input, test_output] = load_binary_testcase("../mnist_test.bin");

    network.train(train_input, train_output, new MeanSquaredError, 80, 0.1);
    network.evaluate(test_input, test_output, new MeanSquaredError, true);

    return 0;
}

--------------------------------------------------------------------------------
/layer/FullyConnectedLayer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include "FullyConnectedLayer.h"

FullyConnectedLayer::FullyConnectedLayer(int input_size, int output_size) :
        input_size(input_size), output_size(output_size) {
    weights = Matrix(output_size, input_size);
    biases = Matrix(output_size, 1);
    weights.randomize();
    biases.randomize();
}

Matrix FullyConnectedLayer::forward(const Matrix &input, bool is_eval) {
    this->input = input;
    return weights * input + biases;
}

Matrix FullyConnectedLayer::backward(const Matrix &d_output, double learning_rate) {
    Matrix d_input = weights.transpose() * d_output;
    Matrix d_weights = d_output * input.transpose();
    const Matrix &d_biases = d_output;
    weights -= d_weights * learning_rate;
    biases -= d_biases * learning_rate;
    return d_input;
}
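The backward pass above is the standard result for y = Wx + b: with delta = dL/dy arriving from the layer after this one,

$$\frac{\partial L}{\partial x} = W^{\top}\delta, \qquad \frac{\partial L}{\partial W} = \delta\,x^{\top}, \qquad \frac{\partial L}{\partial b} = \delta,$$

followed by a plain SGD step, theta <- theta - eta * dL/dtheta with eta = `learning_rate`, matching the README's note that optimization is naive per-sample gradient descent.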
--------------------------------------------------------------------------------
/network/Network.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#ifndef ANN_NETWORK_H
#define ANN_NETWORK_H


#include <vector>
#include <utility>
#include "../layer/Layer.h"
#include "../loss/LossFunction.h"

class Network {
private:
    std::vector<Layer *> layers;

public:
    explicit Network() = default;

    explicit Network(std::vector<Layer *> layers) : layers(std::move(layers)) {};

    void add_layer(Layer *layer);

    double train(const std::vector<Matrix> &input, const std::vector<Matrix> &target,
                 LossFunction *loss_function, int epochs, double learning_rate);

    Matrix predict(const Matrix &input, bool is_eval = true);

    double evaluate(const std::vector<Matrix> &input, const std::vector<Matrix> &target,
                    LossFunction *loss_function, bool one_hot_encoding = false);
};


#endif //ANN_NETWORK_H

--------------------------------------------------------------------------------
/test/PlotPlaneMesh.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

with open("../test.txt", "r") as f:
    num_samples = int(f.readline().strip())

    input_shape = tuple(map(int, f.readline().strip().split()))
    label_shape = tuple(map(int, f.readline().strip().split()))

    x = []
    y = []
    for i in range(num_samples):
        data = list(map(float, f.readline().strip().split()))
        x_vec = np.array(data[:2])
        y_vec = np.array(list(map(int, data[2:])))
        x.append(x_vec)
        y.append(y_vec)
    x = np.array(x)
    y = np.array(y)

with open("../mesh.txt", "r") as f:
    x_coord = list(map(float, f.readline().strip().split()))
    y_coord = list(map(float, f.readline().strip().split()))
    x_coord = np.array(x_coord)
    y_coord = np.array(y_coord)
    gx, gy = np.meshgrid(x_coord, y_coord)

    mesh = []
    for i in range(y_coord.shape[0]):
        data = list(map(lambda val: 1 if float(val) >= 0 else -1, f.readline().strip().split()))
        data = np.array(data)
        mesh.append(data)

    mesh = np.array(mesh)

plt.xlabel("x")
plt.ylabel("y")
plt.axis("equal")
plt.contourf(gy, gx, mesh, alpha=0.9)
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()
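The constructor in the next file sizes its output for a stride-1 "valid" convolution: an i_h by i_w input and an f_h by f_w filter yield an output of (i_h - f_h + 1) by (i_w - f_w + 1), which is why main.cpp's two 3x3 convolutions take 28x28 to 26x26 and then to 24x24 before flattening into the 24 * 24 fully connected layer.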
--------------------------------------------------------------------------------
/layer/ConvolutionalLayer.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-29.
//

#include "ConvolutionalLayer.h"

ConvolutionalLayer::ConvolutionalLayer(std::tuple<int, int> input_shape, std::tuple<int, int> filter_shape) {
    auto &[_input_h, _input_w] = input_shape;
    auto &[_filter_h, _filter_w] = filter_shape;
    input_h = _input_h;
    input_w = _input_w;
    filter_h = _filter_h;
    filter_w = _filter_w;
    output_h = input_h - filter_h + 1; // stride = 1
    output_w = input_w - filter_w + 1; // stride = 1

    weights = Matrix(filter_h, filter_w);
    weights.randomize();
    Matrix random = Matrix(1, 1);
    random.randomize();
    bias = random(0, 0);
}

Matrix ConvolutionalLayer::forward(const Matrix &input, bool is_eval) {
    this->input = input;
    Matrix output = input.convolution(weights);
    output += bias;
    return output;
}

Matrix ConvolutionalLayer::backward(const Matrix &d_output, double learning_rate) {
    Matrix d_input = d_output.convolution(weights.transpose(), false);
    Matrix d_filters = input.convolution(d_output.transpose(), true);
    double d_bias = d_output.sum();

    weights -= d_filters * learning_rate;
    bias -= d_bias * learning_rate;

    return d_input;
}

--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.25)
project(ANN)

set(CMAKE_CXX_STANDARD 17)

add_executable(
        ANN
        ANN.h
        main.cpp

        matrix/Matrix.cpp
        matrix/Matrix.h

        network/Network.cpp
        network/Network.h

        layer/Layer.cpp
        layer/Layer.h
        layer/FullyConnectedLayer.cpp
        layer/FullyConnectedLayer.h
        layer/ActivationLayer.cpp
        layer/ActivationLayer.h
        layer/ConvolutionalLayer.cpp
        layer/ConvolutionalLayer.h
        layer/FlattenLayer.cpp
        layer/FlattenLayer.h
        layer/DropoutLayer.cpp
        layer/DropoutLayer.h

        loss/LossFunction.cpp
        loss/LossFunction.h
        loss/MeanSquaredError.cpp
        loss/MeanSquaredError.h

        activation/ActivationFunction.cpp
        activation/ActivationFunction.h
        activation/Sigmoid.cpp
        activation/Sigmoid.h
        activation/Tanh.cpp
        activation/Tanh.h
        activation/ReLu.cpp
        activation/ReLu.h

        test/LoadPolynomial.cpp
        test/LoadPolynomial.h
        test/LoadPlane.cpp
        test/LoadPlane.h
        test/TestCaseFile.cpp
        test/TestCaseFile.h
        test/SavePlaneMesh.cpp
        test/SavePlaneMesh.h
)

set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++")

if (CMAKE_BUILD_TYPE MATCHES Release)
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -O3")
endif ()
--------------------------------------------------------------------------------
/matrix/Matrix.h:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/5.
//

#ifndef ANN_MATRIX_H
#define ANN_MATRIX_H

#include <vector>
#include <utility>
#include <iostream>
#include <iomanip>
#include <cmath>

class Matrix {
protected:
    int _row{}, _col{};
    std::vector<double> _data;

public:
    Matrix();

    Matrix(int row, int col, double val = 0);

    Matrix(const Matrix &mat);

    explicit Matrix(const std::vector<std::vector<double>> &data);

    void from_vector(const std::vector<std::vector<double>> &data);

    [[nodiscard]] int row() const;

    [[nodiscard]] int col() const;

    [[nodiscard]] std::pair<int, int> shape() const;

    [[nodiscard]] Matrix transpose() const;

    void set(double val);

    void print(int precision = 2) const;

    void randomize(double min = -0.5, double max = 0.5);

    void perform(double(*f)(double));

    Matrix &operator=(const Matrix &mat);

    Matrix operator+(const Matrix &mat) const;

    Matrix &operator+=(const Matrix &mat);

    Matrix operator+(double x) const;

    Matrix &operator+=(double x);

    Matrix operator-(const Matrix &mat) const;

    Matrix &operator-=(const Matrix &mat);

    Matrix operator-(double x) const;

    Matrix &operator-=(double x);

    Matrix operator*(const Matrix &mat) const;

    Matrix operator*(const double &val) const;

    Matrix &operator*=(const double &val);

    Matrix operator/(const double &val) const;

    Matrix &operator/=(const double &val);

    bool operator==(const Matrix &mat) const;

    double operator()(int r, int c) const;

    double &operator()(int r, int c);

    [[nodiscard]] Matrix hadamard(const Matrix &mat) const;

    [[nodiscard]] Matrix convolution(const Matrix &mat, bool valid = true) const;

    [[nodiscard]] double max() const;

    [[nodiscard]] double min() const;

    [[nodiscard]] double sum() const;

    [[nodiscard]] std::pair<int, int> argmax() const;

    [[nodiscard]] std::pair<int, int> argmin() const;

    [[nodiscard]] Matrix reshape(int row, int col) const;
};

Matrix operator*(const double &val, const Matrix &mat);

Matrix make_matrix(const std::vector<std::vector<double>> &data);

#endif //ANN_MATRIX_H

--------------------------------------------------------------------------------
/test/ConvertMNIST.py:
--------------------------------------------------------------------------------
import numpy as np
from keras import datasets, utils
from tqdm import tqdm


def convert_mnist_to_txt(sample_rate: float = 1.0):
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

    x_train = x_train[:int(x_train.shape[0] * sample_rate)]
    y_train = y_train[:int(y_train.shape[0] * sample_rate)]
    x_test = x_test[:int(x_test.shape[0] * sample_rate)]
    y_test = y_test[:int(y_test.shape[0] * sample_rate)]

    with open('../mnist_train.txt', 'w') as f:
        f.write(f"{x_train.shape[0]}\n")
        f.write("28 28\n")
        f.write("10 1\n")
        for i in tqdm(range(x_train.shape[0])):
            x = x_train[i].reshape(-1) / 255.0
            y = utils.to_categorical(y_train[i], 10)
            f.write(" ".join(map(str, x)) + " " + " ".join(map(str, y)) + "\n")

    with open('../mnist_test.txt', 'w') as f:
        f.write(f"{x_test.shape[0]}\n")
        f.write("28 28\n")
        f.write("10 1\n")
        for i in tqdm(range(x_test.shape[0])):
            x = x_test[i].reshape(-1) / 255.0
            y = utils.to_categorical(y_test[i], 10)
            f.write(" ".join(map(str, x)) + " " + " ".join(map(str, y)) + "\n")


def convert_mnist_to_bin(sample_rate: float = 1.0):
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

    x_train = x_train[:int(x_train.shape[0] * sample_rate)]
    y_train = y_train[:int(y_train.shape[0] * sample_rate)]
    x_test = x_test[:int(x_test.shape[0] * sample_rate)]
    y_test = y_test[:int(y_test.shape[0] * sample_rate)]

    with open('../mnist_train.bin', 'wb') as f:
        f.write(np.array(x_train.shape[0], dtype=np.int32).tobytes())
        f.write(np.array([28, 28], dtype=np.int32).tobytes())
        f.write(np.array([10, 1], dtype=np.int32).tobytes())
        for i in tqdm(range(x_train.shape[0])):
            x = x_train[i].reshape(-1) / 255.0
            # cast the one-hot label to float64: to_categorical defaults to
            # float32, but the C++ loader reads 8-byte doubles
            y = utils.to_categorical(y_train[i], 10).astype(np.float64)
            f.write(x.tobytes())
            f.write(y.tobytes())

    with open('../mnist_test.bin', 'wb') as f:
        f.write(np.array(x_test.shape[0], dtype=np.int32).tobytes())
        f.write(np.array([28, 28], dtype=np.int32).tobytes())
        f.write(np.array([10, 1], dtype=np.int32).tobytes())
        for i in tqdm(range(x_test.shape[0])):
            x = x_test[i].reshape(-1) / 255.0
            y = utils.to_categorical(y_test[i], 10).astype(np.float64)
            f.write(x.tobytes())
            f.write(y.tobytes())


if __name__ == '__main__':
    # convert_mnist_to_txt(0.1)
    convert_mnist_to_bin(0.1)
".join(map(str, y)) + "\n") 31 | 32 | 33 | def convert_mnist_to_bin(sample_rate: float = 1.0): 34 | (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data() 35 | 36 | x_train = x_train[:int(x_train.shape[0] * sample_rate)] 37 | y_train = y_train[:int(y_train.shape[0] * sample_rate)] 38 | x_test = x_test[:int(x_test.shape[0] * sample_rate)] 39 | y_test = y_test[:int(y_test.shape[0] * sample_rate)] 40 | 41 | with open('../mnist_train.bin', 'wb') as f: 42 | f.write(np.array(x_train.shape[0], dtype=np.int32).tobytes()) 43 | f.write(np.array([28, 28], dtype=np.int32).tobytes()) 44 | f.write(np.array([10, 1], dtype=np.int32).tobytes()) 45 | for i in tqdm(range(x_train.shape[0])): 46 | x = x_train[i].reshape(-1) / 255.0 47 | y = utils.to_categorical(y_train[i], 10) 48 | f.write(x.tobytes()) 49 | f.write(y.tobytes()) 50 | 51 | with open('../mnist_test.bin', 'wb') as f: 52 | f.write(np.array(x_test.shape[0], dtype=np.int32).tobytes()) 53 | f.write(np.array([28, 28], dtype=np.int32).tobytes()) 54 | f.write(np.array([10, 1], dtype=np.int32).tobytes()) 55 | for i in tqdm(range(x_test.shape[0])): 56 | x = x_test[i].reshape(-1) / 255.0 57 | y = utils.to_categorical(y_test[i], 10) 58 | f.write(x.tobytes()) 59 | f.write(y.tobytes()) 60 | 61 | 62 | if __name__ == '__main__': 63 | # convert_mnist_to_txt(0.1) 64 | convert_mnist_to_bin(0.1) 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.bin 3 | 4 | ### CUDA template 5 | *.i 6 | *.ii 7 | *.gpu 8 | *.ptx 9 | *.cubin 10 | *.fatbin 11 | 12 | ### C++ template 13 | # Prerequisites 14 | *.d 15 | 16 | # Compiled Object files 17 | *.slo 18 | *.lo 19 | *.o 20 | *.obj 21 | 22 | # Precompiled Headers 23 | *.gch 24 | *.pch 25 | 26 | # Compiled Dynamic libraries 27 | *.so 28 | *.dylib 29 | *.dll 30 | 31 | # Fortran module files 32 | *.mod 33 | *.smod 34 | 35 | # Compiled Static libraries 36 | *.lai 37 | *.la 38 | *.a 39 | *.lib 40 | 41 | # Executables 42 | *.exe 43 | *.out 44 | *.app 45 | 46 | ### CMake template 47 | CMakeLists.txt.user 48 | CMakeCache.txt 49 | CMakeFiles 50 | CMakeScripts 51 | Testing 52 | Makefile 53 | cmake_install.cmake 54 | install_manifest.txt 55 | compile_commands.json 56 | CTestTestfile.cmake 57 | _deps 58 | 59 | ### CLion template 60 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 61 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 62 | 63 | # User-specific stuff 64 | .idea/**/workspace.xml 65 | .idea/**/tasks.xml 66 | .idea/**/usage.statistics.xml 67 | .idea/**/dictionaries 68 | .idea/**/shelf 69 | 70 | # AWS User-specific 71 | .idea/**/aws.xml 72 | 73 | # Generated files 74 | .idea/**/contentModel.xml 75 | 76 | # Sensitive or high-churn files 77 | .idea/**/dataSources/ 78 | .idea/**/dataSources.ids 79 | .idea/**/dataSources.local.xml 80 | .idea/**/sqlDataSources.xml 81 | .idea/**/dynamic.xml 82 | .idea/**/uiDesigner.xml 83 | .idea/**/dbnavigator.xml 84 | 85 | # Gradle 86 | .idea/**/gradle.xml 87 | .idea/**/libraries 88 | 89 | # Gradle and Maven with auto-import 90 | # When using Gradle or Maven with auto-import, you should exclude module files, 91 | # since they will be recreated, and may cause churn. Uncomment if using 92 | # auto-import. 
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# SonarLint plugin
.idea/sonarlint/

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

--------------------------------------------------------------------------------
/test/SavePlaneMesh.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/8.
//

#include "SavePlaneMesh.h"
#include "TestCaseFile.h"


void save_plane_mesh(double x_start, double x_end, double x_step,
                     double y_start, double y_end, double y_step,
                     Network &network, const std::string &filename) {
    int y_size = ceil((y_end - y_start) / y_step);
    int x_size = ceil((x_end - x_start) / x_step);
    Matrix mesh(y_size, x_size);
    std::vector<double> y_coords(y_size);
    std::vector<double> x_coords(x_size);
    std::cout << "\n[Mesh Plane]" << std::endl;
    std::cout << "mesh x from " << x_start << " to " << x_end << " with step " << x_step << std::endl;
    std::cout << "mesh y from " << y_start << " to " << y_end << " with step " << y_step << std::endl;
    for (int i = 0; i < y_size; i++) {
        for (int j = 0; j < x_size; j++) {
            double y = y_start + i * y_step;
            double x = x_start + j * x_step;
            y_coords[i] = y;
            x_coords[j] = x;
            Matrix dot({std::vector<double>{y}, std::vector<double>{x}});
            mesh(i, j) = network.predict(dot)(0, 0);
            std::cout << "\r" << std::fixed << std::setprecision(0) << std::right
                      << std::setw(3) << ceil(100.0 * (i * x_size + j) / (y_size * x_size)) << "% | "
                      << "mesh[" << std::setw(3) << i + 1 << "][" << std::setw(3) << j + 1 << "] = "
                      << std::fixed << std::setprecision(9) << std::setw(12) << mesh(i, j) << std::flush;
        }
    }
    std::ofstream file(filename);
    for (int i = 0; i < x_size; i++) {
        file << std::fixed << std::setprecision(9) << std::left << std::setw(20) << x_coords[i] << " ";
    }
    file << std::endl;
    for (int i = 0; i < y_size; i++) {
        file << std::fixed << std::setprecision(9) << std::left << std::setw(20) << y_coords[i] << " ";
    }
    file << std::endl;
    for (int i = 0; i < mesh.row(); i++) {
        for (int j = 0; j < mesh.col(); j++) {
            file << std::fixed << std::setprecision(9) << std::left << std::setw(20) << mesh(i, j) << " ";
        }
        file << std::endl;
    }
    file.close();
}


void save_plane_mesh_with_data(const std::string &data_file, const std::string &mesh_file,
                               std::vector<Matrix> &x, std::vector<Matrix> &y, Network &network) {
    save_testcase(data_file, x, y);
    double x_min = 1e18, x_max = -1e18, y_min = 1e18, y_max = -1e18;
    for (auto &input: x) {
        y_min = std::min(y_min, input(0, 0));
        y_max = std::max(y_max, input(0, 0));
        x_min = std::min(x_min, input(1, 0));
        x_max = std::max(x_max, input(1, 0));
    }
    double x_padding = (x_max - x_min) * 0.05;
    double y_padding = (y_max - y_min) * 0.05;
    save_plane_mesh(x_min - x_padding, x_max + x_padding, (x_max - x_min + 2 * x_padding) / 100,
                    y_min - y_padding, y_max + y_padding, (y_max - y_min + 2 * y_padding) / 100,
                    network, mesh_file);
}

--------------------------------------------------------------------------------
/network/Network.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 24-4-28.
//

#include <iostream>
#include <iomanip>
#include <chrono>
#include "Network.h"

void Network::add_layer(Layer *layer) {
    layers.push_back(layer);
}

double Network::train(const std::vector<Matrix> &input, const std::vector<Matrix> &target,
                      LossFunction *loss_function, int epochs, double learning_rate) {
    int data_size = int(input.size());
    std::cout << "[Train] " << data_size << " train data\n";
    double loss = 0;
    for (int e = 0; e < epochs; e++) {
        loss = 0;
        auto start = std::chrono::system_clock::now();
        for (int i = 0; i < input.size(); i++) {
            Matrix output = predict(input[i], false);
            loss += loss_function->loss(output, target[i]);
            Matrix d_output = loss_function->derivative(output, target[i]);
            for (auto it = layers.rbegin(); it != layers.rend(); ++it) {
                d_output = (*it)->backward(d_output, learning_rate);
            }
            std::cout << "\r[Epoch " << e + 1 << "/" << epochs << "] "
                      << std::right << std::setw(8) << i + 1 << "/"
                      << std::left << std::setw(8) << data_size
                      << "loss: " << std::left << std::setw(12) << std::setprecision(9) << loss / (i + 1) << std::flush;
        }
        auto end = std::chrono::system_clock::now();
        std::cout << " (" << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms)"
                  << std::endl;
    }
    return loss / data_size;
}

Matrix Network::predict(const Matrix &input, bool is_eval) {
    Matrix output = input;
    for (auto &layer: layers) {
        output = layer->forward(output, is_eval);
    }
    return output;
}

double Network::evaluate(const std::vector<Matrix> &input, const std::vector<Matrix> &target,
                         LossFunction *loss_function, bool one_hot_encoding) {
    int data_size = int(input.size());
    std::cout << "[Evaluate] " << data_size << " evaluate data\n";
    if (!one_hot_encoding) {
        double loss = 0;
        for (int i = 0; i < input.size(); i++) {
            Matrix output = predict(input[i], true);
            loss += loss_function->loss(output, target[i]);
            std::cout << "\r"
                      << std::right << std::setw(8) << i + 1 << "/"
                      << std::left << std::setw(8) << data_size
                      << "loss: " << std::left << std::setw(12) << loss / (i + 1) << std::flush;
        }
        std::cout << std::endl;
        return loss / data_size;
    } else {
        int correct = 0;
        for (int i = 0; i < input.size(); i++) {
            Matrix output = predict(input[i], true);
            if (output.argmax().first == target[i].argmax().first) {
                correct++;
            }
            std::cout << "\r"
                      << std::right << std::setw(8) << i + 1 << "/"
                      << std::left << std::setw(8) << data_size
                      << "accuracy: " << std::left << std::setw(4) << double(correct) / (i + 1)
                      << std::flush;
        }
        std::cout << std::endl;
        return double(correct) / data_size;
    }
}
--------------------------------------------------------------------------------
/test/LoadPlane.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/8.
//

#include "LoadPlane.h"

std::pair<std::vector<Matrix>, std::vector<Matrix>> circle_type(int size) {
    // random generator
    auto now = std::chrono::system_clock::now();
    auto now_sec = std::chrono::time_point_cast<std::chrono::seconds>(now);
    auto timestamp = now_sec.time_since_epoch().count();
    std::default_random_engine generator(timestamp);
    std::normal_distribution<double> distribution(0, 1);

    std::vector<Matrix> input, output;

    // inner circle
    for (int i = 0; i < size / 2; i++) {
        double degree = 2.0 * i / size * 2 * M_PI;
        double x = cos(degree) + 0.5 * distribution(generator);
        double y = sin(degree) + 0.5 * distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{1}}));
    }

    // outer ring
    for (int i = 0; i < size / 2; i++) {
        double degree = 2.0 * i / size * 2 * M_PI;
        double x = 4 * cos(degree) + 0.5 * distribution(generator);
        double y = 4 * sin(degree) + 0.5 * distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{-1}}));
    }
    return {input, output};
}

std::pair<std::vector<Matrix>, std::vector<Matrix>> cluster_type(int size) {
    // random generator
    auto now = std::chrono::system_clock::now();
    auto now_sec = std::chrono::time_point_cast<std::chrono::seconds>(now);
    auto timestamp = now_sec.time_since_epoch().count();
    std::default_random_engine generator(timestamp);
    std::normal_distribution<double> distribution(0, 1);

    std::vector<Matrix> input, output;

    // cluster 1
    for (int i = 0; i < size / 2; i++) {
        double x = distribution(generator);
        double y = distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{1}}));
    }

    // cluster 2
    for (int i = 0; i < size / 2; i++) {
        double x = 4 + distribution(generator);
        double y = 4 + distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{-1}}));
    }

    return {input, output};
}

std::pair<std::vector<Matrix>, std::vector<Matrix>> window_type(int size) {
    // random generator
    auto now = std::chrono::system_clock::now();
    auto now_sec = std::chrono::time_point_cast<std::chrono::seconds>(now);
    auto timestamp = now_sec.time_since_epoch().count();
    std::default_random_engine generator(timestamp);
    std::uniform_real_distribution<double> distribution(-1, 1);

    std::vector<Matrix> input, output;

    for (int i = 0; i < size; i++) {
        double x = distribution(generator);
        double y = distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{x * y >= 0 ? 1.0 : -1.0}}));
    }

    return {input, output};
}

std::pair<std::vector<Matrix>, std::vector<Matrix>> whirlpool_type(int size) {
    // random generator
    auto now = std::chrono::system_clock::now();
    auto now_sec = std::chrono::time_point_cast<std::chrono::seconds>(now);
    auto timestamp = now_sec.time_since_epoch().count();
    std::default_random_engine generator(timestamp);
    std::normal_distribution<double> distribution(0, 0.4);

    std::vector<Matrix> input, output;

    // type 1
    for (int i = 0; i < size / 2; i++) {
        double degree = 4.0 * i / size * 2 * M_PI;
        double x = degree * cos(degree) + distribution(generator);
        double y = degree * sin(degree) + distribution(generator);
        input.push_back(make_matrix({{x}, {y}}));
        output.push_back(make_matrix({{1}}));
    }

    // type -1
    for (int i = 0; i < size / 2; i++) {
        double degree = 4.0 * i / size * 2 * M_PI;
        double x = degree * cos(degree + 2 * M_PI) + distribution(generator);
        double y = degree * sin(degree + 2 * M_PI) + distribution(generator);
        input.push_back(make_matrix({{-x}, {-y}}));
        output.push_back(make_matrix({{-1}}));
    }

    return {input, output};
}

std::pair<std::vector<Matrix>, std::vector<Matrix>> load_plane(int size, const std::string &type) {
    if (type == "circle") {
        return circle_type(size);
    } else if (type == "cluster") {
        return cluster_type(size);
    } else if (type == "window") {
        return window_type(size);
    } else if (type == "whirlpool") {
        return whirlpool_type(size);
    } else {
        std::cerr << "Error: unknown type" << std::endl;
        exit(1);
    }
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Neural-Net-cpp

An artificial neural network implemented in C++, purely as a personal exercise.

Current features:

- Layers (Layer)
  - Fully connected layer: FullyConnectedLayer
  - Convolutional layer: ConvolutionalLayer (single-channel input/output only, for now)
  - Flatten layer: FlattenLayer
  - Activation layer: ActivationLayer
  - Dropout layer: DropoutLayer

- Activation functions
  - Sigmoid
  - Tanh
  - ReLU

- Loss functions
  - Mean squared error: MeanSquaredError

- Datasets
  - MNIST (handwritten digits)
  - 2D binary classification
  - Polynomial fitting


Known issues:

- No model saving/loading; every trained model is one-off.
- Optimization is naive gradient descent and is very unstable: repeated training runs can produce wildly different results.
- No Softmax, and no loss functions other than MSE.
- Mini-batching is not implemented yet; the effective batch_size is 1.
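`DropoutLayer` is listed among the features but does not appear in the examples below. A minimal sketch of where it would sit in a classifier (the layer sizes and dropout rate here are hypothetical, not taken from this repository):

```cpp
Network network({
        new FullyConnectedLayer(784, 128),
        new ActivationLayer(new ReLu),
        new DropoutLayer(0.5),  // randomly zeroes ~50% of activations, training only
        new FullyConnectedLayer(128, 10),
        new ActivationLayer(new Sigmoid)
});
// predict(input, /*is_eval=*/true) bypasses the dropout mask entirely
```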
## Examples

### MNIST

**Code**

```cpp
Network network({
        new ConvolutionalLayer({28, 28}, {3, 3}),
        new ActivationLayer(new Sigmoid),
        new ConvolutionalLayer({26, 26}, {3, 3}),
        new ActivationLayer(new Sigmoid),
        new FlattenLayer,
        new FullyConnectedLayer(24 * 24, 100),
        new ActivationLayer(new Sigmoid),
        new FullyConnectedLayer(100, 10),
        new ActivationLayer(new Sigmoid)
});

auto [train_input, train_output] = load_testcase("../mnist_train.txt");
auto [test_input, test_output] = load_testcase("../mnist_test.txt");

network.train(train_input, train_output, new MeanSquaredError, 80, 0.1);
network.evaluate(test_input, test_output, new MeanSquaredError, true);
```

**Training output**

```
[Load Dataset] 100.00%     6000/6000
[Load Dataset] 100.00%     1000/1000
[Train] 6000 train data
[Epoch 1/80]      6000/6000    loss: 0.092344669   (2994ms)
[Epoch 11/80]     6000/6000    loss: 0.090298650   (2969ms)
[Epoch 21/80]     6000/6000    loss: 0.090335344   (2876ms)
[Epoch 31/80]     6000/6000    loss: 0.031215471   (2525ms)
[Epoch 41/80]     6000/6000    loss: 0.017014869   (2448ms)
[Epoch 51/80]     6000/6000    loss: 0.013342757   (2454ms)
[Epoch 61/80]     6000/6000    loss: 0.011136877   (2538ms)
[Epoch 71/80]     6000/6000    loss: 0.009667794   (2508ms)
[Epoch 80/80]     6000/6000    loss: 0.008628765   (2468ms)
[Evaluate] 1000 evaluate data
    1000/1000    accuracy: 0.910000000
```

**Results over 10 training runs**

```
 #    loss          accuracy
-----------------------------------
 1    0.012405698   0.870000000
 2    0.012413764   0.902000000
 3    0.005940926   0.926000000
 4    0.008770140   0.912000000
 5    0.010302665   0.902000000
 6    0.022851723   0.838000000
 7    0.047779869   0.623000000
 8    0.011795792   0.874000000
 9    0.013515847   0.887000000
10    0.010445787   0.907000000
-----------------------------------
avg   0.015622221   0.864100000
```

### 2D binary classification

**Code**

```cpp
Network network({
        new FullyConnectedLayer(2, 100),
        new ActivationLayer(new Tanh),
        new FullyConnectedLayer(100, 100),
        new ActivationLayer(new Tanh),
        new FullyConnectedLayer(100, 100),
        new ActivationLayer(new Tanh),
        new FullyConnectedLayer(100, 1),
        new ActivationLayer(new Tanh)
});

auto [train_input, train_output] = load_plane(2000, "whirlpool");
auto [test_input, test_output] = load_plane(500, "whirlpool");

network.train(train_input, train_output, new MeanSquaredError, 1000, 0.001);

save_plane_mesh_with_data("../test.txt", "../mesh.txt", test_input, test_output, network);

// then run test/PlotPlaneMesh.py with Python to visualize the decision boundary
```

**Decision boundaries**

| Cluster | Window | Circle | Whirlpool |
| ------- | ------ | ------ | --------- |
| ![](./images/Neural-Net-cpp-cluster_data.png) | ![](./images/Neural-Net-cpp-window_data.png) | ![](./images/Neural-Net-cpp-circle_data.png) | ![](./images/Neural-Net-cpp-whirlpool_data.png) |
| ![](./images/Neural-Net-cpp-cluster_mesh.png) | ![](./images/Neural-Net-cpp-window_mesh.png) | ![](./images/Neural-Net-cpp-circle_mesh.png) | ![](./images/Neural-Net-cpp-whirlpool_mesh.png) |
### Polynomial fitting

**Code**

```cpp
Network network({
        new FullyConnectedLayer(1, 100),
        new ActivationLayer(new Sigmoid),
        new FullyConnectedLayer(100, 100),
        new ActivationLayer(new Sigmoid),
        new FullyConnectedLayer(100, 1),
});

auto poly = [](double x) { return 5 * pow(x, 3) + 2 * pow(x, 2) - 7 * x + 1; };
auto [train_input, train_output] = load_polynomial(1000, poly, -2, 2);
network.train(train_input, train_output, new MeanSquaredError, 100, 0.01);

for (int x = -2; x <= 2; x += 1) {
    Matrix input = make_matrix({{1.0 * x}});
    Matrix output = network.predict(input);
    double predict = output(0, 0);
    double actual = poly(x);
    std::cout << "x: "
              << std::fixed << std::setprecision(2) << std::setw(2) << std::right << x << " Predict: "
              << std::fixed << std::setprecision(2) << std::setw(7) << predict << " Actual: "
              << std::fixed << std::setprecision(2) << std::setw(7) << actual << " Error: "
              << std::fixed << std::setprecision(2) << std::setw(7) << predict - actual << std::endl;
}
```

**Results**

```
x: -2 Predict:  -16.85 Actual:  -17.00 Error:    0.15
x: -1 Predict:    5.01 Actual:    5.00 Error:    0.01
x:  0 Predict:    1.06 Actual:    1.00 Error:    0.06
x:  1 Predict:    1.01 Actual:    1.00 Error:    0.01
x:  2 Predict:   35.04 Actual:   35.00 Error:    0.04
```
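A note on the dataset files used above: the `.txt` format produced by `save_testcase` and `test/ConvertMNIST.py` and read by `load_testcase` starts with three header lines (sample count, input shape, label shape), followed by one whitespace-separated line of input values then label values per sample; the `.bin` variant stores the same header as `int32` values followed by the samples as raw 8-byte doubles.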
                  << std::right << std::fixed << std::setprecision(2)
                  << std::setw(6) << 100.0 * (k + 1) / data_size << "% "
                  << std::right << std::setw(8) << k + 1 << "/"
                  << std::left << std::setw(8) << data_size << std::flush;
        auto &input = x[k];
        auto &label = y[k];
        for (int i = 0; i < input_shape.first; i++) {
            for (int j = 0; j < input_shape.second; j++) {
                file << std::fixed << std::setprecision(9) << std::left << std::setw(20) << input(i, j);
            }
        }
        for (int i = 0; i < label_shape.first; i++) {
            for (int j = 0; j < label_shape.second; j++) {
                file << std::fixed << std::setprecision(9) << std::left << std::setw(20) << label(i, j);
            }
        }
        file << '\n';
    }
    std::cout << std::endl;
    file.close();
}

std::pair<std::vector<Matrix>, std::vector<Matrix>> load_binary_testcase(const std::string &filename) {
    std::ifstream file(filename, std::ios::binary);
    std::vector<Matrix> x, y;
    int data_size;
    std::pair<int, int> input_shape;
    std::pair<int, int> label_shape;
    file.read(reinterpret_cast<char *>(&data_size), sizeof(int));
    file.read(reinterpret_cast<char *>(&input_shape.first), sizeof(int));
    file.read(reinterpret_cast<char *>(&input_shape.second), sizeof(int));
    file.read(reinterpret_cast<char *>(&label_shape.first), sizeof(int));
    file.read(reinterpret_cast<char *>(&label_shape.second), sizeof(int));
    for (int i = 0; i < data_size; i++) {
        std::cout << "\r[Load Dataset] "
                  << std::right << std::fixed << std::setprecision(2)
                  << std::setw(6) << 100.0 * (i + 1) / data_size << "% "
                  << std::right << std::setw(8) << i + 1 << "/"
                  << std::left << std::setw(8) << data_size << std::flush;
        Matrix input(input_shape.first, input_shape.second);
        Matrix label(label_shape.first, label_shape.second);
        for (int j = 0; j < input_shape.first; j++) {
            for (int k = 0; k < input_shape.second; k++) {
                file.read(reinterpret_cast<char *>(&input(j, k)), sizeof(double));
            }
        }
        for (int j = 0; j < label_shape.first; j++) {
            for (int k = 0; k < label_shape.second; k++) {
                file.read(reinterpret_cast<char *>(&label(j, k)), sizeof(double));
            }
        }
        x.push_back(input);
        y.push_back(label);
    }
    std::cout << std::endl;
    return {x, y};
}

void save_binary_testcase(const std::string &filename, std::vector<Matrix> &x, std::vector<Matrix> &y) {
    if (x.size() != y.size()) {
        std::cerr << "Input and label size mismatch" << std::endl;
        exit(1);
    }
    std::ofstream file(filename, std::ios::binary);
    int data_size = int(x.size());
    std::pair<int, int> input_shape = x.front().shape();
    std::pair<int, int> label_shape = y.front().shape();
    file.write(reinterpret_cast<char *>(&data_size), sizeof(int));
    file.write(reinterpret_cast<char *>(&input_shape.first), sizeof(int));
    file.write(reinterpret_cast<char *>(&input_shape.second), sizeof(int));
    file.write(reinterpret_cast<char *>(&label_shape.first), sizeof(int));
    file.write(reinterpret_cast<char *>(&label_shape.second), sizeof(int));
    for (int k = 0; k < data_size; k++) {
        std::cout << "\r[Save Dataset] "
                  << std::right << std::fixed << std::setprecision(2)
                  << std::setw(6) << 100.0 * (k + 1) / data_size << "% "
                  << std::right << std::setw(8) << k + 1 << "/"
                  << std::left << std::setw(8) << data_size << std::flush;
        auto &input = x[k];
        auto &label = y[k];
        for (int i = 0; i < input_shape.first; i++) {
            for (int j = 0; j < input_shape.second; j++) {
                file.write(reinterpret_cast<char *>(&input(i, j)), sizeof(double));
            }
        }
        for (int i = 0; i < label_shape.first; i++) {
            for (int j = 0; j < label_shape.second; j++) {
                file.write(reinterpret_cast<char *>(&label(i, j)), sizeof(double));
            }
        }
    }
    std::cout << std::endl;
    file.close();
}
--------------------------------------------------------------------------------
/matrix/Matrix.cpp:
--------------------------------------------------------------------------------
//
// Created by ChrisKim on 2023/6/5.
//

#include <algorithm>
#include <numeric>
#include <random>
#include "Matrix.h"

Matrix::Matrix() {
    _row = 1;
    _col = 1;
    _data = {0};
}

Matrix::Matrix(int row, int col, double val)
        : _row(row), _col(col) {
    if (row <= 0 || col <= 0) {
        std::cerr << "Matrix size must be positive." << std::endl;
        exit(1);
    }
    _data.resize(row * col, val);
}

Matrix::Matrix(const Matrix &mat)
        : _row(mat._row), _col(mat._col) {
    _data.assign(mat._data.begin(), mat._data.end());
}

Matrix::Matrix(const std::vector<std::vector<double>> &data) {
    this->from_vector(data);
}

void Matrix::from_vector(const std::vector<std::vector<double>> &data) {
    _row = int(data.size());
    _col = int(data[0].size());
    _data.resize(_row * _col);
    for (int i = 0; i < _row; i++) {
        if (int(data[i].size()) != _col) {
            std::cerr << "Invalid matrix data: " << i << "th row has different size." << std::endl;
            exit(1);
        }
    }
    for (int i = 0; i < _row; i++) {
        for (int j = 0; j < _col; j++) {
            this->operator()(i, j) = data[i][j];
        }
    }
}

int Matrix::row() const {
    return _row;
}

int Matrix::col() const {
    return _col;
}

std::pair<int, int> Matrix::shape() const {
    return {_row, _col};
}

Matrix Matrix::transpose() const {
    // Reindex element-wise so that res(j, i) == (*this)(i, j); copying the
    // row-major buffer with swapped dimensions would only be correct for
    // row/column vectors.
    Matrix res(_col, _row);
    for (int i = 0; i < _row; i++) {
        for (int j = 0; j < _col; j++) {
            res(j, i) = this->operator()(i, j);
        }
    }
    return res;
}

void Matrix::set(double val) {
    _data.assign(_data.size(), val);
}

void Matrix::print(int precision) const {
    std::cout << "[\n";
    for (int i = 0; i < _row; i++) {
        std::cout << "  [";
        for (int j = 0; j < _col; j++) {
            if (j)
                std::cout << ", ";
            std::cout << std::fixed << std::setprecision(precision) << this->operator()(i, j);
        }
        std::cout << "]\n";
    }
    std::cout << "]" << std::endl;
}

Matrix &Matrix::operator=(const Matrix &mat) {
    if (this == &mat)
        return *this;
    if (_row != mat._row || _col != mat._col) {
        _row = mat._row;
        _col = mat._col;
        _data.resize(_row * _col);
    }
    _data.assign(mat._data.begin(), mat._data.end());
    return *this;
}

Matrix Matrix::operator+(const Matrix &mat) const {
    Matrix res(*this);
    res += mat;
    return res;
}

Matrix &Matrix::operator+=(const Matrix &mat) {
    if (_row != mat._row || _col != mat._col) {
        std::cerr << "Matrix size not match: can't perform addition between "
                  << "(" << _row << ", " << _col << ") and "
                  << "(" << mat._row << ", " << mat._col << ")" << std::endl;
        exit(1);
    }
    for (int i = 0; i < _data.size(); i++) {
        _data[i] += mat._data[i];
    }
    return *this;
}

Matrix Matrix::operator+(double x) const {
    Matrix res(*this);
    res += x;
    return res;
}

Matrix &Matrix::operator+=(double x) {
    for (double &i: _data) {
        i += x;
    }
    return *this;
}

Matrix Matrix::operator-(const Matrix &mat) const {
    Matrix res(*this);
    res -= mat;
    return res;
}

Matrix &Matrix::operator-=(const Matrix &mat) {
    if (_row != mat._row || _col != mat._col) {
        std::cerr << "Matrix size not match: can't perform subtraction between "
                  << "(" << _row << ", " << _col << ") and "
                  << "(" << mat._row << ", " << mat._col << ")" << std::endl;
        exit(1);
    }
    for (int i = 0; i < _data.size(); i++) {
        _data[i] -= mat._data[i];
    }
    return *this;
}

Matrix Matrix::operator-(double x) const {
    return *this + (-x);
}

Matrix &Matrix::operator-=(double x) {
    return *this += -x;
}

Matrix Matrix::operator*(const Matrix &mat) const {
    if (_col != mat._row) {
        std::cerr << "Matrix size not match: can't perform matrix multiplication between "
                  << "(" << _row << ", " << _col << ") and "
                  << "(" << mat._row << ", " << mat._col << ")" << std::endl;
        exit(1);
    }
    Matrix res(_row, mat._col);
    for (int i = 0; i < _row; i++) {
        for (int j = 0; j < mat._col; j++) {
            for (int k = 0; k < _col; k++) {
                res(i, j) += this->operator()(i, k) * mat(k, j);
            }
        }
    }
    return res;
}

Matrix Matrix::operator*(const double &val) const {
    Matrix res(*this);
    res *= val;
    return res;
}

Matrix &Matrix::operator*=(const double &val) {
    for (double &i: _data) {
        i *= val;
    }
    return *this;
}

Matrix Matrix::operator/(const double &val) const {
    Matrix res(*this);
    res /= val;
    return res;
}

Matrix &Matrix::operator/=(const double &val) {
    if (val == 0) {
        std::cerr << "Division by zero error" << std::endl;
        exit(1);
    }
    for (double &i: _data) {
        i /= val;
    }
    return *this;
}

bool Matrix::operator==(const Matrix &mat) const {
    if (_row != mat._row || _col != mat._col) {
        return false;
    }
    for (int i = 0; i < _data.size(); i++) {
        if (_data[i] != mat._data[i]) {
            return false;
        }
    }
    return true;
}

void Matrix::randomize(double min, double max) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<double> dis(min, max);
    for (double &i: _data) {
        i = dis(gen);
    }
}

void Matrix::perform(double (*f)(double)) {
    for (double &i: _data) {
        i = f(i);
    }
}

Matrix Matrix::hadamard(const Matrix &mat) const {
    if (_row != mat._row || _col != mat._col) {
        std::cerr << "Matrix size not match: can't perform hadamard product between "
                  << "(" << _row << ", " << _col << ") and "
                  << "(" << mat._row << ", " << mat._col << ")" << std::endl;
        exit(1);
    }
    Matrix res(*this);
    for (int i = 0; i < _data.size(); i++) {
        res._data[i] *= mat._data[i];
    }
    return res;
}

Matrix Matrix::convolution(const Matrix &mat, bool valid) const {
    // correlation between this matrix and mat matrix
    // valid: if true, return valid convolution, else return full convolution
    int row = valid ? _row - mat._row + 1 : _row + mat._row - 1;
    int col = valid ? _col - mat._col + 1 : _col + mat._col - 1;
    // In full mode the kernel may hang off every edge, so anchor it
    // mat._row - 1 (resp. mat._col - 1) positions above/left of the matrix
    // and treat out-of-range elements as zero padding.
    int off_r = valid ? 0 : mat._row - 1;
    int off_c = valid ? 0 : mat._col - 1;
    Matrix res(row, col);
    for (int i = 0; i < row; i++) {
        for (int j = 0; j < col; j++) {
            // calculate convolution at (i, j)
            double sum = 0;
            for (int k = 0; k < mat._row; k++) {
                for (int l = 0; l < mat._col; l++) {
                    int r = i + k - off_r, c = j + l - off_c;
                    if (r >= 0 && r < _row && c >= 0 && c < _col) {
                        sum += this->operator()(r, c) * mat(k, l);
                    }
                }
            }
            res(i, j) = sum;
        }
    }
    return res;
}

double Matrix::max() const {
    return *std::max_element(_data.begin(), _data.end());
}

double Matrix::min() const {
    return *std::min_element(_data.begin(), _data.end());
}

double Matrix::sum() const {
    return std::accumulate(_data.begin(), _data.end(), 0.0);
}

std::pair<int, int> Matrix::argmax() const {
    int max_idx = int(std::max_element(_data.begin(), _data.end()) - _data.begin());
    return {max_idx / _col, max_idx % _col};
}

std::pair<int, int> Matrix::argmin() const {
    int min_idx = int(std::min_element(_data.begin(), _data.end()) - _data.begin());
    return {min_idx / _col, min_idx % _col};
}

Matrix Matrix::reshape(int row, int col) const {
    if (_row * _col != row * col) {
        std::cerr << "Matrix size not match: can't reshape "
                  << "(" << _row << ", " << _col << ") to "
                  << "(" << row << ", " << col << ")" << std::endl;
        exit(1);
    }
    Matrix res(*this);
    res._row = row;
    res._col = col;
    return res;
}

double Matrix::operator()(int r, int c) const {
    return _data[r * _col + c];
}

double &Matrix::operator()(int r, int c) {
    return _data[r * _col + c];
}

Matrix operator*(const double &val, const Matrix &mat) {
    return mat * val;
}

Matrix make_matrix(const std::vector<std::vector<double>> &data) {
    return Matrix(data);
}
--------------------------------------------------------------------------------
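As a quick orientation for the API above, here is a hedged sketch of standalone usage. The file name and the standalone `main` are hypothetical (in the repo the class is driven through the layer and network code), but every call matches the definitions in this file:

```cpp
// demo.cpp (hypothetical, not part of the repo): exercises matrix/Matrix.h.
#include <iostream>
#include "matrix/Matrix.h"

int main() {
    Matrix a = make_matrix({{1.0, 2.0, 3.0},
                            {4.0, 5.0, 6.0}});  // 2x3
    Matrix b = a.transpose();                   // 3x2
    Matrix c = a * b;                           // 2x2 matrix product
    c.print(2);

    Matrix kernel = make_matrix({{0.0, 1.0},
                                 {1.0, 0.0}});  // 2x2 kernel
    Matrix valid = a.convolution(kernel, true);   // (2-2+1) x (3-2+1) = 1x2
    Matrix full = a.convolution(kernel, false);   // (2+2-1) x (3+2-1) = 3x4
    std::cout << valid.row() << "x" << valid.col() << ", "
              << full.row() << "x" << full.col() << std::endl;
    return 0;
}
```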