├── images
│   ├── build.png
│   ├── cmake_config.png
│   └── cmake_config2.png
├── .gitmodules
├── src
│   ├── geometry
│   │   ├── TArc.c
│   │   ├── TRay.c
│   │   ├── TRay.h
│   │   ├── TArc.h
│   │   ├── TVec4.h
│   │   ├── TVec2.h
│   │   ├── TVec3.h
│   │   ├── TQuaternion.h
│   │   ├── TVec4.c
│   │   ├── TVec2.c
│   │   ├── TVec3.c
│   │   └── TQuaternion.c
│   ├── Utils.cu
│   ├── TWeightsInit.h
│   ├── Softmax.h
│   ├── Regression.h
│   ├── fList.h
│   ├── tList.h
│   ├── Conc.h
│   ├── Input.h
│   ├── fList.c
│   ├── dList.c
│   ├── Relu.h
│   ├── TanhA.h
│   ├── RL
│   │   ├── SimpleDeque.h
│   │   ├── RLAgent.h
│   │   ├── RLAgent.c
│   │   ├── ACBrain.h
│   │   ├── RLBrain.h
│   │   ├── ReplayBuffer.h
│   │   ├── SimpleDeque.c
│   │   ├── DDPG.h
│   │   ├── TD3.h
│   │   ├── ReplayBuffer.c
│   │   ├── RLBrain.c
│   │   └── ACBrain.c
│   ├── MSE.h
│   ├── Utils.h
│   ├── tList.c
│   ├── dList.h
│   ├── Input.cu
│   ├── Losses.h
│   ├── Tensor4.cu
│   ├── message.h
│   ├── Interfaces.h
│   ├── TWeightsInit.c
│   ├── TCommon.h
│   ├── MaxPool2d.h
│   ├── Input.c
│   ├── Relu.c
│   ├── Tensor4.h
│   ├── cmd
│   │   ├── qmaze
│   │   │   ├── cell.h
│   │   │   ├── quad.h
│   │   │   ├── grid.h
│   │   │   └── qmaze.cpp
│   │   ├── rand_test.cpp
│   │   ├── cartpole
│   │   │   ├── shapes.h
│   │   │   ├── cart.h
│   │   │   ├── cartpole.cpp
│   │   │   └── agent.h
│   │   ├── cartpole_cont
│   │   │   ├── shapes.h
│   │   │   ├── cart.h
│   │   │   ├── cartpole.cpp
│   │   │   └── agent.h
│   │   ├── model_test.cpp
│   │   ├── data_test.cpp
│   │   ├── opt_test.cpp
│   │   ├── cuda_test.cu
│   │   ├── mult_opt_test.cpp
│   │   ├── cartpole_td3
│   │   │   ├── cartpole.cpp
│   │   │   └── agent.h
│   │   └── particles
│   │       └── particles.cpp
│   ├── TanhA.c
│   ├── Regression.c
│   ├── Dense.h
│   ├── MSE.c
│   ├── Losses.c
│   ├── Conv2d.h
│   ├── Model.h
│   ├── TanhA.cu
│   ├── Softmax.c
│   ├── MSE.cu
│   ├── Model.cu
│   ├── Optimizer.h
│   ├── Relu.cu
│   ├── Tensor4.c
│   ├── message.c
│   ├── Losses.cu
│   ├── Tensor.h
│   ├── Utils.c
│   ├── Conc.c
│   ├── MaxPool2d.c
│   ├── TCommon.c
│   ├── Dense.cu
│   ├── cJSON_Utils.h
│   ├── Tensor.cu
│   ├── Dense.c
│   ├── MaxPool2d.cu
│   ├── Optimizer.cu
│   ├── Conv2d.c
│   ├── Conv2d.cu
│   └── Model.c
├── ext
│   ├── glad
│   │   └── CMakeLists.txt
│   └── CMakeLists.txt
└── LICENSE
--------------------------------------------------------------------------------
/images/build.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Cognitive-systems-and-technologies/RoboAICore/HEAD/images/build.png
--------------------------------------------------------------------------------
/images/cmake_config.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Cognitive-systems-and-technologies/RoboAICore/HEAD/images/cmake_config.png
--------------------------------------------------------------------------------
/images/cmake_config2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Cognitive-systems-and-technologies/RoboAICore/HEAD/images/cmake_config2.png
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "ext/glfw"]
2 | 	path = ext/glfw
3 | 	url = https://github.com/glfw/glfw.git
4 | [submodule "ext/box2d"]
5 | 	path = ext/box2d
6 | 	url = https://github.com/erincatto/box2d.git
--------------------------------------------------------------------------------
/src/geometry/TArc.c:
--------------------------------------------------------------------------------
1 | #include "TArc.h"
2 | 
3 | int TArc_IsClockwise(TArc a)
4 | {
5 | 	return a.sweepAngle > 0 ? 
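/* convention: a positive sweep angle is treated as clockwise */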
1 : 0; 6 | } 7 | 8 | float TArc_Length(TArc a) 9 | { 10 | return a.r * fabs(a.sweepAngle); 11 | } -------------------------------------------------------------------------------- /src/geometry/TRay.c: -------------------------------------------------------------------------------- 1 | #include "TRay.h" 2 | #include 3 | #include 4 | 5 | TVec3 TRay_OnRay(TRay r, float dist) 6 | { 7 | TVec3 n = TVec3_Norm(r.dir); 8 | return TVec3_Add(r.org, TVec3_Mul(n, dist)); 9 | } -------------------------------------------------------------------------------- /ext/glad/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | add_library (glad STATIC ${CMAKE_CURRENT_SOURCE_DIR}/src/gl.c) 3 | 4 | target_include_directories(glad PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) 5 | 6 | set_property (TARGET glad PROPERTY FOLDER "ext") -------------------------------------------------------------------------------- /src/geometry/TRay.h: -------------------------------------------------------------------------------- 1 | #ifndef TRAY_H 2 | #define TRAY_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "TVec3.h" 9 | 10 | typedef struct TRay 11 | { 12 | TVec3 org; 13 | TVec3 dir; 14 | }TRay; 15 | 16 | TVec3 TRay_OnRay(TRay r, float dist); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/Utils.cu: -------------------------------------------------------------------------------- 1 | #include "Utils.h" 2 | 3 | #ifdef __NVCC__ 4 | float* createFloatArrayGPU(int n) 5 | { 6 | float* a = NULL; 7 | if (cudaMalloc((void**)&a, n * sizeof(float)) != cudaSuccess) { 8 | printf("Array GPU allocation error\n"); 9 | return NULL; 10 | } 11 | else { 12 | cudaMemset(a, 0, sizeof(float) * n); 13 | return a; 14 | } 15 | } 16 | #endif // __NVCC__ -------------------------------------------------------------------------------- /src/geometry/TArc.h: -------------------------------------------------------------------------------- 1 | #ifndef TARC_H 2 | #define TARC_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | 11 | typedef struct TArc 12 | { 13 | float r; 14 | float startAngle; 15 | float sweepAngle; 16 | }TArc; 17 | 18 | int TArc_IsClockwise(TArc a); 19 | float TArc_Length(TArc a); 20 | 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/TWeightsInit.h: -------------------------------------------------------------------------------- 1 | #ifndef TWEIGHTSI_H 2 | #define TWEIGHTSI_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | #include "TCommon.h" 8 | #include 9 | 10 | typedef enum RandType { 11 | R_XAVIER, 12 | R_XAVIER_NORM, 13 | R_HE 14 | } RandType; 15 | 16 | float xavier_rand(int n); 17 | float xavier_norm_rand(int n, int m); 18 | float he_rand(int n); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif -------------------------------------------------------------------------------- /src/Softmax.h: -------------------------------------------------------------------------------- 1 | #ifndef SOFTMAX_H 2 | #define SOFTMAX_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Tensor.h" 9 | #include "Interfaces.h" 10 | 11 | typedef struct Softmax 12 | { 13 | float* sums; 14 | }Softmax; 15 | 16 | Layer *Softmax_Create(Layer *in); 17 | Tensor *Softmax_Forward(Layer* l); 18 | void 
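/* y: training target tensor; presumably accumulates the loss gradient into the input's dw, as the other *_Backward functions do */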
Softmax_Backward(Layer* l, Tensor* y); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/Regression.h: -------------------------------------------------------------------------------- 1 | #ifndef REGRESSION_H 2 | #define REGRESSION_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Interfaces.h" 9 | #include "Tensor.h" 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | Layer *Regression_Create(Layer *in); 16 | Tensor *Regression_Forward(Layer* l); 17 | void Regression_Backward(Layer* l, Tensor* y); 18 | 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/fList.h: -------------------------------------------------------------------------------- 1 | #ifndef FLOATLIST_H//dynamic list 2 | #define FLOATLIST_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Tensor.h" 9 | #include 10 | 11 | typedef struct fList 12 | { 13 | int length; 14 | float* data; 15 | }fList; 16 | 17 | fList fList_create(); 18 | void fList_realloc(fList* d);//add new elem 19 | float fList_push(fList* d, float t);//add and assign 20 | void fList_free(fList* d);//clear list 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | 26 | #endif //!FLOATLIST_H -------------------------------------------------------------------------------- /src/tList.h: -------------------------------------------------------------------------------- 1 | #ifndef TENSORLIST_H//dynamic list 2 | #define TENSORLIST_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Tensor.h" 9 | #include 10 | 11 | typedef struct tList 12 | { 13 | int length; 14 | Tensor* data; 15 | }tList; 16 | 17 | tList tList_create(); 18 | void tList_realloc(tList* d);//add new elem 19 | Tensor tList_push(tList* d, Tensor* t);//add and assign 20 | void tList_free(tList* d);//clear list 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | 26 | #endif //!TENSORLIST_H -------------------------------------------------------------------------------- /src/Conc.h: -------------------------------------------------------------------------------- 1 | #ifndef CONCATE_H 2 | #define CONCATE_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Tensor.h" 9 | #include "Interfaces.h" 10 | #include "dList.h" 11 | 12 | //Layer *Dense_Create(int num_neurons, RandType weightInit, LayerActivation act, Layer *in); 13 | Layer* Conc_Create(Layer* in1, Layer* in2); 14 | Tensor* Conc_Forward(Layer* l); 15 | void Conc_Backward(Layer* l); 16 | void Conc_BackpropGrads(Layer* l, Tensor* t1, Tensor* t2); 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/Input.h: -------------------------------------------------------------------------------- 1 | #ifndef INPUT_H 2 | #define INPUT_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Tensor.h" 9 | #include "Interfaces.h" 10 | 11 | #include 12 | #include 13 | 14 | Layer *Input_Create(shape out_shape); 15 | Tensor *Input_Forward(Layer* l); 16 | void Input_Backward(Layer* l); 17 | 18 | void Input_Free(Layer *l); 19 | #ifdef __NVCC__ 20 | Layer* Input_CreateGPU(shape out_shape); 21 | Tensor* Input_ForwardGPU(Layer* l); 22 | #endif // __NVCC__ 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | #endif 29 | 
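A quick usage sketch for the dynamic list containers declared above (an editor's hypothetical example, not a file from this repo; it assumes only the fList API exactly as in fList.h, with <stdio.h> added for printf):

#include <stdio.h>
#include "fList.h"

int main(void)
{
	fList xs = fList_create();
	for (int i = 0; i < 4; i++)
		fList_push(&xs, 0.5f * (float)i);	/* grows the list by one and stores the value */
	for (int i = 0; i < xs.length; i++)
		printf("%.2f ", xs.data[i]);	/* prints 0.00 0.50 1.00 1.50 */
	fList_free(&xs);	/* releases storage and resets the length */
	return 0;
}

tList works the same way, except that tList_push deep-copies the pushed tensor via Tensor_CreateCopy and tList_free releases every stored tensor.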
--------------------------------------------------------------------------------
/src/fList.c:
--------------------------------------------------------------------------------
1 | #include "fList.h"
2 | 
3 | fList fList_create()
4 | {
5 | 	fList l;
6 | 	l.data = NULL;
7 | 	l.length = 0;
8 | 	return l;
9 | }
10 | void fList_realloc(fList* d)
11 | {
12 | 	float* tmp = (float*)realloc(d->data, sizeof(float) * (d->length + 1));
13 | 	if (!tmp) {
14 | 		return; /* allocation failed: keep the existing data and length */
15 | 	}
16 | 	d->data = tmp;
17 | 	d->length += 1;
18 | }
19 | float fList_push(fList* d, float t)
20 | {
21 | 	int n = d->length;
22 | 	fList_realloc(d);
23 | 	if (d->length == n) return t; /* not stored: reallocation failed */
24 | 	d->data[d->length - 1] = t;
25 | 	return d->data[d->length - 1];
26 | }
27 | void fList_free(fList* d)
28 | {
29 | 	free(d->data);
30 | 	d->data = NULL;
31 | 	d->length = 0;
32 | }
--------------------------------------------------------------------------------
/src/geometry/TVec4.h:
--------------------------------------------------------------------------------
1 | #ifndef TVEC4_H
2 | #define TVEC4_H
3 | 
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 | 
8 | typedef struct TVec4
9 | {
10 | 	float x;
11 | 	float y;
12 | 	float z;
13 | 	float w;
14 | }TVec4;
15 | 
16 | TVec4 TVec4_Create(float x, float y, float z, float w);
17 | TVec4 TVec4_Create3(float x, float y, float z);
18 | TVec4 TVec4_Create1(float all);
19 | 
20 | TVec4 TVec4_Mul(TVec4 v, float d);
21 | TVec4 TVec4_Div(TVec4 v, float d);
22 | TVec4 TVec4_Sub(TVec4 v1, TVec4 v2);
23 | TVec4 TVec4_Norm(TVec4 v);
24 | float TVec4_Dot(TVec4 v1, TVec4 v2);
25 | 
26 | #ifdef __cplusplus
27 | }
28 | #endif
29 | 
30 | #endif
31 | 
--------------------------------------------------------------------------------
/src/dList.c:
--------------------------------------------------------------------------------
1 | #include "dList.h"
2 | 
3 | dList dList_create()
4 | {
5 | 	dList l;
6 | 	l.data = NULL;
7 | 	l.length = 0;
8 | 	return l;
9 | }
10 | void dList_realloc(dList* d)
11 | {
12 | 	dlElem* tmp = (dlElem*)realloc(d->data, sizeof(dlElem) * (d->length + 1));
13 | 	if (!tmp) {
14 | 		return; /* allocation failed: keep the existing data and length */
15 | 	}
16 | 	d->data = tmp;
17 | 	d->length += 1;
18 | }
19 | void* dList_push(dList* d, void* t)
20 | {
21 | 	int n = d->length;
22 | 	dList_realloc(d);
23 | 	if (d->length == n) return NULL; /* not stored: reallocation failed */
24 | 	d->data[d->length - 1].e = t;
25 | 	d->data[d->length - 1].i = NULL; /* no element info attached by default */
26 | 	return d->data[d->length - 1].e;
27 | }
28 | void dList_free(dList* d)
29 | {
30 | 	free(d->data);
31 | 	d->data = NULL;
32 | 	d->length = 0;
33 | }
--------------------------------------------------------------------------------
/src/Relu.h:
--------------------------------------------------------------------------------
1 | #ifndef RELU_H
2 | #define RELU_H
3 | 
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 | 
8 | #include "Tensor.h"
9 | #include "Interfaces.h"
10 | 
11 | Layer *Relu_Create(Layer *in);
12 | Tensor *Relu_Forward(Layer* l);
13 | void Relu_Backward(Layer* l);
14 | 
15 | #ifdef __NVCC__
16 | Layer* Relu_CreateGPU(Layer* in);
17 | __global__ void Relu_ForwardKernels(int limit, float* xw, float* outw);
18 | Tensor* Relu_ForwardGPU(Layer* l);
19 | __global__ void Relu_BackwardKernels(int limit, float* xdw, float* outw, float* outdw);
20 | void Relu_BackwardGPU(Layer* l);
21 | #endif // __NVCC__
22 | 
23 | #ifdef __cplusplus
24 | }
25 | #endif
26 | 
27 | #endif
28 | 
--------------------------------------------------------------------------------
/src/TanhA.h:
--------------------------------------------------------------------------------
1 | #ifndef TANHA_H
2 | #define TANHA_H
3 | 
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 | 
8 | #include "Interfaces.h"
9 |
#include 10 | #include 11 | 12 | Layer *TanhA_Create(Layer *in); 13 | Tensor* TanhA_Forward(Layer* l); 14 | void TanhA_Backward(Layer* l); 15 | 16 | #ifdef __NVCC__ 17 | Layer* TanhA_CreateGPU(Layer* in); 18 | __global__ void TanhA_ForwardKernels(float* xw, float* outw); 19 | Tensor* TanhA_ForwardGPU(Layer* l); 20 | __global__ void TanhA_BackwardKernels(float* xdw, float* outw, float* outdw); 21 | void TanhA_BackwardGPU(Layer* l); 22 | #endif // __NVCC__ 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/RL/SimpleDeque.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLEDEQUE_H 2 | #define SIMPLEDEQUE_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | typedef struct DequeElem 13 | { 14 | void* elem; 15 | }DequeElem; 16 | 17 | typedef struct SimpleDeque 18 | { 19 | int capacity; 20 | int length; 21 | DequeElem* data; 22 | }SimpleDeque; 23 | 24 | SimpleDeque* createDeque(int capacity); 25 | 26 | void dequeAppend(SimpleDeque* d, void* t, void (*elementFree) (void* e)); 27 | 28 | void freeDeque(SimpleDeque* d, void (*elementFree) (void* e)); 29 | 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | 34 | #endif // !SIMPLEDEQUE_H -------------------------------------------------------------------------------- /src/MSE.h: -------------------------------------------------------------------------------- 1 | #ifndef MSE_H 2 | #define MSE_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Interfaces.h" 9 | #include "Tensor.h" 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | Layer *MSE_Create(Layer* in); 16 | Tensor *MSE_Forward(Layer* l); 17 | void MSE_Backward(Layer* l, Tensor* y_true); 18 | 19 | #ifdef __NVCC__ 20 | Layer* MSE_CreateGPU(Layer* in); 21 | Tensor* MSE_ForwardGPU(Layer* l); 22 | __global__ void MSE_BackwardKernels(int limit, float* xw, float* xdw, float* yw, float n, float* sum); 23 | void MSE_BackwardGPU(Layer* l, Tensor* y_true); 24 | #endif // __NVCC__ 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/RL/RLAgent.h: -------------------------------------------------------------------------------- 1 | #ifndef RLAGENT_H 2 | #define RLAGENT_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | 10 | #include "TCommon.h" 11 | #include "Tensor.h" 12 | #include "RLBrain.h" 13 | 14 | typedef enum AgentPhase 15 | { 16 | A_IDLE, 17 | A_TRAIN, 18 | A_TEST 19 | }AgentPhase; 20 | 21 | typedef struct RLAgent 22 | { 23 | RLBrain *brain; 24 | Tensor state; 25 | float epsilon; 26 | float decay; 27 | AgentPhase phase; 28 | }RLAgent; 29 | 30 | RLAgent *RLAgent_Create(shape state_shape, int n_outputs); 31 | int RLAgent_Policy(RLAgent *agent, Tensor* s); 32 | int RLAgent_Act(RLAgent *agent, Tensor* s); 33 | 34 | #ifdef __cplusplus 35 | } 36 | #endif 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /src/Utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | #include 8 | #include 9 | #include 10 | 11 | float* createFloatArray(int n); 12 | int* createIntArray(int n); 13 | float StandardDeviation(float* data, int n); 14 | void FlipArray(float* w, int n); 15 | 
void NormalizeArray(float* w, float n);
16 | //char* LoadFile(const char* filename);
17 | //void WriteToFile(const char* txt, const char* file);
18 | void PrintArray(float* w, int n);
19 | void FillArray(float* w, int n, float v);
20 | 
21 | #ifdef __NVCC__
22 | float* createFloatArrayGPU(int n);
23 | #endif // __NVCC__
24 | 
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 | 
29 | #endif
30 | 
--------------------------------------------------------------------------------
/src/tList.c:
--------------------------------------------------------------------------------
1 | #include "tList.h"
2 | 
3 | tList tList_create()
4 | {
5 | 	tList l;
6 | 	l.data = NULL;
7 | 	l.length = 0;
8 | 	return l;
9 | }
10 | void tList_realloc(tList* d)
11 | {
12 | 	Tensor* tmp = (Tensor*)realloc(d->data, sizeof(Tensor) * (d->length + 1));
13 | 	if (!tmp) {
14 | 		return; /* allocation failed: keep the existing tensors */
15 | 	}
16 | 	d->data = tmp;
17 | 	d->length += 1;
18 | }
19 | Tensor tList_push(tList* d, Tensor* t)
20 | {
21 | 	int n = d->length;
22 | 	tList_realloc(d);
23 | 	if (d->length == n) return *t; /* not stored: reallocation failed */
24 | 	d->data[d->length - 1] = Tensor_CreateCopy(t);
25 | 	return d->data[d->length - 1];
26 | }
27 | void tList_free(tList* d)
28 | {
29 | 	for (int i = 0; i < d->length; i++)
30 | 	{
31 | 		Tensor_Free(&d->data[i]);
32 | 	}
33 | 	free(d->data);
34 | 	d->data = NULL;
35 | 	d->length = 0;
36 | }
--------------------------------------------------------------------------------
/src/geometry/TVec2.h:
--------------------------------------------------------------------------------
1 | #ifndef TVEC2_H
2 | #define TVEC2_H
3 | 
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 | 
8 | #include "TCommon.h"
9 | 
10 | typedef struct TVec2
11 | {
12 | 	float x;
13 | 	float y;
14 | }TVec2;
15 | 
16 | TVec2 TVec2_Create(float x, float y);
17 | TVec2 TVec2_Create2(float all);
18 | 
19 | TVec2 TVec2_Mul(TVec2 v, float d);
20 | TVec2 TVec2_Div(TVec2 v, float d);
21 | TVec2 TVec2_Sub(TVec2 v1, TVec2 v2);
22 | TVec2 TVec2_Add(TVec2 v1, TVec2 v2);
23 | TVec2 TVec2_Norm(TVec2 v);
24 | 
25 | TVec2 TVec2_Dir(TVec2 org, TVec2 dest); //direction vector
26 | 
27 | float TVec2_Length(TVec2 v);
28 | 
29 | float TVec2_Dot(TVec2 v1, TVec2 v2);
30 | float TVec2_AngleDeg(TVec2 v1, TVec2 v2);
31 | 
32 | #ifdef __cplusplus
33 | }
34 | #endif
35 | 
36 | #endif
37 | 
--------------------------------------------------------------------------------
/src/dList.h:
--------------------------------------------------------------------------------
1 | #ifndef DLIST_H//dynamic list
2 | #define DLIST_H
3 | 
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 | 
8 | #include <stdlib.h>
9 | #define getLElem(E, L, I) ((E*)L.data[I].e)
10 | #define getLEInfo(E, L, I) ((E*)L.data[I].i)
11 | #define LElem(T, E) ((T*)E->e)
12 | 
13 | typedef struct dlElem
14 | {
15 | 	void* e;//element host
16 | 	void* i;//element info
17 | }dlElem;
18 | 
19 | typedef struct dList
20 | {
21 | 	int length;
22 | 	dlElem* data;
23 | }dList;
24 | 
25 | dList dList_create();
26 | void dList_realloc(dList* d);//add new elem
27 | void* dList_push(dList* d, void* t);//add and assign
28 | void dList_free(dList* d);//clear list
29 | 
30 | #ifdef __cplusplus
31 | }
32 | #endif
33 | 
34 | #endif //!DLIST_H
--------------------------------------------------------------------------------
/src/Input.cu:
--------------------------------------------------------------------------------
1 | #include "Input.h"
2 | 
3 | #ifdef __NVCC__
4 | Layer* Input_CreateGPU(shape out_shape)
5 | {
6 | 	Layer* dl = (Layer*)malloc(sizeof(Layer));
7 | 	if (!dl)
8 | 	{
9 | 		printf("Input allocation error!");
10 | 		return NULL;
11 | 	}
12 | 
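/* fill in the layer descriptor; the device-side output tensor is allocated below by Tensor_CreateGPU */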
dl->input = NULL; 13 | dl->type = LT_INPUT; 14 | dl->aData = NULL; 15 | dl->n_inputs = out_shape.w * out_shape.h * out_shape.d; 16 | dl->out_shape = { out_shape.w, out_shape.h, out_shape.d }; 17 | dl->output = Tensor_CreateGPU(dl->out_shape, 0); 18 | printf("Input GPU, output shape: [%d, %d, %d]\n", dl->out_shape.w, dl->out_shape.h, dl->out_shape.d); 19 | return dl; 20 | } 21 | 22 | Tensor* Input_ForwardGPU(Layer* l) 23 | { 24 | Tensor_CopyDataGPU(&l->output, l->input); 25 | return &l->output; 26 | } 27 | #endif // __NVCC__ 28 | -------------------------------------------------------------------------------- /src/geometry/TVec3.h: -------------------------------------------------------------------------------- 1 | #ifndef TVEC3_H 2 | #define TVEC3_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | typedef struct TVec3 9 | { 10 | float x; 11 | float y; 12 | float z; 13 | }TVec3; 14 | 15 | TVec3 TVec3_Create(float x, float y, float z); 16 | TVec3 TVec3_Create2(float all); 17 | 18 | TVec3 TVec3_Mul(TVec3 v, float d); 19 | TVec3 TVec3_Div(TVec3 v, float d); 20 | TVec3 TVec3_Sub(TVec3 v1, TVec3 v2); 21 | TVec3 TVec3_Add(TVec3 v1, TVec3 v2); 22 | TVec3 TVec3_Norm(TVec3 v); 23 | TVec3 TVec3_Cross(TVec3 v1, TVec3 v2); 24 | TVec3 TVec3_Dir(TVec3 org, TVec3 dest); //direction vector 25 | TVec3 TVec3_Middle(TVec3 org, TVec3 dest); 26 | float TVec3_Length(TVec3 v); 27 | float TVec3_Dot(TVec3 v1, TVec3 v2); 28 | float TVec3_AngleRad(TVec3 v1, TVec3 v2); 29 | 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /src/geometry/TQuaternion.h: -------------------------------------------------------------------------------- 1 | #ifndef TQUATERNION_H 2 | #define TQUATERNION_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "TVec4.h" 9 | #include "TVec3.h" 10 | 11 | typedef struct TQuaternion 12 | { 13 | float x; 14 | float y; 15 | float z; 16 | float w; 17 | }TQuaternion; 18 | 19 | TQuaternion TQuaternion_Create(float x, float y, float z, float w); 20 | TQuaternion TQuaternion_CreateV(TVec3 v, float w); 21 | TQuaternion TQuaternion_FromVec3(TVec3 axis, float angleRadian); 22 | TQuaternion TQuaternion_Norm(TQuaternion v); 23 | TQuaternion TQuaternion_Conjugate(TQuaternion v); 24 | TQuaternion TQuaternion_Mul(TQuaternion q1, TQuaternion q2); 25 | TQuaternion TQuaternion_Euler(float x, float y, float z); 26 | 27 | TVec3 TQuaternion_Rotate(TQuaternion q, TVec3 pt); 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /src/Losses.h: -------------------------------------------------------------------------------- 1 | #ifndef LOSSES_H 2 | #define LOSSES_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Tensor.h" 9 | 10 | #include 11 | #include 12 | 13 | float MSE_Loss(Tensor *y, Tensor *y_true); 14 | Tensor SoftmaxProb(Tensor* t); 15 | float Cross_entropy_Loss(Tensor* y, int idx); 16 | float Regression_Loss(Tensor* y, int idx, float val); 17 | 18 | #ifdef __NVCC__ 19 | float Cross_entropy_LossGPU(Tensor* y, int idx); 20 | __global__ void Cross_entropy_LossKernels(int n, float* xw, float* ydw, int idx); 21 | Tensor SoftmaxProbGPU(Tensor* t); 22 | __global__ void SoftmaxProbKernels(int n, float* iw, float* ow); 23 | float MSE_LossGPU(Tensor* y, Tensor* y_true); 24 | __global__ void MSE_LossKernels(int n, float* yw, float* ytw, float* ydw, float* sum); 25 | #endif // 
__NVCC__ 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/Tensor4.cu: -------------------------------------------------------------------------------- 1 | #include "Tensor4.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #ifdef __NVCC__ 8 | Tensor4 Tensor4_CreateGPU(shape4 s, float c) 9 | { 10 | Tensor4 v; 11 | v.s.w = s.w; 12 | v.s.h = s.h; 13 | v.s.d = s.d; 14 | v.s.b = s.b; 15 | 16 | v.n = s.w * s.h * s.d * s.b; 17 | v.w = NULL; v.dw = NULL; v.vt = NULL; 18 | 19 | if (cudaMalloc((void**)&v.w, v.n * sizeof(float)) != cudaSuccess) printf("Tensor4 weights allocation error\n"); 20 | else Tensor_FillArrayGPU(v.w, v.n, c); 21 | if (cudaMalloc((void**)&v.dw, v.n * sizeof(float)) != cudaSuccess) printf("Tensor4 grads allocation error\n"); 22 | else cudaMemset(v.dw, 0, sizeof(float) * v.n); 23 | if (cudaMalloc((void**)&v.vt, v.n * sizeof(float)) != cudaSuccess) printf("Tensor4 additions allocation error\n"); 24 | else cudaMemset(v.vt, 0, sizeof(float) * v.n); 25 | 26 | v.sumdw = 0; 27 | return v; 28 | } 29 | #endif 30 | -------------------------------------------------------------------------------- /src/RL/RLAgent.c: -------------------------------------------------------------------------------- 1 | #include "RLAgent.h" 2 | 3 | RLAgent *RLAgent_Create(shape state_shape, int n_outputs) 4 | { 5 | RLAgent *agent = malloc(sizeof(RLAgent)); 6 | if(!agent) 7 | { 8 | return NULL; 9 | } 10 | agent->state = Tensor_Create(state_shape, 0); 11 | agent->brain = RLBrain_Create(state_shape, n_outputs); 12 | agent->epsilon = 0.9f; 13 | agent->phase = A_TRAIN; 14 | agent->decay = 0.9999f; 15 | return agent; 16 | } 17 | 18 | int RLAgent_Policy(RLAgent *agent, Tensor* s) 19 | { 20 | Tensor y = RLBrain_Forward(agent->brain, s); 21 | shape max = T_Argmax(&y); 22 | int act = max.d; 23 | return act; 24 | } 25 | 26 | int RLAgent_Act(RLAgent *agent, Tensor* s) 27 | { 28 | agent->epsilon *= agent->decay; 29 | if (rngFloat() <= agent->epsilon) { 30 | int ra = rngInt(0, agent->brain->num_outputs-1); 31 | return ra; 32 | } 33 | else { 34 | int act = RLAgent_Policy(agent, s); 35 | return act; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/RL/ACBrain.h: -------------------------------------------------------------------------------- 1 | #ifndef ACBRAIN_H 2 | #define ACBRAIN_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | 10 | #include "TCommon.h" 11 | #include "Interfaces.h" 12 | #include "Optimizer.h" 13 | #include "Tensor.h" 14 | #include "ReplayBuffer.h" 15 | 16 | #include "Utils.h" 17 | #include "Losses.h" 18 | 19 | typedef struct ACBrain 20 | { 21 | Layer *inpA, *inpC, *actor, *critic; 22 | Model ActorNet; 23 | Model CriticNet; 24 | float gamma; 25 | float I; 26 | float discount; 27 | shape input_shape; 28 | int num_outputs; 29 | OptParams par; 30 | }ACBrain; 31 | 32 | ACBrain*ACBrain_Create(shape state_shape, int n_outputs); 33 | //Model ACBrain_CreateNet(shape input_sh, int n_outputs); 34 | Tensor ACBrain_Forward(ACBrain *brain, Tensor *state); 35 | float ACBrain_TrainTrace(ACBrain* brain, Tensor* states, float* rewards, float* actions, int n); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /src/message.h: -------------------------------------------------------------------------------- 1 | #ifndef MESSAGE_H 2 
| #define MESSAGE_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include "cJSON.h" 12 | 13 | typedef struct Message 14 | { 15 | const char *role; 16 | const char *type; 17 | const char *message; 18 | const char *body; 19 | }Message; 20 | 21 | typedef struct AgentInfo { 22 | const char *name; 23 | const char* mac; 24 | const char* ip; 25 | int port; 26 | }AgentInfo; 27 | 28 | Message ParseMessage(const char* str); 29 | cJSON* cJsonFromMessage(Message *m, int parse_body); 30 | void FreeMessage(Message *m); 31 | char* strCopy(const char* str); 32 | cJSON* cJsonMessage(const char* role, const char* type, const char* message); 33 | cJSON* cJsonAInfo(const char* name, const char* mac, const char* ip, int port); 34 | AgentInfo ParseAInfo(cJSON* elem); 35 | void FreeAInfo(AgentInfo* ai); 36 | #ifdef __cplusplus 37 | } 38 | #endif 39 | 40 | #endif -------------------------------------------------------------------------------- /src/Interfaces.h: -------------------------------------------------------------------------------- 1 | #ifndef INTERFACES_H 2 | #define INTERFACES_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "Tensor.h" 13 | 14 | typedef enum LayerType { 15 | LT_INPUT, 16 | LT_DENSE, 17 | LT_CONC, 18 | LT_RELU, 19 | LT_SOFTMAX, 20 | LT_REGRESSION, 21 | LT_CONV, 22 | LT_MAXPOOL, 23 | LT_MSE, 24 | LT_TANHA 25 | } LayerType; 26 | 27 | typedef enum LayerActivation { 28 | A_NONE, 29 | A_RELU, 30 | A_LRELU,//leaky relu 31 | A_TANH 32 | } LayerActivation; 33 | 34 | typedef struct Layer 35 | { 36 | shape out_shape; 37 | shape in_shape; 38 | int n_inputs; 39 | LayerType type; 40 | 41 | Tensor* input; 42 | Tensor* input2; 43 | Tensor output; 44 | 45 | void* aData;//additional layer data 46 | }Layer; 47 | 48 | typedef struct LData 49 | { 50 | float loss; 51 | }LData; 52 | 53 | #ifdef __cplusplus 54 | } 55 | #endif 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /src/TWeightsInit.c: -------------------------------------------------------------------------------- 1 | #include "TWeightsInit.h" 2 | 3 | float xavier_rand(int n) { 4 | //calculate the range for the weights 5 | float lower = -(1.0f / sqrtf((float)n)); 6 | float upper = (1.0f / sqrtf((float)n)); 7 | float num = rngFloat(); 8 | //scale to the desired range 9 | float scaled = lower + num * (upper - lower); 10 | return scaled; 11 | } 12 | float xavier_norm_rand(int n, int m) 13 | { 14 | //calculate the range for the weights 15 | float lower = -(sqrtf(6.0f) / sqrtf((float)n + (float)m)); 16 | float upper = (sqrtf(6.0f) / sqrtf((float)n + (float)m)); 17 | //get random number 18 | float num = rngFloat(); 19 | //scale to the desired range 20 | float scaled = lower + num * (upper - lower); 21 | return scaled; 22 | } 23 | float he_rand(int n) 24 | { 25 | //calculate the range for the weights 26 | float std = sqrtf(2.0f / (float)n); 27 | //generate random number from a standard normal distribution 28 | float num = rngNormal(); 29 | //scale to the desired range 30 | float scaled = num * std; 31 | return scaled; 32 | } -------------------------------------------------------------------------------- /src/RL/RLBrain.h: -------------------------------------------------------------------------------- 1 | #ifndef RLBRAIN_H 2 | #define RLBRAIN_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | 10 | #include "TCommon.h" 11 | #include 
"Interfaces.h" 12 | #include "Optimizer.h" 13 | #include "Tensor.h" 14 | #include "ReplayBuffer.h" 15 | #include "Losses.h" 16 | 17 | typedef struct RLBrain 18 | { 19 | Layer* inp, *out; 20 | ReplayBuffer *buffer; 21 | Model net; 22 | float discount; 23 | shape input_shape; 24 | int num_outputs; 25 | OptParams par; 26 | }RLBrain; 27 | 28 | RLBrain *RLBrain_Create(shape state_shape, int n_outputs); 29 | //Model RLBrain_CreateNet(shape input_sh, int n_outputs); 30 | void RLBrain_Record(RLBrain *brain, Tensor* state, Tensor* next_state, int action, float reward, int done); 31 | Tensor RLBrain_Forward(RLBrain *brain, Tensor *state); 32 | float RLBrain_Train(RLBrain *brain); 33 | float RLBrain_TrainTrace(RLBrain* brain, Tensor* states, float* rewards, float* actions, int n); 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/TCommon.h: -------------------------------------------------------------------------------- 1 | #ifndef TCOMMON_H 2 | #define TCOMMON_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include "Utils.h" 10 | # define M_PI 3.14159265358979323846f //pi 11 | 12 | float DegToRad(float deg); 13 | float RadToDeg(float rad); 14 | float Lerp(float a, float b, float t); 15 | float InvLerp(float a, float b, float t); 16 | 17 | float Clamp(float d, float min, float max); 18 | 19 | float rngFloat(); 20 | int rngInt(int min, int max); 21 | float rngNormal(); 22 | int find_ceil(int* arr, int r, int l, int h); 23 | int rng_by_prob(float* prob, int n); 24 | 25 | typedef struct OrnsteinUhlenbeckNoise { 26 | float theta, mu, sigma, dt, x0; 27 | float x_prev; 28 | }OrnsteinUhlenbeckNoise; 29 | OrnsteinUhlenbeckNoise initNoise(float mu, float sigma, float x0); 30 | float getNoiseVal(OrnsteinUhlenbeckNoise* n); 31 | 32 | void InsertionSort(float* values, int n); 33 | float Mean(float* items, int n); 34 | float Derivative(float (*f)(float), float x0); 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/MaxPool2d.h: -------------------------------------------------------------------------------- 1 | #ifndef MAXPOOL2D_H 2 | #define MAXPOOL2D_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Tensor.h" 9 | #include "Interfaces.h" 10 | 11 | typedef struct MaxPool2d 12 | { 13 | shape2 k_size; 14 | shape2 stride; 15 | int pad;//0,1 or int > 0 16 | }MaxPool2d; 17 | 18 | Layer* MaxPool2d_Create(shape2 k_size, shape2 stride, int pad, Layer* in); 19 | Tensor* MaxPool2d_Forward(Layer* l); 20 | void MaxPool2d_Backward(Layer* l); 21 | void MaxPool2d_Free(Layer* l); 22 | 23 | #ifdef __NVCC__ 24 | Layer* MaxPool2d_CreateGPU(shape2 k_size, shape2 stride, int pad, Layer* in); 25 | __global__ void MaxPool2d_ForwardKernels(shape limit, float* xw, float* outw, shape ishape, shape oshape, shape2 k_size, shape2 stride, int pad); 26 | Tensor* MaxPool2d_ForwardGPU(Layer* l); 27 | __global__ void MaxPool2d_BackwardKernels(shape limit, float* xw, float* xdw, float* outdw, shape ishape, shape oshape, shape2 k_size, shape2 stride, int pad); 28 | void MaxPool2d_BackwardGPU(Layer* l); 29 | #endif // __NVCC__ 30 | 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /src/Input.c: -------------------------------------------------------------------------------- 1 | #include 
"Input.h" 2 | 3 | Layer* Input_Create(shape out_shape) 4 | { 5 | Layer* dl = (Layer*)malloc(sizeof(Layer)); 6 | if (!dl) 7 | { 8 | printf("Input allocation error!"); 9 | return NULL; 10 | } 11 | dl->input = NULL; 12 | dl->type = LT_INPUT; 13 | dl->aData = NULL; 14 | dl->n_inputs = out_shape.w * out_shape.h * out_shape.d; 15 | dl->out_shape = (shape){ out_shape.w, out_shape.h, out_shape.d }; 16 | dl->output = Tensor_Create(dl->out_shape, 0); 17 | printf("Input, output shape: [%d, %d, %d]\n", dl->out_shape.w, dl->out_shape.h, dl->out_shape.d); 18 | 19 | return dl; 20 | } 21 | 22 | Tensor *Input_Forward(Layer* l) 23 | { 24 | Tensor_CopyData(&l->output, l->input); 25 | return &l->output; 26 | } 27 | 28 | void Input_Backward(Layer* l) 29 | { 30 | Tensor* x = l->input; 31 | //for (int i = 0; i < l->output.n; i++) 32 | //{ 33 | // x->dw[i] += l->output.dw[i]; 34 | //} 35 | memcpy(x->dw, l->output.dw, sizeof(float) * l->output.n); 36 | } 37 | 38 | void Input_Free(Layer* l) 39 | { 40 | Tensor_Free(&l->output); 41 | free(l); 42 | } -------------------------------------------------------------------------------- /ext/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.17) 2 | 3 | ########################################################################### 4 | # box2d 5 | 6 | set (BOX2D_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/box2d/include PARENT_SCOPE) 7 | set (BOX2D_BUILD_TESTBED OFF CACHE BOOL "" FORCE) 8 | 9 | add_subdirectory (box2d) 10 | 11 | set_property (TARGET box2d PROPERTY FOLDER "ext") 12 | 13 | ########################################################################### 14 | # glfw / glad 15 | 16 | set (GLFW_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/glfw/include PARENT_SCOPE) 17 | set (GLFW_DEPS ${CMAKE_CURRENT_SOURCE_DIR}/glfw/deps PARENT_SCOPE) 18 | set (GLAD_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/glad/include PARENT_SCOPE) 19 | 20 | set (GLFW_LIBRARY_TYPE STATIC CACHE STRING "" FORCE) 21 | set (GLFW_BUILD_DOCS OFF CACHE BOOL "" FORCE) 22 | set (GLFW_BUILD_TESTS OFF CACHE BOOL "" FORCE) 23 | set (GLFW_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) 24 | 25 | add_subdirectory (glfw) 26 | add_subdirectory (glad) 27 | 28 | set_property (TARGET glfw PROPERTY FOLDER "ext") 29 | set_property (TARGET glad PROPERTY FOLDER "ext") 30 | 31 | -------------------------------------------------------------------------------- /src/RL/ReplayBuffer.h: -------------------------------------------------------------------------------- 1 | #ifndef RBUFFER_H 2 | #define RBUFFER_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | 11 | #include "Tensor.h" 12 | #include "Interfaces.h" 13 | #include "SimpleDeque.h" 14 | #include "dList.h" 15 | 16 | typedef struct 17 | { 18 | Tensor* state; 19 | Tensor* next_state; 20 | int action; 21 | float reward; 22 | int done;//bool 23 | }Sample; 24 | 25 | typedef struct 26 | { 27 | int capacity; 28 | int batch_size; 29 | SimpleDeque* buffer; 30 | }ReplayBuffer; 31 | 32 | ReplayBuffer *ReplayBuffer_Create(int capacity, int batch_size); 33 | void ReplayBuffer_Record(ReplayBuffer* rBuffer, Tensor* state, 34 | Tensor* next_state, 35 | int action, 36 | float reward, int done); 37 | dList ReplayBuffer_Sample(ReplayBuffer* rb); 38 | void ReplayBuffer_Free(ReplayBuffer *rBuffer); 39 | 40 | Sample* createSample(Tensor* state, 41 | Tensor* next_state, 42 | int action, 43 | float reward, int done); 44 | void freeSample(void* sample); 45 | 46 | #ifdef __cplusplus 47 | } 48 | #endif 49 | 50 
| #endif 51 | -------------------------------------------------------------------------------- /src/Relu.c: -------------------------------------------------------------------------------- 1 | #include "Relu.h" 2 | #include 3 | 4 | Layer* Relu_Create(Layer* in) 5 | { 6 | Layer* dl = (Layer*)malloc(sizeof(Layer)); 7 | if (!dl) 8 | { 9 | printf("Relu allocation error!"); 10 | return NULL; 11 | } 12 | dl->type = LT_RELU; 13 | dl->aData = NULL; 14 | dl->n_inputs = in->out_shape.w * in->out_shape.h * in->out_shape.d; 15 | dl->out_shape = (shape){ in->out_shape.w, in->out_shape.h, in->out_shape.d }; 16 | dl->output = Tensor_Create(dl->out_shape, 0); 17 | dl->input = &in->output; 18 | printf("Relu, output shape: [%d, %d, %d]\n", dl->out_shape.w, dl->out_shape.h, dl->out_shape.d); 19 | return dl; 20 | } 21 | 22 | Tensor* Relu_Forward(Layer* l) 23 | { 24 | Tensor* x = l->input; 25 | for (int i = 0; i < x->n; i++) 26 | { 27 | if (x->w[i] < 0) l->output.w[i] = 0.0; 28 | else 29 | l->output.w[i] = x->w[i]; 30 | } 31 | return &l->output; 32 | } 33 | 34 | void Relu_Backward(Layer* l) 35 | { 36 | Tensor* x = l->input; 37 | for (int i = 0; i < x->n; i++) 38 | { 39 | if (x->w[i] < 0) x->dw[i] += 0.0; // threshold 40 | else x->dw[i] += l->output.dw[i]; 41 | } 42 | } -------------------------------------------------------------------------------- /src/Tensor4.h: -------------------------------------------------------------------------------- 1 | #ifndef TENSOR4_H 2 | #define TENSOR4_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | 9 | 10 | #include "TWeightsInit.h" 11 | #include "Tensor.h" 12 | typedef struct shape4 13 | { 14 | int w;//width 15 | int h;//heigth 16 | int d;//depth 17 | int b; 18 | }shape4; 19 | 20 | typedef struct Tensor4 21 | { 22 | shape4 s; 23 | int n; 24 | 25 | float *w; 26 | float *dw; 27 | //additions for optimizer 28 | float *vt; 29 | float sumdw; 30 | }Tensor4; 31 | 32 | Tensor4 Tensor4_Create(shape4 s, float c); 33 | void Tensor4_CopyData(Tensor4* dst, Tensor4* src); 34 | int tIdx4(shape4 s, int w, int h, int d, int b); 35 | 36 | void Tensor4_Set(Tensor4* t, int w, int h, int d, int b, float v); 37 | float Tensor4_Get(Tensor4* t, int w, int h, int d, int b); 38 | //======================================================================= 39 | void Tensor4_Free(Tensor4 *v); 40 | void Tensor4_Copy(Tensor4* dst, Tensor4 *src); 41 | void T4Print(Tensor4* t); 42 | 43 | #ifdef __NVCC__ 44 | Tensor4 Tensor4_CreateGPU(shape4 s, float c); 45 | #endif // __NVCC__ 46 | 47 | 48 | #ifdef __cplusplus 49 | } 50 | #endif 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /src/cmd/qmaze/cell.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef CELL_H 3 | #define CELL_H 4 | 5 | #include "geometry/TVec3.h" 6 | #include "TCommon.h" 7 | #include "quad.h" 8 | 9 | class Cell { 10 | public: 11 | int i = 0; 12 | int j = 0; 13 | 14 | bool walkable = true; 15 | bool visited = false; 16 | glQuad quad; 17 | 18 | Cell(TVec3 pos, float cellSize, int _i, int _j, bool _walkable) 19 | { 20 | quad.Pos = pos; 21 | quad.width = cellSize; 22 | quad.height = cellSize; 23 | i = _i; 24 | j = _j; 25 | walkable = _walkable; 26 | if (walkable) quad.Color = {0.9f, 0.9f, 0.9f}; else quad.Color = { 0.3f, 0.3f, 0.3f}; 27 | } 28 | 29 | void SetWalkable(bool w) 30 | { 31 | walkable = w; 32 | if (walkable) quad.Color = { 0.9f, 0.9f, 0.9f }; else quad.Color = { 0.1f, 0.1f, 0.1f }; 33 | } 34 | 35 | void 
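// marks a walkable cell as visited and updates its colour (visited cells draw slightly darker)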
SetVisited(bool w) 36 | { 37 | if (walkable) { 38 | visited = w; 39 | if (visited) quad.Color = { 0.7f, 0.7f, 0.7f }; else quad.Color = { 0.9f, 0.9f, 0.9f }; 40 | } 41 | } 42 | 43 | TVec3 center() 44 | { 45 | return quad.center(); 46 | } 47 | 48 | void Draw() 49 | { 50 | quad.Draw(); 51 | } 52 | 53 | //private: 54 | }; 55 | #endif -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Cognitive-systems-and-technologies 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/TanhA.c: -------------------------------------------------------------------------------- 1 | #include "TanhA.h" 2 | #include 3 | #include 4 | 5 | Layer* TanhA_Create(Layer* in) 6 | { 7 | Layer* dl = (Layer*)malloc(sizeof(Layer)); 8 | if (!dl) 9 | { 10 | printf("Tanh allocation error!"); 11 | return NULL; 12 | } 13 | dl->type = LT_TANHA; 14 | dl->n_inputs = in->out_shape.w * in->out_shape.h * in->out_shape.d; 15 | dl->out_shape = (shape){ in->out_shape.w, in->out_shape.h, in->out_shape.d }; 16 | dl->output = Tensor_Create(dl->out_shape, 0); 17 | dl->input = &in->output; 18 | dl->aData = NULL; 19 | printf("Tanh activation, output shape: [%d, %d, %d]\n", dl->out_shape.w, dl->out_shape.h, dl->out_shape.d); 20 | 21 | return dl; 22 | } 23 | 24 | Tensor* TanhA_Forward(Layer* l) 25 | { 26 | Tensor* y = &l->output; 27 | for (int i = 0; i < l->input->n; i++) 28 | { 29 | y->w[i] = tanhf(l->input->w[i]); 30 | } 31 | return y; 32 | } 33 | 34 | void TanhA_Backward(Layer* l) 35 | { 36 | Tensor* x = l->input; 37 | Tensor* out = &l->output; 38 | 39 | for (size_t i = 0; i < x->n; i++) 40 | { 41 | float xwi = out->w[i]; 42 | x->dw[i] += (1.f - xwi * xwi) * out->dw[i];//mult by chain gradient 43 | } 44 | } -------------------------------------------------------------------------------- /src/Regression.c: -------------------------------------------------------------------------------- 1 | #include "Regression.h" 2 | 3 | Layer* Regression_Create(Layer* in) 4 | { 5 | Layer* dl = malloc(sizeof(Layer)); 6 | if (!dl) 7 | { 8 | printf("Regression allocation error!"); 9 | return NULL; 10 | } 11 | dl->type = LT_REGRESSION; 12 | dl->n_inputs = in->out_shape.w * in->out_shape.h * in->out_shape.d; 13 | dl->out_shape = (shape){ 1, 1, dl->n_inputs }; 14 | dl->output = 
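/* flat [1,1,n] output; Regression_Forward simply mirrors the input activations */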
Tensor_Create(dl->out_shape, 0); 15 | dl->input = &in->output; 16 | LData* ld = (LData*)malloc(sizeof(LData)); 17 | if (ld) { 18 | ld->loss = 0; 19 | } 20 | else printf("Regression data allocation error\n"); 21 | dl->aData = ld; 22 | printf("Regression, output shape: [%d, %d, %d]\n", dl->out_shape.w, dl->out_shape.h, dl->out_shape.d); 23 | return dl; 24 | } 25 | 26 | Tensor *Regression_Forward(Layer* l) 27 | { 28 | Tensor_CopyData(&l->output, l->input); 29 | return &l->output; 30 | } 31 | 32 | void Regression_Backward(Layer* l, Tensor* y) 33 | { 34 | Tensor* x = l->input; 35 | int i = (int)y->w[0]; 36 | float val = y->w[1]; 37 | float dy = x->w[i] - val; 38 | x->dw[i] += dy; 39 | 40 | float dy2 = dy * dy; 41 | float loss = 0.5f * dy2; 42 | LData* ld = (LData*)l->aData; 43 | ld->loss = loss; 44 | } -------------------------------------------------------------------------------- /src/Dense.h: -------------------------------------------------------------------------------- 1 | #ifndef DENSE_H 2 | #define DENSE_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Tensor.h" 9 | #include "Interfaces.h" 10 | #include "dList.h" 11 | 12 | typedef struct Dense 13 | { 14 | //LayerActivation activation; 15 | Tensor *kernels; 16 | Tensor biases; 17 | int n; 18 | }Dense; 19 | 20 | //Layer *Dense_Create(int num_neurons, RandType weightInit, LayerActivation act, Layer *in); 21 | Layer *Dense_Create(int num_neurons, RandType weightInit, Layer *in); 22 | Tensor* Dense_Forward(Layer* l); 23 | void Dense_Backward(Layer* l); 24 | 25 | void Dense_Free(Layer* l); 26 | 27 | cJSON* Dense_To_JSON(Dense* d); 28 | void Dense_Load_JSON(Dense* d, cJSON* node); 29 | void Dense_GetGrads(Dense* l, dList* grads); 30 | #ifdef __NVCC__ 31 | Layer* Dense_CreateGPU(int num_neurons, Layer* in); 32 | __global__ void Dense_ForwardKernels(shape limit, float* x, float* k, float* out, shape s); 33 | Tensor* Dense_ForwardGPU(Layer* l); 34 | __global__ void Dense_BackwardKernels(shape limit, float* xw, float* xdw, float* kw, float* kdw, float* bdw, float* outdw, shape s); 35 | void Dense_BackwardGPU(Layer* l); 36 | #endif // __NVCC__ 37 | 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /src/MSE.c: -------------------------------------------------------------------------------- 1 | #include "MSE.h" 2 | 3 | Layer* MSE_Create(Layer *in) 4 | { 5 | Layer* l = (Layer*)malloc(sizeof(Layer)); 6 | if (!l) 7 | { 8 | printf("MSE allocation error!"); 9 | return NULL; 10 | } 11 | l->type = LT_MSE; 12 | l->n_inputs = in->out_shape.w * in->out_shape.h * in->out_shape.d; 13 | l->out_shape = (shape){ 1, 1, l->n_inputs }; 14 | l->output = Tensor_Create(l->out_shape, 0); 15 | l->input = &in->output; 16 | 17 | LData* ld = (LData*)malloc(sizeof(LData)); 18 | if (ld) { 19 | ld->loss = 0; 20 | } 21 | else printf("MSE data allocation error\n"); 22 | l->aData = ld; 23 | printf("Mse, output shape: [%d, %d, %d]\n", l->out_shape.w, l->out_shape.h, l->out_shape.d); 24 | return l; 25 | } 26 | 27 | Tensor * MSE_Forward(Layer* l) 28 | { 29 | Tensor_CopyData(&l->output, l->input); 30 | return &l->output; 31 | } 32 | 33 | void MSE_Backward(Layer* l, Tensor* y_true) 34 | { 35 | Tensor* x = l->input; 36 | float sum = 0; 37 | for (int i = 0; i < x->n; i++) 38 | { 39 | float dy = (2.f/(float)x->n) * (x->w[i] - y_true->w[i]); 40 | x->dw[i] += dy; 41 | 42 | float t = y_true->w[i] - x->w[i]; 43 | sum += t*t; 44 | } 45 | float loss = sum / (float)x->n; 46 | 
LData* ld = (LData*)l->aData;
47 | 	ld->loss = loss;
48 | }
--------------------------------------------------------------------------------
/src/geometry/TVec4.c:
--------------------------------------------------------------------------------
1 | #include "TVec4.h"
2 | #include <math.h>
3 | #include <stdio.h>
4 | 
5 | TVec4 TVec4_Create(float x, float y, float z, float w)
6 | {
7 | 	TVec4 vec = (TVec4){x, y, z, w};
8 | 	return vec;
9 | }
10 | 
11 | TVec4 TVec4_Create3(float x, float y, float z)
12 | {
13 | 	TVec4 vec = (TVec4){ x, y, z, 1 };
14 | 	return vec;
15 | }
16 | 
17 | TVec4 TVec4_Create1(float v)
18 | {
19 | 	TVec4 vec = (TVec4){ v, v, v, v };
20 | 	return vec;
21 | }
22 | 
23 | TVec4 TVec4_Mul(TVec4 v, float d)
24 | {
25 | 	return (TVec4) { v.x * d, v.y * d, v.z * d, v.w * d };
26 | }
27 | 
28 | TVec4 TVec4_Div(TVec4 v, float d)
29 | {
30 | 	if (d != 0)
31 | 		return (TVec4) { v.x / d, v.y / d, v.z / d, v.w / d };
32 | 	else
33 | 		return TVec4_Create1(0);
34 | }
35 | 
36 | TVec4 TVec4_Sub(TVec4 v1, TVec4 v2)
37 | {
38 | 	return (TVec4) { v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w };
39 | }
40 | 
41 | TVec4 TVec4_Norm(TVec4 v)
42 | {
43 | 	TVec4 r = v;
44 | 	float norm = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
45 | 	float invNorm = 1.0f / norm;
46 | 
47 | 	r.x *= invNorm;
48 | 	r.y *= invNorm;
49 | 	r.z *= invNorm;
50 | 	r.w *= invNorm;
51 | 	return r;
52 | }
53 | 
54 | float TVec4_Dot(TVec4 v1, TVec4 v2)
55 | {
56 | 	return v1.x * v2.x + v1.y * v2.y + v1.z * v2.z + v1.w * v2.w;
57 | }
--------------------------------------------------------------------------------
/src/RL/SimpleDeque.c:
--------------------------------------------------------------------------------
1 | #include "SimpleDeque.h"
2 | 
3 | SimpleDeque* createDeque(int capacity)
4 | {
5 | 	SimpleDeque* d = (SimpleDeque*)malloc(sizeof(SimpleDeque));
6 | 	if (!d)
7 | 	{
8 | 		printf("Deque allocation error!");
9 | 		return NULL;
10 | 	}
11 | 	d->capacity = capacity;
12 | 	d->length = 0;
13 | 	d->data = (DequeElem*)malloc(sizeof(DequeElem) * capacity);
14 | 	if (!d->data)
15 | 	{
16 | 		printf("Deque data allocation error!");
17 | 		free(d);
18 | 		return NULL;
19 | 	}
20 | 	return d;
21 | }
22 | 
23 | void dequeAppend(SimpleDeque* d, void *t, void (*elementFree) (void* e))
24 | {
25 | 	int id = d->length + 1;
26 | 	if (id > d->capacity)
27 | 	{
28 | 		//delete first
29 | 		elementFree(d->data[0].elem);
30 | 		free(d->data[0].elem);
31 | 		d->data[0].elem = NULL;
32 | 		//shift the remaining capacity-1 elements left by one
33 | 		memmove(&d->data[0], &d->data[1], sizeof(DequeElem) * (d->capacity - 1));
34 | 		//set last
35 | 		d->data[d->length - 1].elem = t;
36 | 	}
37 | 	else
38 | 	{
39 | 		d->data[id - 1].elem = t;
40 | 		d->length = id;
41 | 	}
42 | }
43 | 
44 | void freeDeque(SimpleDeque* d, void (*elementFree) (void* e))
45 | {
46 | 	for (int i = 0; i < d->length; i++)
47 | 	{
48 | 		elementFree(d->data[i].elem);
49 | 		free(d->data[i].elem);
50 | 	}
51 | 	free(d->data);
52 | 	free(d);
53 | }
--------------------------------------------------------------------------------
/src/cmd/rand_test.cpp:
--------------------------------------------------------------------------------
1 | #include "Model.h"
2 | //example run of the weight initialization algorithms
3 | void WeightsInitTest()
4 | {
5 | 	int count = 10000;
6 | 	float min = FLT_MAX, max = -FLT_MAX, sum = 0;
7 | 	printf("xavier\n");
8 | 	for (size_t i = 0; i < count; i++)
9 | 	{
10 | 		float f = xavier_rand(10);
11 | 		//printf("%f, ", f);
12 | 		min = f < min ? f : min;
13 | 		max = f > max ? f : max;
14 | 		sum += f;
15 | 	}
16 | 	printf("\nmin val:%f max val:%f, mean:%f\n", min, max, sum / (float)count);
17 | 	min = FLT_MAX, max = -FLT_MAX, sum = 0;
18 | 	printf("\nxavier normalized\n");
19 | 	for (size_t i = 0; i < count; i++)
20 | 	{
21 | 		float f = xavier_norm_rand(10, 20);
22 | 		//printf("%f, ", f);
23 | 		min = f < min ? f : min;
24 | 		max = f > max ? f : max;
25 | 		sum += f;
26 | 	}
27 | 	printf("\nmin val:%f max val:%f, mean:%f\n", min, max, sum / (float)count);
28 | 	min = FLT_MAX, max = -FLT_MAX, sum = 0;
29 | 	printf("\nhe distribution\n");
30 | 	for (size_t i = 0; i < count; i++)
31 | 	{
32 | 		float f = he_rand(10);
33 | 		//printf("%f, ", f);
34 | 		min = f < min ? f : min;
35 | 		max = f > max ? f : max;
36 | 		sum += f;
37 | 	}
38 | 	printf("\nmin val:%f max val:%f, mean:%f\n", min, max, sum / (float)count);
39 | }
40 | 
41 | int main()
42 | {
43 | 	WeightsInitTest();
44 | 
45 | 	printf("\nPress enter to close...");
46 | 	getchar();
47 | 	return 0;
48 | }
--------------------------------------------------------------------------------
/src/RL/DDPG.h:
--------------------------------------------------------------------------------
1 | #ifndef DDPG_H
2 | #define DDPG_H
3 | 
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 | 
8 | #include 
9 | 
10 | #include "TCommon.h"
11 | #include "Interfaces.h"
12 | #include "Optimizer.h"
13 | #include "Tensor.h"
14 | //#include "ReplayBuffer.h"
15 | 
16 | #include "Utils.h"
17 | #include "Losses.h"
18 | //#include "geometry/TVec2.h"
19 | 
20 | typedef struct DDPG
21 | {
22 | 	Layer *inpA, *inpC, *inpCA, *actor, *critic;
23 | 	Layer *inpAT, *inpCT, *inpCTA, *actor_target, *critic_target;
24 | 	Model ActorNet;
25 | 	Model CriticNet;
26 | 
27 | 	Model ActorTargetNet;
28 | 	Model CriticTargetNet;
29 | 	float gamma;
30 | 	float tau;
31 | 	shape input_shape;
32 | 	int num_outputs;
33 | 	int update_frq;
34 | 	OrnsteinUhlenbeckNoise noise;
35 | 	OptParams par;
36 | }DDPG;
37 | 
38 | void DDPGsoft_update(dList* tp, dList*sp, float tau);
39 | DDPG* DDPG_Create(shape state_shape, int n_acts);
40 | Tensor DDPG_Forward(DDPG *brain, Tensor *state);
41 | Tensor DDPG_SelectAction(DDPG* brain, Tensor* state, float eps);
42 | 
43 | float DDPG_TrainTrace(DDPG* brain, Tensor* states, Tensor* last_state, Tensor* probs, float* rewards, int n, int iter);
44 | 
45 | void DDPGcopy_params(Model* dest_m, Model* src_m);
46 | void DDPGSoftUpdate_Targets(DDPG* brain);
47 | #ifdef __cplusplus
48 | }
49 | #endif
50 | 
51 | #endif
52 | 
--------------------------------------------------------------------------------
/src/RL/TD3.h:
--------------------------------------------------------------------------------
1 | #ifndef TD3_H
2 | #define TD3_H
3 | 
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 | 
8 | #include 
9 | 
10 | #include "TCommon.h"
11 | #include "Interfaces.h"
12 | #include "Optimizer.h"
13 | #include "Tensor.h"
14 | //#include "ReplayBuffer.h"
15 | 
16 | #include "Utils.h"
17 | #include "Losses.h"
18 | //#include "geometry/TVec2.h"
19 | 
20 | typedef struct TD3
21 | {
22 | 	Layer *inpA, *inpC, *inpCA, *actor, *critic, *Q1, *Q2;
23 | 	Layer *inpAT, *inpCT, *inpCTA, *actor_target, *critic_target, *QT1, *QT2;
24 | 	Model ActorNet;
25 | 	Model CriticNet;
26 | 
27 | 	Model ActorTargetNet;
28 | 	Model CriticTargetNet;
29 | 	float gamma;
30 | 	float tau;
31 | 	shape input_shape;
32 | 	int num_outputs;
33 | 	float noise_clip;
34 | 	int update_frq;
35 | 	OptParams par;
36 | 	OrnsteinUhlenbeckNoise noise;
37 | }TD3;
38 | 
39 | void soft_update(dList* tp, dList*sp, float tau);
40 | TD3* TD3_Create(shape state_shape, int n_acts);
41 
| Tensor TD3_Forward(TD3 *brain, Tensor *state); 42 | Tensor TD3_SelectAction(TD3* brain, Tensor* state, float eps); 43 | 44 | float TD3_TrainTrace(TD3* brain, Tensor* states, Tensor* last_state, Tensor* probs, float* rewards, int n, int iter); 45 | 46 | void copy_params(Model* dest_m, Model* src_m); 47 | void SoftUpdate_Targets(TD3* brain); 48 | #ifdef __cplusplus 49 | } 50 | #endif 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /src/geometry/TVec2.c: -------------------------------------------------------------------------------- 1 | #include "TVec2.h" 2 | #include 3 | #include 4 | 5 | TVec2 TVec2_Create(float x, float y) 6 | { 7 | TVec2 vec = (TVec2){x, y}; 8 | return vec; 9 | } 10 | 11 | TVec2 TVec2_Create2(float v) 12 | { 13 | TVec2 vec = (TVec2){ v, v }; 14 | return vec; 15 | } 16 | 17 | TVec2 TVec2_Mul(TVec2 v, float d) 18 | { 19 | return (TVec2) { v.x * d, v.y * d }; 20 | } 21 | 22 | TVec2 TVec2_Div(TVec2 v, float d) 23 | { 24 | if (d != 0) 25 | return (TVec2) { v.x / d, v.y / d}; 26 | else 27 | return TVec2_Create2(0); 28 | } 29 | 30 | TVec2 TVec2_Sub(TVec2 v1, TVec2 v2) 31 | { 32 | return (TVec2) { v1.x - v2.x, v1.y - v2.y }; 33 | } 34 | 35 | TVec2 TVec2_Add(TVec2 v1, TVec2 v2) 36 | { 37 | return (TVec2) { v1.x + v2.x, v1.y + v2.y }; 38 | } 39 | 40 | TVec2 TVec2_Norm(TVec2 v) 41 | { 42 | float le = TVec2_Length(v); 43 | return TVec2_Div(v, le); 44 | } 45 | 46 | float TVec2_Length(TVec2 v) 47 | { 48 | return sqrtf(v.x * v.x + v.y * v.y); 49 | } 50 | 51 | TVec2 TVec2_Dir(TVec2 org, TVec2 dest) 52 | { 53 | return TVec2_Norm(TVec2_Sub(dest, org)); 54 | } 55 | 56 | float TVec2_Dot(TVec2 v1, TVec2 v2) 57 | { 58 | return v1.x * v2.x + v1.y * v2.y; 59 | } 60 | 61 | float TVec2_AngleDeg(TVec2 v1, TVec2 v2) 62 | { 63 | float sin = v1.x * v2.y - v2.x * v1.y; 64 | float cos = v1.x * v2.x + v1.y * v2.y; 65 | 66 | return (float)atan2(sin, cos) * (180.f / M_PI); 67 | } -------------------------------------------------------------------------------- /src/Losses.c: -------------------------------------------------------------------------------- 1 | #include "Losses.h" 2 | 3 | float MSE_Loss(Tensor* y, Tensor* y_true) 4 | { 5 | float sum = 0; 6 | float scale = 1.f / (float)y->n; 7 | for (int i = 0; i < y->n; i++) 8 | { 9 | float t = y_true->w[i] - y->w[i]; 10 | float dy = -2.f * scale * t; 11 | y->dw[i] += dy; 12 | sum += t * t; 13 | } 14 | float loss = sum * scale; 15 | return loss; 16 | } 17 | 18 | Tensor SoftmaxProb(Tensor *t) 19 | { 20 | Tensor out = Tensor_Create(t->s, 0); 21 | //get max 22 | //float maxv = T_MaxValue(t); 23 | float sum = 0; 24 | for (size_t i = 0; i < t->n; i++) 25 | { 26 | float e = expf(t->w[i]); 27 | sum += e; 28 | out.w[i] = e; 29 | } 30 | //normalize 31 | for (size_t i = 0; i < t->n; i++) 32 | { 33 | float x = out.w[i] / sum; 34 | out.w[i] = x; 35 | } 36 | return out; 37 | } 38 | 39 | float Cross_entropy_Loss(Tensor* y, int idx) 40 | { 41 | Tensor x = SoftmaxProb(y); 42 | for (size_t i = 0; i < y->n; i++) 43 | { 44 | float y_true = (i == idx) ? 
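/* one-hot target: 1 at the true class index, 0 elsewhere */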
1.f : 0.f; 45 | float der = -(y_true - x.w[i]); 46 | y->dw[i] += der; 47 | } 48 | float loss = -logf(x.w[idx]); 49 | Tensor_Free(&x); 50 | return loss; 51 | } 52 | 53 | float Regression_Loss(Tensor* y, int idx, float val) 54 | { 55 | float dy = y->w[idx] - val; 56 | y->dw[idx] += dy; 57 | 58 | float dy2 = dy * dy; 59 | float loss = 0.5f * dy2; 60 | return loss; 61 | } -------------------------------------------------------------------------------- /src/Conv2d.h: -------------------------------------------------------------------------------- 1 | #ifndef CONV2D_H 2 | #define CONV2D_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Tensor.h" 9 | #include "Tensor4.h" 10 | #include "Interfaces.h" 11 | 12 | typedef struct Conv2d 13 | { 14 | Tensor *kernels; 15 | Tensor biases; 16 | int n; 17 | shape2 k_size; 18 | shape2 stride; 19 | int pad; 20 | }Conv2d; 21 | 22 | Layer* Conv2d_Create(int num_kernels, shape2 k_size, shape2 stride, int pad, RandType weightInit, Layer* in); 23 | Tensor* Conv2d_Forward(Layer* l); 24 | void Conv2d_Backward(Layer* l); 25 | void Conv2d_Free(Layer* l); 26 | 27 | cJSON* Conv2d_To_JSON(Conv2d* d); 28 | void Conv2d_Load_JSON(Conv2d* d, cJSON* node); 29 | #ifdef __NVCC__ 30 | typedef struct Conv2dGPU 31 | { 32 | Tensor4 kernels; 33 | Tensor biases; 34 | 35 | shape2 k_size; 36 | shape2 stride; 37 | int pad; 38 | }Conv2dGPU; 39 | 40 | Layer* Conv2d_CreateGPU(int num_kernels, shape2 k_size, shape2 stride, int pad, Layer* in); 41 | __global__ void Conv2d_ForwardKernels(shape limit, float* xw, float* kerw, float* bw, float* outw, shape ishape, shape4 kshape, shape oshape, shape2 k_size, shape2 stride, int pad); 42 | Tensor* Conv2d_ForwardGPU(Layer* l); 43 | __global__ void Conv2d_BackwardKernels(shape limit, float* xw, float* xdw, float* kerw, float* kerdw, float* outdw, float* bdw, shape ishape, shape4 kshape, shape oshape, shape2 k_size, shape2 stride, int pad); 44 | void Conv2d_BackwardGPU(Layer* l); 45 | #endif // __NVCC__ 46 | 47 | #ifdef __cplusplus 48 | } 49 | #endif 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/Model.h: -------------------------------------------------------------------------------- 1 | #ifndef MODEL_H 2 | #define MODEL_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Interfaces.h" 9 | #include "Dense.h" 10 | #include "Input.h" 11 | #include "MSE.h" 12 | #include "TanhA.h" 13 | #include "Conv2d.h" 14 | #include "MaxPool2d.h" 15 | #include "Relu.h" 16 | #include "Regression.h" 17 | #include "Conc.h" 18 | #include "cJSON.h" 19 | 20 | typedef struct _Model 21 | { 22 | Layer** Layers; 23 | int n_layers; 24 | 25 | Tensor* (*NetForward) (struct _Model* n, Tensor* x); 26 | void (*NetBackward) (struct _Model* n, Tensor *y); 27 | }Model; 28 | 29 | Model Model_Create(); 30 | Layer* Model_AddLayer(Model *n, Layer* l); 31 | 32 | void Backward_Layer (Layer* l); 33 | Tensor *Forward_Layer(Layer* l); 34 | 35 | void Model_Forward(Model* n); 36 | void Model_Backward(Model* n); 37 | 38 | cJSON* Layer_To_JSON(Layer* l); 39 | void Layer_Load_JSON(Layer* t, cJSON* node); 40 | cJSON* Model_To_JSON(Model *n); 41 | void Model_Load_JSON(Model *t, cJSON* node); 42 | void Model_CLearGrads(Model* m); 43 | dList Model_getGradients(Model* n); 44 | #ifdef __NVCC__ 45 | Model Model_CreateGPU(); 46 | Tensor* Forward_LayerGPU(Layer* l); 47 | void Backward_LayerGPU(Layer* l); 48 | void Model_ForwardGPU(Model* n); 49 | void Model_BackwardGPU(Model* n); 50 | Tensor* 
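/* (On Cross_entropy_Loss in Losses.c above: with p = SoftmaxProb(y) and a one-hot target t at idx, the combined softmax-plus-cross-entropy gradient simplifies to dL/dy_i = p_i - t_i, which is exactly the der = -(y_true - x.w[i]) accumulated into y->dw, while the scalar loss is -log p_idx.) */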
Seq_ForwardGPU(Model* n, Tensor* x); 51 | void Seq_BackwardGPU(Model* n, Tensor* y); 52 | #endif // __NVCC__ 53 | 54 | 55 | #ifdef __cplusplus 56 | } 57 | #endif 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /src/RL/ReplayBuffer.c: -------------------------------------------------------------------------------- 1 | #include "ReplayBuffer.h" 2 | 3 | ReplayBuffer *ReplayBuffer_Create(int capacity, int batch_size) 4 | { 5 | ReplayBuffer *rb = malloc(sizeof(ReplayBuffer)); 6 | if (!rb) 7 | { 8 | printf("Replay buffer allocation error"); 9 | return NULL; 10 | } 11 | rb->capacity = capacity; 12 | rb->batch_size = batch_size; 13 | rb->buffer = createDeque(capacity); 14 | return rb; 15 | } 16 | 17 | void ReplayBuffer_Record(ReplayBuffer *rBuffer, Tensor* state, 18 | Tensor* next_state, 19 | int action, 20 | float reward, int done) 21 | { 22 | Sample* s = createSample(state, next_state, action, reward, done); 23 | dequeAppend(rBuffer->buffer, s, freeSample); 24 | } 25 | 26 | dList ReplayBuffer_Sample(ReplayBuffer* rb) 27 | { 28 | dList lst = dList_create(); 29 | //sample buffer 30 | return lst; 31 | } 32 | 33 | Sample* createSample(Tensor* state, 34 | Tensor* next_state, 35 | int action, 36 | float reward, int done) 37 | { 38 | Sample* s = malloc(sizeof(Sample)); 39 | if (!s) 40 | { 41 | printf("Sample allocation error!"); 42 | return NULL; 43 | } 44 | s->action = action; 45 | s->reward = reward; 46 | //s->state = Tensor_CreateCopy(state); 47 | //s->next_state = Tensor_CreateCopy(next_state); 48 | s->done = done; 49 | return s; 50 | } 51 | 52 | void freeSample(void* sample) 53 | { 54 | Sample* s = (Sample*)sample; 55 | Tensor_Free(s->state); 56 | Tensor_Free(s->next_state); 57 | free(s); 58 | } 59 | 60 | void ReplayBuffer_Free(ReplayBuffer* rBuffer) 61 | { 62 | freeDeque(rBuffer->buffer, freeSample); 63 | free(rBuffer); 64 | } -------------------------------------------------------------------------------- /src/geometry/TVec3.c: -------------------------------------------------------------------------------- 1 | #include "TVec3.h" 2 | #include 3 | #include 4 | 5 | TVec3 TVec3_Create(float x, float y, float z) 6 | { 7 | TVec3 vec = (TVec3){x, y, z}; 8 | return vec; 9 | } 10 | 11 | TVec3 TVec3_Create2(float v) 12 | { 13 | TVec3 vec = (TVec3){ v, v, v }; 14 | return vec; 15 | } 16 | 17 | TVec3 TVec3_Mul(TVec3 v, float d) 18 | { 19 | return (TVec3) { v.x * d, v.y * d, v.z * d }; 20 | } 21 | 22 | TVec3 TVec3_Div(TVec3 v, float d) 23 | { 24 | if (d != 0) 25 | return (TVec3) { v.x / d, v.y / d, v.z / d }; 26 | else 27 | return TVec3_Create2(0); 28 | } 29 | 30 | TVec3 TVec3_Sub(TVec3 v1, TVec3 v2) 31 | { 32 | return (TVec3) { v1.x - v2.x, v1.y - v2.y, v1.z - v2.z }; 33 | } 34 | 35 | TVec3 TVec3_Add(TVec3 v1, TVec3 v2) 36 | { 37 | return (TVec3) { v1.x + v2.x, v1.y + v2.y, v1.z + v2.z }; 38 | } 39 | 40 | TVec3 TVec3_Norm(TVec3 v) 41 | { 42 | return TVec3_Div(v, TVec3_Length(v)); 43 | } 44 | 45 | float TVec3_Length(TVec3 v) 46 | { 47 | return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z); 48 | } 49 | 50 | TVec3 TVec3_Cross(TVec3 v1, TVec3 v2) 51 | { 52 | return (TVec3) { 53 | v1.y* v2.z - v1.z * v2.y, 54 | v1.z* v2.x - v1.x * v2.z, 55 | v1.x* v2.y - v1.y * v2.x 56 | }; 57 | } 58 | 59 | float TVec3_Dot(TVec3 v1, TVec3 v2) 60 | { 61 | return v1.x * v2.x + v1.y * v2.y + v1.z * v2.z; 62 | } 63 | 64 | TVec3 TVec3_Dir(TVec3 org, TVec3 dest) 65 | { 66 | return TVec3_Norm(TVec3_Sub(dest, org)); 67 | } 68 | 69 | float TVec3_AngleRad(TVec3 v1, TVec3 v2) 70 | { 71 | 
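/* Uses the dot-product identity cos(theta) = (v1 . v2) / (|v1| |v2|). Caution: rounding can leave the normalized dot slightly outside [-1, 1], in which case acosf returns NaN; clamping the dot product before the acosf call below would harden this (a suggestion, not present in the original code). */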
float l1 = TVec3_Length(v1); 72 | float l2 = TVec3_Length(v2); 73 | float dot = TVec3_Dot(TVec3_Div(v1, l1), TVec3_Div(v2, l2)); 74 | return acosf(dot); 75 | } 76 | 77 | TVec3 TVec3_Middle(TVec3 org, TVec3 dest) 78 | { 79 | TVec3 v = TVec3_Sub(dest, org); 80 | float l = TVec3_Length(v) * 0.5f; 81 | TVec3 n = TVec3_Norm(v); 82 | return TVec3_Add(org, TVec3_Mul(n, l)); 83 | 84 | } -------------------------------------------------------------------------------- /src/TanhA.cu: -------------------------------------------------------------------------------- 1 | #include "TanhA.h" 2 | #include 3 | #include 4 | 5 | #ifdef __NVCC__ 6 | Layer* TanhA_CreateGPU(Layer* in) 7 | { 8 | Layer* dl = (Layer *)malloc(sizeof(Layer)); 9 | if (!dl) 10 | { 11 | printf("Tanh allocation error!"); 12 | return NULL; 13 | } 14 | dl->type = LT_TANHA; 15 | dl->n_inputs = in->out_shape.w * in->out_shape.h * in->out_shape.d; 16 | dl->out_shape = { in->out_shape.w, in->out_shape.h, in->out_shape.d }; 17 | dl->output = Tensor_CreateGPU(dl->out_shape, 0); 18 | dl->input = &in->output; 19 | dl->aData = NULL; 20 | printf("Tanh GPU, output shape: [%d, %d, %d]\n", dl->out_shape.w, dl->out_shape.h, dl->out_shape.d); 21 | return dl; 22 | } 23 | 24 | __global__ void TanhA_ForwardKernels(float* xw, float* outw) 25 | { 26 | int i = (blockIdx.x * blockDim.x) + threadIdx.x; 27 | outw[i] = tanhf(xw[i]); 28 | } 29 | 30 | Tensor* TanhA_ForwardGPU(Layer* l) 31 | { 32 | int n = l->n_inputs; 33 | 34 | int threadsPerBlockX = 128; 35 | if (n < threadsPerBlockX) threadsPerBlockX = 1; 36 | dim3 gridDim(ceil(n / (float)threadsPerBlockX), 1, 1); 37 | dim3 blockDim(threadsPerBlockX, 1, 1); 38 | 39 | TanhA_ForwardKernels KERNEL_CALL(gridDim, blockDim) (l->input->w, l->output.w); 40 | cudaDeviceSynchronize(); 41 | return &l->output; 42 | } 43 | 44 | __global__ void TanhA_BackwardKernels(float* xdw, float* outw, float* outdw) 45 | { 46 | int i = (blockIdx.x * blockDim.x) + threadIdx.x; 47 | 48 | float xwi = outw[i]; 49 | float dw = (1.f - xwi * xwi) * outdw[i]; 50 | //atomicAdd(&xdw[i], dw); 51 | xdw[i] += dw; 52 | } 53 | 54 | void TanhA_BackwardGPU(Layer* l) 55 | { 56 | int n = l->n_inputs; 57 | 58 | int threadsPerBlockX = 128; 59 | if (n < threadsPerBlockX) threadsPerBlockX = 1; 60 | dim3 gridDim(ceil(n / (float)threadsPerBlockX), 1, 1); 61 | dim3 blockDim(threadsPerBlockX, 1, 1); 62 | 63 | TanhA_BackwardKernels KERNEL_CALL(gridDim, blockDim) (l->input->dw, l->output.w, l->output.dw); 64 | cudaDeviceSynchronize(); 65 | } 66 | #endif // __NVCC__ -------------------------------------------------------------------------------- /src/Softmax.c: -------------------------------------------------------------------------------- 1 | #include "Softmax.h" 2 | #include 3 | #include 4 | 5 | Layer* Softmax_Create(Layer *in) 6 | { 7 | Layer* dl = malloc(sizeof(Layer)); 8 | Softmax* l = malloc(sizeof(Softmax)); 9 | if (!dl) 10 | { 11 | printf("Softmax allocation error!"); 12 | return NULL; 13 | } 14 | if (!l) 15 | { 16 | printf("Softmax data allocation error!"); 17 | free(dl); 18 | return NULL; 19 | } 20 | dl->input = in; 21 | dl->type = LT_SOFTMAX; 22 | dl->n_inputs = in->out_shape.w * in->out_shape.h * in->out_shape.d; 23 | dl->out_shape = (shape){ 1, 1, dl->n_inputs }; 24 | dl->output = Tensor_Create(dl->out_shape, 0); 25 | 26 | l->sums = malloc(dl->out_shape.d*sizeof(float)); 27 | if (!l->sums) 28 | { 29 | printf("Softmax es allocation error!"); 30 | free(l); 31 | free(dl); 32 | return NULL; 33 | } 34 | for (int i = 0; i < dl->out_shape.d; i++) 35 | { 36 | 
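/* (Aside on TanhA_BackwardKernels above: because d/dx tanh(x) = 1 - tanh(x)^2, the backward kernel reuses the cached forward output outw[i] rather than recomputing tanhf, giving dw = (1.f - outw[i]*outw[i]) * outdw[i].) */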
l->sums[i] = 0.f; 37 | } 38 | dl->aData = l; 39 | return dl; 40 | } 41 | 42 | Tensor *Softmax_Forward(Layer* l) 43 | { 44 | Softmax* data = (Softmax*)l->aData; 45 | Tensor* x = l->input; 46 | //get max 47 | float amax = x->w[0]; 48 | for (int i = 1; i < l->out_shape.d; i++) 49 | { 50 | if (x->w[i] > amax) 51 | amax = x->w[i]; 52 | } 53 | // compute exponentials (carefully to not blow up) 54 | float esum = 0.0f; 55 | for (int i = 0; i < l->out_shape.d; i++) 56 | { 57 | float e = (float)exp(x->w[i] - amax); 58 | esum += e; 59 | data->sums[i] = e; 60 | } 61 | // normalize output sum to one 62 | for (int i = 0; i < l->out_shape.d; i++) 63 | { 64 | data->sums[i] /= esum; 65 | l->output.w[i] = data->sums[i]; 66 | } 67 | return &l->output; 68 | } 69 | 70 | void Softmax_Backward(Layer* l, Tensor* y) 71 | { 72 | Softmax* data = l->aData; 73 | float loss = 0.f; 74 | Tensor* x = l->input; 75 | for (int i = 0; i < l->out_shape.d; i++) 76 | { 77 | float mul = -(y->w[i] - data->sums[i]); 78 | x->dw[i] += mul; 79 | if (y->w[i] > 0) 80 | loss += -(float)log(data->sums[i]); 81 | } 82 | } -------------------------------------------------------------------------------- /src/MSE.cu: -------------------------------------------------------------------------------- 1 | #include "MSE.h" 2 | 3 | #ifdef __NVCC__ 4 | Layer* MSE_CreateGPU(Layer* in) 5 | { 6 | Layer* l = (Layer*)malloc(sizeof(Layer)); 7 | if (!l) 8 | { 9 | printf("MSE allocation error!"); 10 | return NULL; 11 | } 12 | l->type = LT_MSE; 13 | l->n_inputs = in->out_shape.w * in->out_shape.h * in->out_shape.d; 14 | l->out_shape = { 1, 1, l->n_inputs }; 15 | l->output = Tensor_CreateGPU(l->out_shape, 0); 16 | l->input = &in->output; 17 | 18 | LData* ld = (LData*)malloc(sizeof(LData)); 19 | if (ld) { 20 | ld->loss = 0; 21 | } 22 | else printf("MSE data allocation error\n"); 23 | l->aData = ld; 24 | printf("Mse, output shape: [%d, %d, %d]\n", l->out_shape.w, l->out_shape.h, l->out_shape.d); 25 | return l; 26 | } 27 | 28 | Tensor* MSE_ForwardGPU(Layer* l) 29 | { 30 | Tensor_CopyDataGPU(&l->output, l->input); 31 | return &l->output; 32 | } 33 | 34 | __global__ void MSE_BackwardKernels(int limit, float* xw, float* xdw, float* yw, float n, float* sum) 35 | { 36 | int i = (blockIdx.x * blockDim.x) + threadIdx.x; 37 | if (i < limit) { 38 | float dy = (2.f / n) * (xw[i] - yw[i]); 39 | atomicAdd(&xdw[i], dy); 40 | //xdw[i] += dy; 41 | 42 | float t = yw[i] - xw[i]; 43 | float t2 = t * t; 44 | 45 | atomicAdd(sum, t2); 46 | } 47 | } 48 | 49 | void MSE_BackwardGPU(Layer* l, Tensor* y_true) 50 | { 51 | int n = l->n_inputs; 52 | 53 | int threadsPerBlockX = 128; 54 | dim3 gridDim((int)ceil(n / (float)threadsPerBlockX), 1, 1); 55 | dim3 blockDim(threadsPerBlockX, 1, 1); 56 | 57 | float *sumd, *sumh; 58 | sumh = (float*)malloc(sizeof(float)); 59 | if (cudaMalloc((void**)&sumd, sizeof(float)) != cudaSuccess) printf("in loss allocation\n"); 60 | cudaMemset(sumd, 0, sizeof(float)); 61 | MSE_BackwardKernels KERNEL_CALL(gridDim, blockDim) (n, 62 | l->input->w, l->input->dw, y_true->w, (float)n, sumd); 63 | cudaDeviceSynchronize(); 64 | 65 | cudaMemcpy(sumh, sumd, sizeof(float), cudaMemcpyDeviceToHost); 66 | cudaFree(sumd); 67 | LData* ld = (LData*)l->aData; 68 | ld->loss = sumh[0]/(float)n; 69 | free(sumh); 70 | cudaDeviceSynchronize(); 71 | } 72 | #endif // __NVCC__ -------------------------------------------------------------------------------- /src/Model.cu: -------------------------------------------------------------------------------- 1 | #include "Model.h" 2 | #include 3 | 
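/* Numerical note on Softmax_Forward above: shifting the inputs by their maximum leaves the result unchanged, since exp(x_i - m) / sum_j exp(x_j - m) = exp(x_i) / sum_j exp(x_j), while keeping expf in range. For example, with inputs {1000.f, 1001.f} the naive expf(1000.f) overflows to +inf, whereas the shifted form computes {expf(-1.f), expf(0.f)} and yields {0.2689f, 0.7311f}. */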
4 | #ifdef __NVCC__ 5 | Model Model_CreateGPU() 6 | { 7 | Model n; 8 | n.Layers = NULL; 9 | n.n_layers = 0; 10 | n.NetForward = NULL;//Seq_ForwardGPU; 11 | n.NetBackward = NULL;// Seq_BackwardGPU; 12 | return n; 13 | } 14 | Tensor* Forward_LayerGPU(Layer* l) 15 | { 16 | Tensor* y = NULL; 17 | switch (l->type) 18 | { 19 | case LT_INPUT: y = Input_ForwardGPU(l); break; 20 | case LT_DENSE: y = Dense_ForwardGPU(l); break; 21 | case LT_SOFTMAX: break; 22 | case LT_RELU: y = Relu_ForwardGPU(l); break; 23 | case LT_REGRESSION: break; 24 | case LT_MSE: y = MSE_ForwardGPU(l); break; 25 | case LT_TANHA: y = TanhA_ForwardGPU(l); break; 26 | case LT_CONV: y = Conv2d_ForwardGPU(l); break; 27 | case LT_MAXPOOL: y = MaxPool2d_ForwardGPU(l); break; 28 | default: break; 29 | } 30 | return y; 31 | } 32 | void Backward_LayerGPU(Layer* l) 33 | { 34 | switch (l->type) 35 | { 36 | case LT_DENSE: Dense_BackwardGPU(l); break; 37 | case LT_SOFTMAX: break; 38 | case LT_RELU: Relu_BackwardGPU(l); break; 39 | case LT_REGRESSION: break; 40 | case LT_MSE: break;//MSE_BackwardGPU(l, y); break; 41 | case LT_TANHA: TanhA_BackwardGPU(l); break; 42 | case LT_CONV: Conv2d_BackwardGPU(l); break; 43 | case LT_MAXPOOL: MaxPool2d_BackwardGPU(l); break; 44 | default: break; 45 | } 46 | } 47 | 48 | void Model_ForwardGPU(Model* n) 49 | { 50 | for (int i = 0; i < n->n_layers; i++) 51 | { 52 | Forward_LayerGPU(n->Layers[i]); 53 | } 54 | } 55 | 56 | void Model_BackwardGPU(Model* n) 57 | { 58 | int N = n->n_layers; 59 | for (int i = N - 1; i >= 0; i--) 60 | { 61 | Layer* l = n->Layers[i]; 62 | Backward_LayerGPU(l); 63 | } 64 | } 65 | 66 | Tensor* Seq_ForwardGPU(Model* n, Tensor* x) 67 | { 68 | Tensor* y = Forward_LayerGPU(n->Layers[0]); 69 | for (int i = 1; i < n->n_layers; i++) 70 | { 71 | y = Forward_LayerGPU(n->Layers[i]); 72 | } 73 | return y; 74 | } 75 | void Seq_BackwardGPU(Model* n, Tensor* y) 76 | { 77 | int N = n->n_layers; 78 | for (int i = N - 1; i >= 0; i--) 79 | { 80 | Backward_LayerGPU(n->Layers[i]); 81 | } 82 | } 83 | #endif // __NVCC__ -------------------------------------------------------------------------------- /src/Optimizer.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTIMIZER_H 2 | #define OPTIMIZER_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "Model.h" 9 | #include 10 | #include "Utils.h" 11 | 12 | typedef enum OptMethod { 13 | ADAGRAD, 14 | RMSPROP, 15 | ADAM, 16 | ADAN, 17 | NRMSPROP, 18 | SGD 19 | } OptMethod; 20 | 21 | typedef struct OptParams 22 | { 23 | float learning_rate; 24 | OptMethod method; 25 | float eps; 26 | int counter; 27 | float b1, b2, b3; 28 | float decay; 29 | float b; 30 | float clip; 31 | }OptParams; 32 | 33 | typedef struct adanTData 34 | { 35 | float* mk; 36 | float* vk; 37 | float* nk; 38 | float* gprev; 39 | }adanTData; 40 | 41 | typedef struct adamTData 42 | { 43 | float* mt; 44 | float* vt; 45 | }adamTData; 46 | 47 | typedef struct momentumTData 48 | { 49 | float* vk; 50 | }momentumTData; 51 | 52 | void CreateAdanData(Tensor* t); 53 | void CreateAdamData(Tensor* t); 54 | void CreateMomentumData(Tensor* t); 55 | 56 | void AdanOpt(Tensor* v, OptParams* par); 57 | void AdamOpt(Tensor* v, OptParams* par); 58 | void AdagradOpt(Tensor* v, OptParams* par); 59 | void RMSPropOpt(Tensor* v, OptParams* par); 60 | void NRMSPropOpt(Tensor* v, OptParams* par); 61 | void SGDOpt(Tensor* v, OptParams* par); 62 | 63 | OptParams OptParams_Create(); 64 | void Optimize(Model*n, OptParams *par, Tensor *x, Tensor *y); 65 | 
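/* For orientation, AdamOpt is presumably the textbook Adam step applied per weight, with
 * g = v->dw[i], the adamTData buffers mt/vt, and the step count t from par->counter
 * (a sketch of the standard update, not copied from Optimizer.cu):
 *     mt = b1*mt + (1 - b1)*g;
 *     vt = b2*vt + (1 - b2)*g*g;
 *     w -= learning_rate * (mt / (1 - pow(b1, t))) / (sqrtf(vt / (1 - pow(b2, t))) + eps);
 */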
void OptimizeModel(Model* n, OptParams* par); 66 | void Change_Grad(OptParams* par, Tensor* v, bool norm); 67 | 68 | #ifdef __NVCC__ 69 | void CreateAdanDataGPU(Tensor* t); 70 | void CreateAdamDataGPU(Tensor* t); 71 | void CreateMomentumDataGPU(Tensor* t); 72 | void PrepareTDataGPU(Model* n, OptParams* par); 73 | void PrepareTensorGPU(Tensor* v, OptParams* par); 74 | void Change_GradGPU(OptParams* par, Tensor* k, Tensor* b, bool norm); 75 | void OptimizeModelGPU(Model* n, OptParams* par); 76 | 77 | __global__ void NRMSProp_GradKernel(float* w, float* dw, float* vt, float* bw, float* bdw, float* bvt, float lr, shape s); 78 | __global__ void Change_GradKernel(float* w, float* dw, float* vt, float lr, shape s); 79 | #endif // __NVCC__ 80 | 81 | #ifdef __cplusplus 82 | } 83 | #endif 84 | 85 | #endif 86 | -------------------------------------------------------------------------------- /src/Relu.cu: -------------------------------------------------------------------------------- 1 | #include "Relu.h" 2 | #include 3 | 4 | #ifdef __NVCC__ 5 | Layer* Relu_CreateGPU(Layer* in) 6 | { 7 | Layer* dl = (Layer*)malloc(sizeof(Layer)); 8 | if (!dl) 9 | { 10 | printf("Relu allocation error!"); 11 | return NULL; 12 | } 13 | dl->type = LT_RELU; 14 | dl->aData = NULL; 15 | dl->n_inputs = in->out_shape.w * in->out_shape.h * in->out_shape.d; 16 | dl->out_shape = { in->out_shape.w, in->out_shape.h, in->out_shape.d }; 17 | dl->output = Tensor_CreateGPU(dl->out_shape, 0); 18 | dl->input = &in->output; 19 | printf("Relu_GPU, output shape: [%d, %d, %d]\n", dl->out_shape.w, dl->out_shape.h, dl->out_shape.d); 20 | return dl; 21 | } 22 | 23 | __global__ void Relu_ForwardKernels(int limit, float* xw, float* outw) 24 | { 25 | int i = (blockIdx.x * blockDim.x) + threadIdx.x; 26 | if (i < limit) { 27 | //outw[i] = (xw[i] < 0) ? 0 : xw[i]; 28 | if (xw[i] < 0) outw[i] = 0.0001f* xw[i]; 29 | else 30 | outw[i] = xw[i]; 31 | } 32 | } 33 | 34 | Tensor* Relu_ForwardGPU(Layer* l) 35 | { 36 | int n = l->n_inputs; 37 | 38 | int threadsPerBlockX = 256; 39 | 40 | dim3 gridDim((int)ceil(n / (float)threadsPerBlockX), 1, 1); 41 | dim3 blockDim(threadsPerBlockX, 1, 1); 42 | 43 | Relu_ForwardKernels KERNEL_CALL(gridDim, blockDim) (n, 44 | l->input->w, l->output.w); 45 | cudaDeviceSynchronize(); 46 | return &l->output; 47 | } 48 | 49 | __global__ void Relu_BackwardKernels(int limit, float* xdw, float* outw, float* outdw) 50 | { 51 | int i = (blockIdx.x * blockDim.x) + threadIdx.x; 52 | if (i < limit) { 53 | //xdw[i] += (outw[i] <= 0) ? 
0 : outdw[i]; 54 | //if (outw[i] <= 0) xdw[i] += 0; 55 | //else 56 | //atomicAdd(&xdw[i], outdw[i]); 57 | //xdw[i] += outdw[i]; 58 | if (outw[i] < 0) atomicAdd(&xdw[i], 0.0001f* outdw[i]); 59 | else 60 | atomicAdd(&xdw[i], outdw[i]); 61 | } 62 | } 63 | 64 | void Relu_BackwardGPU(Layer* l) 65 | { 66 | int n = l->n_inputs; 67 | 68 | int threadsPerBlockX = 256; 69 | 70 | dim3 gridDim((int)ceil(n / (float)threadsPerBlockX), 1, 1); 71 | dim3 blockDim(threadsPerBlockX, 1, 1); 72 | 73 | Relu_BackwardKernels KERNEL_CALL(gridDim, blockDim) (n, 74 | l->input->dw, l->output.w, l->output.dw); 75 | cudaDeviceSynchronize(); 76 | } 77 | #endif // __NVCC__ 78 | -------------------------------------------------------------------------------- /src/cmd/cartpole/shapes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "box2d/box2d.h" 3 | #include "GLFW/glfw3.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define PI 3.141592653589793f 10 | #define DEGTORAD 0.0174532925199432957f 11 | 12 | class TDBox { 13 | public: 14 | b2Body* m_body; 15 | b2PolygonShape polygonShape; 16 | 17 | TDBox(b2World* world, float w, float h) { 18 | b2BodyDef bodyDef; 19 | bodyDef.type = b2_dynamicBody; 20 | bodyDef.linearDamping = 3; 21 | m_body = world->CreateBody(&bodyDef); 22 | 23 | polygonShape.SetAsBox(w, h); 24 | 25 | b2FixtureDef fixtureDef; 26 | fixtureDef.shape = &polygonShape; 27 | fixtureDef.density = 1.0f; 28 | fixtureDef.friction = 0.3f; 29 | 30 | b2Fixture* fixture = m_body->CreateFixture(&fixtureDef); 31 | } 32 | 33 | TDBox(b2World* world, float w, float h, b2Vec2 pos, b2BodyType btype, bool isSensor, float den) 34 | { 35 | b2BodyDef bodyDef; 36 | bodyDef.type = btype; 37 | bodyDef.linearDamping = 3; 38 | bodyDef.angularDamping = 3; 39 | 40 | m_body = world->CreateBody(&bodyDef); 41 | 42 | polygonShape.SetAsBox(w, h); 43 | 44 | b2FixtureDef fixtureDef; 45 | fixtureDef.shape = &polygonShape; 46 | fixtureDef.density = den; 47 | fixtureDef.friction = 0.3f; 48 | fixtureDef.isSensor = isSensor; 49 | 50 | b2Fixture* fixture = m_body->CreateFixture(&fixtureDef); 51 | SetPos(pos); 52 | } 53 | 54 | void SetPos(b2Vec2 pos) 55 | { 56 | m_body->SetTransform(pos, 0); 57 | } 58 | 59 | ~TDBox() { 60 | m_body->GetWorld()->DestroyBody(m_body); 61 | } 62 | 63 | void Draw() 64 | { 65 | glPushMatrix(); 66 | glTranslatef(m_body->GetPosition().x, m_body->GetPosition().y, 0); 67 | glRotatef(m_body->GetAngle() * (180.f / PI), 0, 0, 1); 68 | glColor3f(0, 0, 0); 69 | glBegin(GL_LINE_LOOP); 70 | for (size_t i = 0; i < polygonShape.m_count; i++) 71 | { 72 | glVertex2f(polygonShape.m_vertices[i].x, polygonShape.m_vertices[i].y); 73 | } 74 | glEnd(); 75 | glPopMatrix(); 76 | } 77 | }; 78 | -------------------------------------------------------------------------------- /src/Tensor4.c: -------------------------------------------------------------------------------- 1 | #include "Tensor4.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | Tensor4 Tensor4_Create(shape4 s, float c) 8 | { 9 | Tensor4 v; 10 | v.s.w = s.w; 11 | v.s.h = s.h; 12 | v.s.d = s.d; 13 | v.s.b = s.b; 14 | 15 | v.n = s.w * s.h * s.d * s.b; 16 | v.vt = NULL; 17 | 18 | v.w = (float *)malloc(sizeof(float) * v.n); 19 | v.dw = (float*)malloc(sizeof(float) * v.n); 20 | v.vt = (float*)malloc(sizeof(float) * v.n); 21 | 22 | v.sumdw = 0; 23 | if (!v.w || !v.dw) printf("Tensor data allocation error"); 24 | else 25 | for (int i = 0; i < v.n; i++) { 26 | v.w[i] = c; 27 | v.dw[i] = 0; 28 | v.vt[i] = 0; 
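/* (Re Relu.cu above: despite the name, Relu_ForwardKernels implements a leaky ReLU, f(x) = x for x >= 0 and 0.0001f*x otherwise, and Relu_BackwardKernels correspondingly scales upstream gradients by 0.0001f on the negative side instead of zeroing them.) */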
29 | } 30 | return v; 31 | } 32 | 33 | void Tensor4_CopyData(Tensor4* dst, Tensor4* src) 34 | { 35 | memcpy(dst->w, src->w, sizeof(float) * src->n); 36 | } 37 | 38 | void Tensor4_Free(Tensor4* v) 39 | { 40 | free(v->dw); 41 | v->dw = NULL; 42 | free(v->w); 43 | v->w = NULL; 44 | free(v->vt); 45 | v->vt = NULL; 46 | } 47 | 48 | //============================================================================================ 49 | int tIdx4(shape4 s, int w, int h, int d, int b) 50 | { 51 | return (((s.w * h) + w) * s.d + d)*s.b+b; 52 | } 53 | 54 | void Tensor4_Set(Tensor4* t, int w, int h, int d, int b, float v) 55 | { 56 | int id = (((t->s.w * h) + w) * t->s.d + d) * t->s.b + b; 57 | t->w[id] = v; 58 | } 59 | float Tensor4_Get(Tensor4* t, int w, int h, int d, int b) 60 | { 61 | int id = (((t->s.w * h) + w) * t->s.d + d) * t->s.b + b; 62 | return t->w[id]; 63 | } 64 | 65 | void Tensor4_Copy(Tensor4* dst, Tensor4* src) 66 | { 67 | memcpy(dst->w, src->w, sizeof(float) * src->n); 68 | memcpy(dst->dw, src->dw, sizeof(float) * src->n); 69 | if (src->vt!=NULL&&dst->vt!=NULL) 70 | memcpy(dst->vt, src->vt, sizeof(float) * src->n); //copy the optimizer state too (was dst->w/src->w again by copy-paste) 71 | } 72 | 73 | void T4Print(Tensor4 *t) 74 | { 75 | for (size_t b = 0; b < t->s.b; b++) 76 | { 77 | printf("\n\n"); 78 | for (size_t d = 0; d < t->s.d; d++) 79 | { 80 | printf("[\n"); 81 | for (size_t h = 0; h < t->s.h; h++) 82 | { 83 | printf("["); 84 | for (size_t w = 0; w < t->s.w; w++) 85 | { 86 | int id = tIdx4(t->s, w, h, d, b); 87 | float x = t->w[id]; 88 | printf("%f, ", x); 89 | } 90 | printf("]\n"); 91 | } 92 | printf("]\n"); 93 | } 94 | //printf("]"); 95 | } 96 | printf("\n"); 97 | } -------------------------------------------------------------------------------- /src/cmd/cartpole_cont/shapes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "box2d/box2d.h" 3 | #include "GLFW/glfw3.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define PI 3.141592653589793f 10 | #define DEGTORAD 0.0174532925199432957f 11 | 12 | class TDBox { 13 | public: 14 | b2Body* m_body; 15 | b2PolygonShape polygonShape; 16 | 17 | TDBox(b2World* world, float w, float h) { 18 | b2BodyDef bodyDef; 19 | bodyDef.type = b2_dynamicBody; 20 | bodyDef.linearDamping = 3; 21 | m_body = world->CreateBody(&bodyDef); 22 | 23 | polygonShape.SetAsBox(w, h); 24 | 25 | b2FixtureDef fixtureDef; 26 | fixtureDef.shape = &polygonShape; 27 | fixtureDef.density = 1.0f; 28 | fixtureDef.friction = 0.3f; 29 | 30 | b2Fixture* fixture = m_body->CreateFixture(&fixtureDef); 31 | } 32 | 33 | TDBox(b2World* world, float w, float h, b2Vec2 pos, b2BodyType btype, bool isSensor, float den) 34 | { 35 | b2BodyDef bodyDef; 36 | bodyDef.type = btype; 37 | bodyDef.linearDamping = 3; 38 | bodyDef.angularDamping = 3; 39 | 40 | m_body = world->CreateBody(&bodyDef); 41 | 42 | polygonShape.SetAsBox(w, h); 43 | 44 | b2FixtureDef fixtureDef; 45 | fixtureDef.shape = &polygonShape; 46 | fixtureDef.density = den; 47 | fixtureDef.friction = 0.3f; 48 | fixtureDef.isSensor = isSensor; 49 | 50 | b2Fixture* fixture = m_body->CreateFixture(&fixtureDef); 51 | SetPos(pos); 52 | } 53 | 54 | void SetPos(b2Vec2 pos) 55 | { 56 | m_body->SetTransform(pos, 0); 57 | } 58 | 59 | ~TDBox() { 60 | m_body->GetWorld()->DestroyBody(m_body); 61 | } 62 | 63 | void Draw() 64 | { 65 | glPushMatrix(); 66 | glTranslatef(m_body->GetPosition().x, m_body->GetPosition().y, 0); 67 | glRotatef(m_body->GetAngle() * (180.f / PI), 0, 0, 1); 68 | glColor3f(0, 0, 0); 69 | glBegin(GL_LINE_LOOP); 70 | for
(size_t i = 0; i < polygonShape.m_count; i++) 71 | { 72 | glVertex2f(polygonShape.m_vertices[i].x, polygonShape.m_vertices[i].y); 73 | } 74 | glEnd(); 75 | glPopMatrix(); 76 | } 77 | }; 78 | -------------------------------------------------------------------------------- /src/cmd/model_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Model.h" 4 | #include "Utils.h" 5 | 6 | //Example of building a deep neural network model (AlexNet-style) on the CPU 7 | int main() 8 | { 9 | //Define the input data shape 10 | shape input = { 128,128,1 }; 11 | //Create a tensor to test the model's forward pass 12 | Tensor x = Tensor_Create(input, 1.f); 13 | 14 | printf("Create model structure:\n"); 15 | //Initialize the model on the CPU 16 | Model n = Model_Create(); 17 | //Add a CPU input layer to model n and keep a reference to it in inp 18 | Layer* inp = Model_AddLayer(&n, Input_Create(input)); 19 | //Add a CPU convolutional layer to model n 20 | Layer* l = Model_AddLayer(&n, Conv2d_Create(96, { 11,11 }, { 2,2 }, 0, R_HE, inp)); 21 | //Add a CPU Relu activation layer to model n 22 | l = Model_AddLayer(&n, Relu_Create(l)); 23 | //Add a CPU MaxPool layer to model n 24 | l = Model_AddLayer(&n, MaxPool2d_Create({ 5,5 }, { 2,2 }, 0, l)); 25 | l = Model_AddLayer(&n, Conv2d_Create(64, { 3,3 }, { 1,1 }, 0, R_HE, l)); 26 | l = Model_AddLayer(&n, Relu_Create(l)); 27 | l = Model_AddLayer(&n, MaxPool2d_Create({ 3,3 }, { 1,1 }, 0, l)); 28 | l = Model_AddLayer(&n, Conv2d_Create(32, { 3,3 }, { 1,1 }, 0, R_HE, l)); 29 | l = Model_AddLayer(&n, Relu_Create(l)); 30 | l = Model_AddLayer(&n, Conv2d_Create(32, { 3,3 }, { 1,1 }, 0, R_HE, l)); 31 | l = Model_AddLayer(&n, Relu_Create(l)); 32 | l = Model_AddLayer(&n, Conv2d_Create(32, { 3,3 }, { 1,1 }, 0, R_HE, l)); 33 | l = Model_AddLayer(&n, Relu_Create(l)); 34 | l = Model_AddLayer(&n, Conv2d_Create(32, { 3,3 }, { 1,1 }, 0, R_HE, l)); 35 | l = Model_AddLayer(&n, Relu_Create(l)); 36 | l = Model_AddLayer(&n, MaxPool2d_Create({ 3,3 }, { 1,1 }, 0, l)); 37 | //Add a CPU fully connected (dense) layer to model n 38 | l = Model_AddLayer(&n, Dense_Create(4096, R_HE, l)); 39 | l = Model_AddLayer(&n, Relu_Create(l)); 40 | l = Model_AddLayer(&n, Dense_Create(4096, R_HE, l)); 41 | l = Model_AddLayer(&n, Relu_Create(l)); 42 | Layer* out = Model_AddLayer(&n, Dense_Create(3, R_XAVIER, l)); 43 | 44 | printf("\nTest model forward pass:"); 45 | //Test the model's forward pass 46 | //Set the model input: 47 | inp->input = &x; 48 | //Run the forward pass: 49 | Model_Forward(&n); 50 | //Print the output tensor array to the console 51 | PrintArray(out->output.w, out->output.n); 52 | printf("\nPress enter to close..."); 53 | getchar(); 54 | return 0; 55 | } -------------------------------------------------------------------------------- /src/cmd/qmaze/quad.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef GLQUAD_H 3 | #define GLQUAD_H 4 | 5 | #include "geometry/TVec3.h" 6 | #include "TCommon.h" 7 | 8 | #include 9 | 10 | class glQuad { 11 | public: 12 | float width, height; 13 | 14 | TVec3 Color = {1.f, 0.f, 0.f}; 15 | TVec3 Pos = {0, 0, 0}; 16 | 17 | glQuad(float w, float h) { 18 | width = w; 19 | height = h; 20 | CalcList(); 21 | } 22 | 23 | glQuad(TVec3 p) { 24 | Pos = p; 25 | width = 1.f; 26 | height = 1.f; 27 | CalcList(); 28 | } 29 | 30 | glQuad() { 31 | width = 1.f; 32 | height = 1.f; 33 | CalcList(); 34 | } 35 | 36 | TVec3 center()
{ 38 | return { Pos.x + width * 0.5f, Pos.y + height * 0.5f, Pos.z }; 39 | } 40 | 41 | void Draw() 42 | { 43 | glColor3f(Color.x, Color.y, Color.z); 44 | glPushMatrix(); 45 | glTranslated(Pos.x, Pos.y, Pos.z); 46 | glCallList(MAP_LIST); 47 | //DrawQuad(); 48 | glPopMatrix(); 49 | } 50 | 51 | void Rescale(float w, float h) 52 | { 53 | width = w; 54 | height = h; 55 | CalcList(); 56 | } 57 | 58 | void Rescale(float size) 59 | { 60 | Rescale(size, size); 61 | } 62 | 63 | private: 64 | int MAP_LIST; 65 | void CalcList() 66 | { 67 | MAP_LIST = glGenLists(1); 68 | glNewList(MAP_LIST, GL_COMPILE); 69 | DrawQuad(); 70 | glEndList(); 71 | } 72 | 73 | void DrawQuad() 74 | { 75 | /* 76 | TVec3 v0 = TVec3_Create(0, 0, -height); 77 | TVec3 v1 = TVec3_Create(width, 0, -height); 78 | TVec3 v2 = TVec3_Create(width, 0, 0); 79 | TVec3 v3 = TVec3_Create(0, 0, 0); 80 | */ 81 | TVec3 v0 = TVec3_Create(width, 0, 0); 82 | TVec3 v1 = TVec3_Create(width, height, 0); 83 | TVec3 v2 = TVec3_Create(0, height, 0); 84 | TVec3 v3 = TVec3_Create(0, 0, 0); 85 | 86 | //glBindTexture(GL_TEXTURE_2D, imageTexture); 87 | 88 | glBegin(GL_QUADS); 89 | glTexCoord2f(0, 0); 90 | glVertex3f(v0.x, v0.y, v0.z); 91 | glTexCoord2f(1, 0); 92 | glVertex3f(v1.x, v1.y, v1.z); 93 | glTexCoord2f(1, 1); 94 | glVertex3f(v2.x, v2.y, v2.z); 95 | glTexCoord2f(0, 1); 96 | glVertex3f(v3.x, v3.y, v3.z); 97 | glEnd(); 98 | 99 | glColor3f(0.8f, 0.8f, 0.8f); 100 | glBegin(GL_LINE_LOOP); 101 | glVertex3f(v0.x, v0.y, v0.z); 102 | glVertex3f(v1.x, v1.y, v1.z); 103 | glVertex3f(v2.x, v2.y, v2.z); 104 | glVertex3f(v3.x, v3.y, v3.z); 105 | glEnd(); 106 | 107 | //glBindTexture(GL_TEXTURE_2D, 0); 108 | } 109 | }; 110 | #endif -------------------------------------------------------------------------------- /src/message.c: -------------------------------------------------------------------------------- 1 | #include "message.h" 2 | 3 | char* strCopy(const char* str) 4 | { 5 | char *newStr = (char*)malloc(strlen(str) + 1); 6 | if (newStr) { 7 | strncpy(newStr, str, strlen(str) + 1); 8 | } 9 | else newStr = NULL; 10 | return newStr; 11 | } 12 | 13 | Message ParseMessage(const char* str) 14 | { 15 | Message m; 16 | cJSON* node = cJSON_Parse(str); 17 | cJSON *role = cJSON_GetObjectItem(node, "r"); 18 | cJSON* type = cJSON_GetObjectItem(node, "t"); 19 | cJSON* message = cJSON_GetObjectItem(node, "m"); 20 | cJSON* body = cJSON_GetObjectItem(node, "b"); 21 | 22 | m.role = strCopy(role->valuestring); 23 | m.type = strCopy(type->valuestring); 24 | m.message = strCopy(message->valuestring); 25 | m.body = strCopy(body->valuestring); 26 | 27 | cJSON_Delete(node); 28 | return m; 29 | } 30 | 31 | AgentInfo ParseAInfo(cJSON *elem) 32 | { 33 | AgentInfo ai; 34 | cJSON* name = cJSON_GetObjectItem(elem, "name"); 35 | cJSON* mac = cJSON_GetObjectItem(elem, "mac"); 36 | cJSON* ip = cJSON_GetObjectItem(elem, "ip"); 37 | cJSON* port = cJSON_GetObjectItem(elem, "port"); 38 | 39 | ai.name = strCopy(name->valuestring); 40 | ai.mac = strCopy(mac->valuestring); 41 | ai.ip = strCopy(ip->valuestring); 42 | ai.port = port->valueint; 43 | 44 | return ai; 45 | } 46 | 47 | cJSON* cJsonFromMessage(Message* m, int parse_body) 48 | { 49 | cJSON* node = cJSON_CreateObject(); 50 | cJSON_AddStringToObject(node, "r", strCopy(m->role)); 51 | cJSON_AddStringToObject(node, "t", strCopy(m->type)); 52 | cJSON_AddStringToObject(node, "m", strCopy(m->message)); 53 | cJSON_AddStringToObject(node, "b", strCopy(m->body)); 54 | return node; 55 | } 56 | 57 | cJSON* cJsonMessage(const char *role,const char 
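/* (Memory note on cJsonFromMessage above: cJSON_AddStringToObject copies its string argument internally, so wrapping the fields in strCopy() leaks each copy; passing m->role, m->type, m->message and m->body directly would suffice, as cJsonMessage below already does.) */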
*type,const char *message) 58 | { 59 | cJSON* node = cJSON_CreateObject(); 60 | cJSON_AddStringToObject(node, "r", role); 61 | cJSON_AddStringToObject(node, "t", type); 62 | cJSON_AddStringToObject(node, "m", message); 63 | cJSON_AddObjectToObject(node, "b"); 64 | return node; 65 | } 66 | 67 | cJSON* cJsonAInfo(const char* name, const char* mac, const char* ip, int port) 68 | { 69 | cJSON* node = cJSON_CreateObject(); 70 | cJSON_AddStringToObject(node, "name", name); 71 | cJSON_AddStringToObject(node, "mac", mac); 72 | cJSON_AddStringToObject(node, "ip", ip); 73 | cJSON_AddNumberToObject(node, "port", port); 74 | return node; 75 | } 76 | 77 | void FreeMessage(Message* m) 78 | { 79 | free(m->role); 80 | free(m->type); 81 | free(m->message); 82 | free(m->body); 83 | } 84 | 85 | void FreeAInfo(AgentInfo* ai) 86 | { 87 | free(ai->name); 88 | free(ai->mac); 89 | free(ai->ip); 90 | } -------------------------------------------------------------------------------- /src/Losses.cu: -------------------------------------------------------------------------------- 1 | #include "Losses.h" 2 | 3 | #ifdef __NVCC__ 4 | __global__ void SoftmaxProbKernels(int n, float* iw, float* ow) 5 | { 6 | __shared__ float sum[1]; 7 | int i = (blockIdx.x * blockDim.x) + threadIdx.x; 8 | if (threadIdx.x == 0) sum[0] = 0.f; //shared memory is not zero-initialized 9 | __syncthreads(); 10 | if (i < n) { float e = expf(iw[i]); ow[i] = e; atomicAdd(&sum[0], e); } 11 | __syncthreads(); //all threads must reach the barrier, including those with i >= n 12 | if (i < n) { 13 | float x = ow[i] / sum[0]; //note: sum is accumulated per block, so this is only correct for single-block launches 14 | ow[i] = x; 15 | } 16 | } 17 | Tensor SoftmaxProbGPU(Tensor* t) 18 | { 19 | Tensor out = Tensor_CreateGPU(t->s, 0); 20 | int n = t->n; 21 | int threadsPerBlockX = 128; 22 | dim3 gridDim((int)ceil(n / (float)threadsPerBlockX), 1, 1); 23 | dim3 blockDim(threadsPerBlockX, 1, 1); 24 | SoftmaxProbKernels KERNEL_CALL(gridDim, blockDim) (n, 25 | t->w, out.w); 26 | cudaDeviceSynchronize(); 27 | return out; 28 | } 29 | 30 | __global__ void Cross_entropy_LossKernels(int n, float* xw, float* ydw, int idx) 31 | { 32 | int i = (blockIdx.x * blockDim.x) + threadIdx.x; 33 | if (i < n) { 34 | float y_true = (i == idx) ?
1.f : 0.f; 35 | float der = -(y_true - xw[i]); 36 | atomicAdd(&ydw[i], der); 37 | } 38 | } 39 | 40 | float Cross_entropy_LossGPU(Tensor* y, int idx) 41 | { 42 | Tensor x = SoftmaxProbGPU(y); 43 | 44 | int n = y->n; 45 | int threadsPerBlockX = 128; 46 | dim3 gridDim((int)ceil(n / (float)threadsPerBlockX), 1, 1); 47 | dim3 blockDim(threadsPerBlockX, 1, 1); 48 | Cross_entropy_LossKernels KERNEL_CALL(gridDim, blockDim) (n, x.w, y->dw, idx); 49 | cudaDeviceSynchronize(); 50 | float true_val = 0.f; 51 | cudaMemcpy(&true_val, &x.w[idx], sizeof(float), cudaMemcpyDeviceToHost); 52 | float loss = -logf(true_val); 53 | Tensor_FreeGPU(&x); 54 | return loss; 55 | } 56 | 57 | __global__ void MSE_LossKernels(int n, float* yw, float* ytw, float* ydw, float* sum) 58 | { 59 | int i = (blockIdx.x * blockDim.x) + threadIdx.x; 60 | if (i < n) { 61 | float dy = (2.f / (float)n) * (yw[i] - ytw[i]); 62 | atomicAdd(&ydw[i], dy); 63 | float t = ytw[i] - yw[i]; 64 | float t2 = t * t; 65 | atomicAdd(sum, t2); 66 | } 67 | } 68 | 69 | float MSE_LossGPU(Tensor* y, Tensor* y_true) 70 | { 71 | int n = y->n; 72 | int threadsPerBlockX = 128; 73 | dim3 gridDim((int)ceil(n / (float)threadsPerBlockX), 1, 1); 74 | dim3 blockDim(threadsPerBlockX, 1, 1); 75 | float sum = 0; 76 | float* sumd; 77 | if (cudaMalloc((void**)&sumd, sizeof(float)) != cudaSuccess) printf("in loss allocation\n"); 78 | cudaMemset(sumd, 0, sizeof(float)); 79 | MSE_LossKernels KERNEL_CALL(gridDim, blockDim) (n, 80 | y->w, y_true->w, y->dw, sumd); 81 | cudaDeviceSynchronize(); 82 | cudaMemcpy(&sum, sumd, sizeof(float), cudaMemcpyDeviceToHost); 83 | cudaFree(sumd); 84 | float loss = sum / (float)n; 85 | return loss; 86 | } 87 | #endif // __NVCC__ -------------------------------------------------------------------------------- /src/cmd/data_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Model.h" 4 | #include "Optimizer.h" 5 | #include "Utils.h" 6 | #include "Losses.h" 7 | 8 | //Example of working with data and layer functions 9 | void TensorTest() 10 | { 11 | //Create a tensor of shape 5x5x1 filled with 1.f 12 | Tensor t = Tensor_Create({ 5, 5, 1 }, 1.f); 13 | //Fill the n elements of array w with random values from a continuous uniform distribution 14 | Tensor_Xavier_Rand(t.w, t.n); 15 | //Apply the softmax operation to tensor t 16 | Tensor sm = SoftmaxProb(&t); 17 | //Print the tensor to the console 18 | Tensor_Print(&sm); 19 | //Free the tensors' memory since they are no longer needed 20 | Tensor_Free(&sm); 21 | Tensor_Free(&t); 22 | //Create a tensor of shape 1x1x5 from the data array 23 | float data[5] = { 1,2,3,4,5 }; 24 | Tensor fromData = Tensor_FromData({ 1,1,5 }, data); 25 | Tensor_Print(&fromData); 26 | Tensor_Free(&fromData); 27 | } 28 | 29 | //Test the forward and backward passes of a dense (fully connected) layer 30 | void DenseTest() 31 | { 32 | Tensor x = Tensor_Create({ 5, 5, 3 }, 2.f); 33 | Layer* inp = Input_Create(x.s); 34 | Layer* de = Dense_Create(10, R_XAVIER, inp); 35 | 36 | inp->input = &x; 37 | //Forward pass 38 | Input_Forward(inp); 39 | Dense_Forward(de); 40 | PrintArray(de->output.w, de->output.n); 41 | //Backward pass 42 | FillArray(de->output.dw, de->output.n, 2.f); 43 | Dense_Backward(de); 44 | Dense* data = (Dense*)de->aData; 45 | PrintArray(data->kernels[0].dw, data->kernels[0].n); 46 | PrintArray(data->biases.dw, data->biases.n); 47 | 48 | Input_Free(inp); 49 | Dense_Free(de); 50 | Tensor_Free(&x); 51 | } 52 | 53 | //Test the forward and backward passes of a convolutional layer 54 | void ConvTest() 55 | { 56 | Tensor x = Tensor_Create({ 10, 10, 3 }, 2.f); 57 | Layer* inp = Input_Create(x.s); 58 | Layer* conv = Conv2d_Create(10, { 3,3 }, {2,2}, 0, R_XAVIER, inp); 59 | 60 | inp->input = &x; 61 | //Forward pass 62 | Input_Forward(inp); 63 | Conv2d_Forward(conv); 64 | PrintArray(conv->output.w, conv->output.n); 65 | //Backward pass 66 | FillArray(conv->output.dw, conv->output.n, 2.f); 67 | Conv2d_Backward(conv); 68 | Conv2d* data = (Conv2d*)conv->aData; 69 | PrintArray(data->kernels[0].dw, data->kernels[0].n); 70 | PrintArray(data->biases.dw, data->biases.n); 71 | 72 | Input_Free(inp); 73 | Conv2d_Free(conv); 74 | Tensor_Free(&x); 75 | } 76 | 77 | int main() 78 | { 79 | printf("Tensor creation test:\n"); 80 | TensorTest(); 81 | printf("\nDense layer test:\n"); 82 | DenseTest(); 83 | printf("\nConv2d layer test:\n"); 84 | ConvTest(); 85 | 86 | printf("\nPress enter to close..."); 87 | getchar(); 88 | return 0; 89 | } -------------------------------------------------------------------------------- /src/Tensor.h: -------------------------------------------------------------------------------- 1 | #ifndef TENSOR_H 2 | #define TENSOR_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | #define DBL_MAX 1.7976931348623158e+308 /* max value */ 9 | #define DBL_MIN 2.2250738585072014e-308 /* min positive value */ 10 | 11 | #define FLT_MAX 3.402823466e+38F /* max value */ 12 | #define FLT_MIN 1.175494351e-38F /* min positive value */ 13 | 14 | #ifdef __NVCC__ 15 | #define KERNEL_CALL(x, y) <<<x, y>>> 16 | #define KERNEL_CALL_ONCE <<<1,1>>> 17 | #endif 18 | 19 | #include "TWeightsInit.h" 20 | #include "cJSON.h" 21 | 22 | typedef struct shape 23 | { 24 | int w;//width 25 | int h;//height 26 | int d;//depth 27 | }shape; 28 | 29 | typedef struct shape2 30 | { 31 | int w;//width 32 | int h;//height 33 | }shape2; 34 | 35 | typedef struct Tensor 36 | { 37 | shape s; 38 | int n; 39 | 40 | float *w; 41 | float *dw; 42 | //additions for optimizer 43 | //float *vt; 44 | float sumdw; 45 | 46 | void* tData;//training data 47 | }Tensor; 48 | 49 | Tensor Tensor_Create(shape s, float c); 50 | Tensor Tensor_FromData(shape s, const float *data); 51 | Tensor* Tensor_CreateDyn(shape s, float c); 52 | Tensor Tensor_CreateCopy(Tensor* t); 53 | void Tensor_CopyData(Tensor* dst, Tensor* src); 54 | int tIdx(shape s, int w, int h, int d); 55 | void Tensor_Xavier_Rand(float* w, int n); 56 | void Tensor_He_Rand(float* w, int n); 57 | #ifdef __NVCC__ 58 | __global__ void
Tensor_FillKernel(int limit, float* w, float v); 59 | void Tensor_FillGPU(Tensor* v, float c); 60 | void Tensor_FillArrayGPU(float* v, int n, float c); 61 | Tensor Tensor_CreateGPU(shape s, float c); 62 | Tensor Tensor_FromDataGPU(shape s, const float* data); 63 | void Tensor_FreeGPU(Tensor* v); 64 | void Tensor_CopyDataGPU(Tensor* dst, Tensor* src); 65 | __global__ void xavier_rand_kernel(void* globalState, float* w, int n); 66 | __global__ void setup_rng_kernel(int limit, void* state); 67 | void Tensor_Xavier_RandGPU(float* w, int n); 68 | __global__ void TPrintKernel(float* w, int n); 69 | void Tensor_PrintGPU(Tensor* v); 70 | void Tensor_PrintArrayGPU(float* v, int n); 71 | #endif 72 | //======================================================================= 73 | void Tensor_Free(Tensor *v); 74 | float Tensor_Get(Tensor *vol, int x, int y, int d); 75 | void Tensor_Set(Tensor *vol, int w, int h, int d, float v); 76 | void Tensor_Copy(Tensor* dst, Tensor *src); 77 | shape T_Argmax(Tensor *t); 78 | 79 | float T_MinValue(Tensor* t); 80 | float T_MaxValue(Tensor* t); 81 | float T_Mean(Tensor* t); 82 | 83 | cJSON* Shape_To_JSON(shape s); 84 | cJSON* Tensor_To_JSON(Tensor* v); 85 | void Tensor_Load_JSON(Tensor* t, cJSON* node); 86 | void Tensor_Print(Tensor* x); 87 | #ifdef __cplusplus 88 | } 89 | #endif 90 | 91 | #endif 92 | -------------------------------------------------------------------------------- /src/Utils.c: -------------------------------------------------------------------------------- 1 | #include "Utils.h" 2 | 3 | float* createFloatArray(int n) 4 | { 5 | float* a = (float*)malloc(sizeof(float)*n); 6 | if (!a) 7 | { 8 | printf("Array allocation error\n"); 9 | return NULL; 10 | } 11 | else { 12 | memset(a, 0, sizeof(float) * n); 13 | return a; 14 | } 15 | } 16 | 17 | int* createIntArray(int n) 18 | { 19 | int* a = (int*)malloc(sizeof(int) * n); 20 | if (!a) 21 | { 22 | printf("Array allocation error\n"); 23 | return NULL; 24 | } 25 | else { 26 | memset(a, 0, sizeof(int) * n); 27 | return a; 28 | } 29 | } 30 | 31 | void NormalizeArray(float *w, float n) 32 | { 33 | float stdev = StandardDeviation(w, n); 34 | float mean = 0, sum = 0, eps = 1e-10f; 35 | for (int i = 0; i < n; ++i) { 36 | sum += w[i]; 37 | } 38 | mean = sum / (float)n; 39 | 40 | for (size_t i = 0; i < n; i++) 41 | { 42 | float norm = (w[i] - mean) / (stdev + eps); 43 | w[i] = norm; 44 | } 45 | } 46 | 47 | float StandardDeviation(float *data, int n) 48 | { 49 | float sum = 0.0f, mean, SD = 0.0f; 50 | int i; 51 | for (i = 0; i < n; ++i) { 52 | sum += data[i]; 53 | } 54 | mean = sum / (float)n; 55 | for (i = 0; i < n; ++i) { 56 | float x = data[i] - mean; 57 | SD += x * x; 58 | } 59 | return sqrtf(SD / (float)n); 60 | } 61 | 62 | void FlipArray(float* w, int n) 63 | { 64 | for (size_t i = 0; i < n/2; i++) 65 | { 66 | float temp = w[i]; 67 | w[i] = w[n - i - 1]; 68 | w[n - i - 1] = temp; 69 | } 70 | } 71 | 72 | //Doesnt support by microcontroller 73 | /* 74 | void WriteToFile(const char* txt, const char* file) 75 | { 76 | FILE* fptr; 77 | fptr = fopen(file, "w"); 78 | 79 | if (fptr == NULL) 80 | { 81 | printf("Error!"); 82 | exit(1); 83 | } 84 | fprintf(fptr, txt); 85 | fclose(fptr); 86 | } 87 | 88 | char* LoadFile(const char* filename) 89 | { 90 | FILE* textfile = fopen(filename, "r"); 91 | if (textfile == NULL) 92 | return NULL; 93 | 94 | fseek(textfile, 0L, SEEK_END); 95 | long numbytes = ftell(textfile); 96 | fseek(textfile, 0L, SEEK_SET); 97 | 98 | char* text = (char*)calloc(numbytes, sizeof(char)); 99 | if (text == 
NULL) 100 | return NULL; 101 | 102 | fread(text, sizeof(char), numbytes, textfile); 103 | fclose(textfile); 104 | 105 | return text; 106 | } 107 | */ 108 | 109 | void PrintArray(float* w, int n) 110 | { 111 | printf("\n["); 112 | for (size_t i = 0; i < n; i++) 113 | { 114 | printf("%f, ", w[i]); 115 | } 116 | printf("]\n"); 117 | } 118 | 119 | void FillArray(float* w, int n, float v) 120 | { 121 | for (int i = 0; i < n; i++) 122 | { 123 | w[i] = v; 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /src/Conc.c: -------------------------------------------------------------------------------- 1 | #include "Conc.h" 2 | #include 3 | 4 | Layer* Conc_Create(Layer* in1, Layer *in2) 5 | { 6 | Layer* l = (Layer*)malloc(sizeof(Layer)); 7 | if (!l) 8 | { 9 | printf("Concatenate allocation error!"); 10 | return NULL; 11 | } 12 | l->type = LT_CONC; 13 | //common layer def 14 | l->out_shape = (shape){ in1->out_shape.w, in1->out_shape.h, in1->out_shape.d+in2->out_shape.d }; 15 | l->n_inputs = l->out_shape.w* l->out_shape.h* l->out_shape.d; 16 | l->output = Tensor_Create(l->out_shape, 0); 17 | l->input = &in1->output; 18 | l->input2 = &in2->output; 19 | 20 | l->aData = NULL; 21 | printf("Conc, output shape: [%d, %d, %d]\n", l->out_shape.w, l->out_shape.h, l->out_shape.d); 22 | return l; 23 | } 24 | 25 | Tensor* Conc_Forward(Layer* l) 26 | { 27 | Tensor* t1 = l->input; 28 | Tensor* t2 = l->input2; 29 | 30 | shape s = l->out_shape; 31 | for (size_t d = 0; d < s.d; d++) 32 | { 33 | for (size_t h = 0; h < s.h; h++) 34 | { 35 | for (size_t w = 0; w < s.w; w++) 36 | { 37 | float val = (d < t1->s.d) ? Tensor_Get(t1, w, h, d) : Tensor_Get(t2, w, h, d - t1->s.d); 38 | Tensor_Set(&l->output, w, h, d, val); 39 | } 40 | } 41 | } 42 | return &l->output; 43 | } 44 | 45 | void Conc_Backward(Layer* l) 46 | { 47 | Tensor* t1 = l->input; 48 | Tensor* t2 = l->input2; 49 | 50 | shape s = l->out_shape; 51 | for (size_t d = 0; d < s.d; d++) 52 | { 53 | for (size_t h = 0; h < s.h; h++) 54 | { 55 | for (size_t w = 0; w < s.w; w++) 56 | { 57 | if(ds.d) 58 | { 59 | int idx = tIdx(s, w, h, d); 60 | int ti = tIdx(t1->s, w, h, d); 61 | t1->dw[ti] = l->output.dw[idx]; 62 | } 63 | else 64 | { 65 | int idx = tIdx(s, w, h, d); 66 | int ti = tIdx(t2->s, w, h, d-t1->s.d); 67 | t2->dw[ti] = l->output.dw[idx]; 68 | } 69 | //float grad = (d < t1->s.d) ? Tensor_Get(t1, w, h, d) : Tensor_Get(t2, w, h, d - t1->s.d); 70 | } 71 | } 72 | } 73 | } 74 | 75 | void Conc_BackpropGrads(Layer* l, Tensor *t1, Tensor *t2) 76 | { 77 | shape s = l->out_shape; 78 | for (size_t d = 0; d < s.d; d++) 79 | { 80 | for (size_t h = 0; h < s.h; h++) 81 | { 82 | for (size_t w = 0; w < s.w; w++) 83 | { 84 | if (d < t1->s.d) 85 | { 86 | int idx = tIdx(s, w, h, d); 87 | int ti = tIdx(t1->s, w, h, d); 88 | t1->dw[ti] = l->output.dw[idx]; 89 | } 90 | else 91 | { 92 | int idx = tIdx(s, w, h, d); 93 | int ti = tIdx(t2->s, w, h, d - t1->s.d); 94 | t2->dw[ti] = l->output.dw[idx]; 95 | } 96 | //float grad = (d < t1->s.d) ? 
Tensor_Get(t1, w, h, d) : Tensor_Get(t2, w, h, d - t1->s.d); 97 | } 98 | } 99 | } 100 | } -------------------------------------------------------------------------------- /src/geometry/TQuaternion.c: -------------------------------------------------------------------------------- 1 | #include "TQuaternion.h" 2 | #include 3 | #include 4 | 5 | TQuaternion TQuaternion_Create(float x, float y, float z, float w) 6 | { 7 | TQuaternion vec = (TQuaternion){x, y, z, w}; 8 | return vec; 9 | } 10 | 11 | TQuaternion TQuaternion_CreateV(TVec3 v, float w) 12 | { 13 | return (TQuaternion){v.x, v.y, v.z, w}; 14 | } 15 | 16 | TQuaternion TQuaternion_FromVec3(TVec3 axis, float angleRadian) 17 | { 18 | TQuaternion q; 19 | float m = TVec3_Length(axis); 20 | if (m > 0.0001f) 21 | { 22 | float ca = cosf(angleRadian * 0.5f); 23 | float sa = sinf(angleRadian * 0.5f); 24 | q.x = axis.x / m * sa; 25 | q.y = axis.y / m * sa; 26 | q.z = axis.z / m * sa; 27 | q.w = ca; 28 | } 29 | else 30 | { 31 | q.w = 1; q.x = 0; q.y = 0; q.z = 0; 32 | } 33 | return q; 34 | } 35 | 36 | TQuaternion TQuaternion_Norm(TQuaternion v) 37 | { 38 | TQuaternion r = v; 39 | float m = v.w * v.w + v.x * v.x + v.y * v.y + v.z * v.z; 40 | if (m > 0.0001f) 41 | { 42 | m = sqrtf(m); 43 | r.w /= m; 44 | r.x /= m; 45 | r.y /= m; 46 | r.z /= m; 47 | } 48 | else 49 | { 50 | r.w = 1.f; r.x = 0; r.y = 0; r.z = 0; 51 | } 52 | return r; 53 | } 54 | 55 | TQuaternion TQuaternion_Conjugate(TQuaternion v) 56 | { 57 | return (TQuaternion){-v.x, -v.y, -v.z, v.w}; 58 | } 59 | 60 | TQuaternion TQuaternion_Mul(TQuaternion q1, TQuaternion q2) 61 | { 62 | float nw = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; 63 | float nx = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y; 64 | float ny = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z; 65 | float nz = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x; 66 | return (TQuaternion) { nx, ny, nz, nw }; 67 | } 68 | 69 | TQuaternion TQuaternion_Euler(float x, float y, float z) 70 | { 71 | float c1 = cosf(x); 72 | float s1 = sinf(x); 73 | float c2 = cosf(y); 74 | float s2 = sinf(y); 75 | float c3 = cosf(z); 76 | float s3 = sinf(z); 77 | float wn = sqrtf(1.0f + c1 * c2 + c1 * c3 - s1 * s2 * s3 + c2 * c3) / 2.0f; 78 | float w4 = (4.0f * wn); 79 | float xn = (c2 * s3 + c1 * s3 + s1 * s2 * c3) / w4; 80 | float yn = (s1 * c2 + s1 * c3 + c1 * s2 * s3) / w4; 81 | float zn = (-s1 * s3 + c1 * s2 * c3 + s2) / w4; 82 | 83 | return (TQuaternion) { xn, yn, zn, wn }; 84 | } 85 | 86 | TVec3 TQuaternion_Rotate(TQuaternion q, TVec3 pt) 87 | { 88 | //q = TQuaternion_Norm(q); 89 | TQuaternion q1 = TQuaternion_Norm(q); 90 | q1 = TQuaternion_Conjugate(q1); 91 | 92 | TQuaternion qNode = (TQuaternion){ pt.x, pt.y, pt.z, 0 }; 93 | qNode = TQuaternion_Mul(TQuaternion_Mul(q, qNode), q1); 94 | 95 | pt.x = qNode.x; 96 | pt.y = qNode.y; 97 | pt.z = qNode.z; 98 | 99 | return pt; 100 | } -------------------------------------------------------------------------------- /src/cmd/cartpole/cart.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "box2d/box2d.h" 3 | #include "GLFW/glfw3.h" 4 | #include 5 | #include 6 | #include 7 | #include "shapes.h" 8 | 9 | class Cart { 10 | public: 11 | TDBox *b1; 12 | TDBox *b2; 13 | TDBox *b3; 14 | 15 | b2World* world; 16 | 17 | Cart(b2World* _world) { 18 | world = _world; 19 | b1 =new TDBox(_world, 0.04f, 0.32f, { 0.0f, 0.32f }, b2_dynamicBody, false, 0.01f); 20 | b2 =new TDBox(_world, 0.16f, 0.08f, { 0.0f, 0.0f }, b2_dynamicBody, 
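/* (Re TQuaternion_Rotate above: it applies the quaternion sandwich product p' = q * p * q^-1, taking the conjugate of the normalized q as the inverse, which is valid because conj(q) = q^-1 for unit quaternions.) */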
false, 1.f); 21 | b3 =new TDBox(_world, 2.0f, 0.01f, { 0.0f, 0.f }, b2_kinematicBody, true, 1.f); 22 | 23 | b2RevoluteJointDef jointDef; 24 | jointDef.bodyA = b2->m_body; 25 | jointDef.bodyB = b1->m_body; 26 | //jointDef.collideConnected = false; 27 | jointDef.localAnchorB = b2Vec2(0, -0.32f); 28 | _world->CreateJoint(&jointDef); 29 | 30 | b2PrismaticJointDef prjointDef; 31 | prjointDef.bodyA = b3->m_body; 32 | prjointDef.bodyB = b2->m_body; 33 | prjointDef.collideConnected = false; 34 | _world->CreateJoint(&prjointDef); 35 | } 36 | 37 | float poleAngle() 38 | { 39 | return b1->m_body->GetAngle(); 40 | } 41 | 42 | float cartPos() 43 | { 44 | return b2->m_body->GetPosition().x; 45 | } 46 | 47 | float cartVel() 48 | { 49 | return b2->m_body->GetLinearVelocity().x; 50 | } 51 | 52 | float poleVel() 53 | { 54 | return b1->m_body->GetAngularVelocity(); 55 | } 56 | 57 | bool needToReset() 58 | { 59 | float pos = cartPos(); 60 | float angle = poleAngle(); 61 | if (angle < -1.f || angle>1.f || pos < -1.5f || pos>1.5f) 62 | return true; 63 | return false; 64 | } 65 | 66 | void pushLeft() 67 | { 68 | b2->m_body->ApplyLinearImpulseToCenter(b2Vec2(-0.02f, 0), true); 69 | } 70 | void pushRight() 71 | { 72 | b2->m_body->ApplyLinearImpulseToCenter(b2Vec2(0.02f, 0), true); 73 | } 74 | 75 | void Reset() 76 | { 77 | b1->m_body->SetTransform(b2Vec2(0.0f, 0.32f), 0); 78 | b2->m_body->SetTransform(b2Vec2(0.0f, 0.0f), 0); 79 | 80 | b1->m_body->SetAngularVelocity(0); 81 | b1->m_body->SetLinearVelocity(b2Vec2(0.0f, 0.0f)); 82 | 83 | b2->m_body->SetAngularVelocity(0); 84 | b2->m_body->SetLinearVelocity(b2Vec2(0.0f, 0.0f)); 85 | } 86 | 87 | void Step(int action) 88 | { 89 | switch (action) 90 | { 91 | case 0: pushLeft(); break; 92 | case 1: pushRight(); break; 93 | default: 94 | break; 95 | } 96 | } 97 | 98 | ~Cart() { 99 | delete(b1); 100 | delete(b2); 101 | delete(b3); 102 | } 103 | 104 | void Draw() 105 | { 106 | b1->Draw(); 107 | b2->Draw(); 108 | b3->Draw(); 109 | } 110 | }; 111 | -------------------------------------------------------------------------------- /src/RL/RLBrain.c: -------------------------------------------------------------------------------- 1 | #include "RLBrain.h" 2 | 3 | RLBrain *RLBrain_Create(shape state_shape, int n_outputs) 4 | { 5 | RLBrain *brain = malloc(sizeof(RLBrain)); 6 | if(!brain) 7 | { 8 | return NULL; 9 | } 10 | brain->input_shape = (shape){state_shape.w, state_shape.h, state_shape.d}; 11 | brain->num_outputs = n_outputs; 12 | brain->net = Model_Create(); 13 | brain->inp = Model_AddLayer(&brain->net, Input_Create(brain->input_shape)); 14 | Layer* l = Model_AddLayer(&brain->net, Dense_Create(16, R_XAVIER, brain->inp)); 15 | l = Model_AddLayer(&brain->net, Dense_Create(16, R_XAVIER, l)); 16 | brain->out = Model_AddLayer(&brain->net, Dense_Create(n_outputs, R_XAVIER, l)); 17 | //l = Model_AddLayer(&brain->net, Regression_Create(l)); 18 | 19 | 20 | //printf("Brain created"); 21 | brain->buffer = ReplayBuffer_Create(64, 64); 22 | brain ->par = OptParams_Create(); 23 | brain->par.method = ADAN; 24 | brain->par.learning_rate = 0.0001f; 25 | brain->discount = 0.95f; 26 | return brain; 27 | } 28 | 29 | void RLBrain_Record(RLBrain *brain, Tensor* state, Tensor* next_state, int action, float reward, int done) 30 | { 31 | ReplayBuffer_Record(brain->buffer, state, next_state, action, reward, done); 32 | } 33 | 34 | Tensor RLBrain_Forward(RLBrain *brain, Tensor *state) 35 | { 36 | //Tensor *y = Seq_Forward(&brain->net, state, 0); 37 | brain->inp->input = state; 38 | 
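/* (RLBrain_TrainTrace below implements the one-step Q-learning target: target = reward + discount * max_a Q(next_state, a), with the last transition of the trace using the raw reward as a terminal value; Regression_Loss then pushes Q(state, action) toward that target.) */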
Model_Forward(&brain->net); 39 | return brain->out->output; 40 | } 41 | 42 | float RLBrain_Train(RLBrain *brain) 43 | { 44 | /* 45 | if (brain->buffer->buffer->length >= brain->buffer->batch_size) { 46 | float cur_loss = 0.0f; 47 | for (int i = brain->buffer->buffer->length-(int)1; i > 0; i--) 48 | { 49 | Sample *s = (Sample*)brain->buffer->buffer->data[i].elem; 50 | Tensor y = Tensor_Create((shape){1,1,2}, 0); 51 | y.w[0] = (float)s->action; 52 | if(s->done) 53 | y.w[1] = s->reward; 54 | else 55 | { 56 | Tensor* next = brain->net.NetForward(&brain->net, s->next_state); 57 | float Q_sa = T_MaxValue(next); 58 | y.w[1] = s->reward + brain->discount * Q_sa; 59 | } 60 | Optimize(&brain->net, &brain->par, s->state, &y); 61 | LData* d = (LData*)brain->net.Layers[brain->net.n_layers - 1]->aData; 62 | cur_loss += d->loss; 63 | Tensor_Free(&y); 64 | } 65 | float loss = cur_loss / brain->buffer->batch_size; 66 | return loss; 67 | } 68 | */ 69 | return -1.f; 70 | } 71 | 72 | float RLBrain_TrainTrace(RLBrain* brain, Tensor* states, float* rewards, float* actions, int n) 73 | { 74 | float total_loss = 0; 75 | for (int i = 0; i < n-1; i++) 76 | { 77 | brain->inp->input = &states[i+1]; 78 | Model_Forward(&brain->net); 79 | float Q_sa = T_MaxValue(&brain->out->output); 80 | float target = rewards[i] + brain->discount * Q_sa; 81 | target = (i == n - 2) ? rewards[i] : target; 82 | 83 | brain->inp->input = &states[i]; 84 | Model_Forward(&brain->net); 85 | float loss = Regression_Loss(&brain->out->output, (int)actions[i], target); 86 | total_loss += loss; 87 | Model_Backward(&brain->net); 88 | OptimizeModel(&brain->net, &brain->par); 89 | } 90 | total_loss /= (float)n; 91 | printf("trace_loss: %f\n", total_loss); 92 | return total_loss; 93 | } 94 | -------------------------------------------------------------------------------- /src/cmd/opt_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Model.h" 4 | #include "Optimizer.h" 5 | #include "Losses.h" 6 | #include "Utils.h" 7 | 8 | //Пример создания и обучения модели из трех полносвязных слоев и функциями активации гиперболического тангенса на CPU 9 | int main() 10 | { 11 | //Определяем размерность входных данных 12 | shape input = { 128,128,1 }; 13 | //Создаем два образа на которые будем обучать модель 14 | Tensor x1 = Tensor_Create(input, 1.f); 15 | Tensor x2 = Tensor_Create(input, -1.f); 16 | 17 | printf("Create model structure:\n"); 18 | //Инициализация модели на CPU 19 | Model n = Model_Create(); 20 | //Добавляем входной слой CPU к модели n, сохраняем ссылку на входной слой в inp 21 | Layer* inp = Model_AddLayer(&n, Input_Create(input)); 22 | //Добавляем полносвязный слой CPU к модели n 23 | Layer *l = Model_AddLayer(&n, Dense_Create(128, R_XAVIER, inp)); 24 | //Добавляем слой активации tanh CPU к модели n 25 | l = Model_AddLayer(&n, TanhA_Create(l)); 26 | l = Model_AddLayer(&n, Dense_Create(64, R_XAVIER, l)); 27 | l = Model_AddLayer(&n, TanhA_Create(l)); 28 | Layer *out = Model_AddLayer(&n, Dense_Create(2, R_XAVIER, l)); 29 | 30 | //====optimization===== 31 | //Инициализируем параметры оптимизации: 32 | OptParams p = OptParams_Create(); 33 | //Задаем скорость обучения 34 | p.learning_rate = 0.01f; 35 | //Задаем метод оптимизации 36 | p.method = ADAN; 37 | 38 | printf("Optimizer: ADAN, lr = 0.01, loss = cross_entropy\n"); 39 | //Цикл оптимизации из 50 шагов 40 | for (size_t i = 0; i < 50; i++) 41 | { 42 | //Шаг обучения для первого образа 43 | //Задаем вход модели: 44 | 
inp->input = &x1; 45 | //Run the model forward pass: 46 | Model_Forward(&n); 47 | //Compute the loss for the current model output: 48 | float loss1 = Cross_entropy_Loss(&out->output, 0); 49 | //Run the model backward pass and compute the gradients: 50 | Model_Backward(&n); 51 | //Perform an optimization step and apply the gradients to the model weights: 52 | OptimizeModel(&n, &p); 53 | 54 | //Training step for the second sample 55 | inp->input = &x2; 56 | Model_Forward(&n); 57 | float loss2 = Cross_entropy_Loss(&out->output, 1); 58 | Model_Backward(&n); 59 | OptimizeModel(&n, &p); 60 | 61 | //Average loss over the two samples at the current optimization step 62 | float total_loss = (loss1 + loss2) * 0.5f; 63 | printf("loss: %f\n", total_loss); 64 | } 65 | printf("\nTest model forward pass:"); 66 | //Test the model after optimization 67 | //Check the model output for the first sample 68 | inp->input = &x1; 69 | Model_Forward(&n); 70 | //Since Cross_entropy_Loss was used, apply softmax to the network output to map the results into the [0,1] range 71 | Tensor o1 = SoftmaxProb(&out->output); 72 | PrintArray(o1.w, o1.n); 73 | //Free the tensor holding the softmax results, since it is no longer needed 74 | Tensor_Free(&o1); 75 | 76 | //Check the model output for the second sample 77 | inp->input = &x2; 78 | Model_Forward(&n); 79 | Tensor o2 = SoftmaxProb(&out->output); 80 | PrintArray(o2.w, o2.n); 81 | Tensor_Free(&o2); 82 | 83 | printf("\nPress enter to close..."); 84 | getchar(); 85 | return 0; 86 | } -------------------------------------------------------------------------------- /src/MaxPool2d.c: -------------------------------------------------------------------------------- 1 | #include "MaxPool2d.h" 2 | #include <float.h> 3 | 4 | Layer* MaxPool2d_Create(shape2 k_size, shape2 stride, int pad, Layer* in) 5 | { 6 | //input shape depth must be == 1 7 | Layer* l = (Layer*)malloc(sizeof(Layer)); 8 | if (!l) 9 | { 10 | printf("MaxPool2d allocation error!"); 11 | return NULL; 12 | } 13 | l->type = LT_MAXPOOL; 14 | int inn = in->out_shape.w * in->out_shape.h * in->out_shape.d; 15 | //calculate output shape 16 | l->out_shape.d = in->out_shape.d; 17 | l->out_shape.w = (int)((in->out_shape.w - k_size.w + pad * 2) / stride.w + 1); 18 | l->out_shape.h = (int)((in->out_shape.h - k_size.h + pad * 2) / stride.h + 1); 19 | printf("MaxPool2d output shape: [%d, %d, %d]\n", l->out_shape.w, l->out_shape.h, l->out_shape.d); 20 | 21 | l->n_inputs = inn; 22 | l->output = Tensor_Create(l->out_shape, 0); 23 | l->input = &in->output; 24 | 25 | float bias = 0.0f; 26 | MaxPool2d* ld = (MaxPool2d*)malloc(sizeof(MaxPool2d)); 27 | if (ld) { 28 | ld->pad = pad; 29 | ld->stride.w = stride.w; ld->stride.h = stride.h; 30 | ld->k_size.w = k_size.w; ld->k_size.h = k_size.h; 31 | } 32 | else printf("MaxPool2d data allocation error\n"); 33 | l->aData = ld; 34 | return l; 35 | } 36 | 37 | Tensor* MaxPool2d_Forward(Layer* l) 38 | { 39 | Tensor* inp = l->input; 40 | MaxPool2d* data = (MaxPool2d*)l->aData; 41 | 42 | int pad = data->pad; 43 | for (size_t d = 0; d < l->out_shape.d; d++) 44 | { 45 | for (size_t h = 0; h < l->out_shape.h; h++) 46 | { 47 | for (size_t w = 0; w < l->out_shape.w; w++) 48 | { 49 | float maxk = -FLT_MAX; 50 | //iterate kernels by size 51 | for (size_t kh = 0; kh < data->k_size.h; kh++) 52 | { 53 | int cury = (h * data->stride.h - pad) + kh; 54 | for (size_t kw = 0; kw < data->k_size.w; kw++) 55 | { 56 | int curx = (w * data->stride.w - pad) + kw; 57 | if (curx >= 0 && cury >= 0 && curx < 
inp->s.w && cury < inp->s.h) 58 | { 59 | float xwi = Tensor_Get(inp, curx, cury, d); 60 | if (xwi > maxk) { maxk = xwi;} 61 | } 62 | } 63 | } 64 | Tensor_Set(&l->output, w, h, d, maxk); 65 | } 66 | } 67 | } 68 | return &l->output; 69 | } 70 | 71 | void MaxPool2d_Backward(Layer* l) 72 | { 73 | Tensor* inp = l->input; 74 | MaxPool2d* data = (MaxPool2d*)l->aData; 75 | Tensor* out = &l->output; 76 | 77 | int pad = data->pad; 78 | for (size_t d = 0; d < l->out_shape.d; d++) 79 | { 80 | for (size_t h = 0; h < l->out_shape.h; h++) 81 | { 82 | for (size_t w = 0; w < l->out_shape.w; w++) 83 | { 84 | float maxk = -FLT_MAX; 85 | int khm=0, kwm=0; 86 | //iterate kernels by size 87 | for (size_t kh = 0; kh < data->k_size.h; kh++) 88 | { 89 | int cury = (h * data->stride.h - pad) + kh; 90 | for (size_t kw = 0; kw < data->k_size.w; kw++) 91 | { 92 | int curx = (w * data->stride.w - pad) + kw; 93 | if (curx >= 0 && cury >= 0 && curx < inp->s.w && cury < inp->s.h) 94 | { 95 | float xwi = Tensor_Get(inp, curx, cury, d); 96 | if (xwi > maxk) { maxk = xwi; kwm = curx; khm = cury; } 97 | } 98 | } 99 | } 100 | int ido = tIdx(out->s, w, h, d); 101 | float next_grad = out->dw[ido]; 102 | int id = tIdx(inp->s, kwm, khm, d); 103 | inp->dw[id] += next_grad; 104 | } 105 | } 106 | } 107 | } -------------------------------------------------------------------------------- /src/cmd/cartpole_cont/cart.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "box2d/box2d.h" 3 | #include "GLFW/glfw3.h" 4 | #include 5 | #include 6 | #include 7 | #include "shapes.h" 8 | #include "TCommon.h" 9 | 10 | class Cart { 11 | public: 12 | TDBox *b1; 13 | TDBox *b2; 14 | TDBox *b3; 15 | 16 | b2World* world; 17 | 18 | Cart(b2World* _world) { 19 | world = _world; 20 | b1 =new TDBox(_world, 0.04f, 0.32f, { 0.0f, 0.32f }, b2_dynamicBody, false, 0.01f); 21 | b2 =new TDBox(_world, 0.16f, 0.08f, { 0.0f, 0.0f }, b2_dynamicBody, false, 1.f); 22 | b3 =new TDBox(_world, 2.0f, 0.01f, { 0.0f, 0.f }, b2_kinematicBody, true, 1.f); 23 | 24 | b2RevoluteJointDef jointDef; 25 | jointDef.bodyA = b2->m_body; 26 | jointDef.bodyB = b1->m_body; 27 | //jointDef.collideConnected = false; 28 | jointDef.localAnchorB = b2Vec2(0, -0.32f); 29 | _world->CreateJoint(&jointDef); 30 | 31 | b2PrismaticJointDef prjointDef; 32 | prjointDef.bodyA = b3->m_body; 33 | prjointDef.bodyB = b2->m_body; 34 | prjointDef.collideConnected = false; 35 | _world->CreateJoint(&prjointDef); 36 | } 37 | 38 | float poleAngle() 39 | { 40 | return b1->m_body->GetAngle(); 41 | } 42 | 43 | float cartPos() 44 | { 45 | return b2->m_body->GetPosition().x; 46 | } 47 | 48 | float cartVel() 49 | { 50 | return b2->m_body->GetLinearVelocity().x; 51 | } 52 | 53 | float poleVel() 54 | { 55 | return b1->m_body->GetAngularVelocity(); 56 | } 57 | 58 | bool needToReset() 59 | { 60 | float pos = cartPos(); 61 | float angle = poleAngle(); 62 | if (angle < -1.f || angle>1.f || pos < -1.5f || pos>1.5f) 63 | return true; 64 | return false; 65 | } 66 | 67 | void pushLeft() 68 | { 69 | b2->m_body->ApplyLinearImpulseToCenter(b2Vec2(-0.02f, 0), true); 70 | } 71 | void pushRight() 72 | { 73 | b2->m_body->ApplyLinearImpulseToCenter(b2Vec2(0.02f, 0), true); 74 | } 75 | 76 | void Reset() 77 | { 78 | int r = rngInt(0, 1); 79 | float angle = r > 0 ? 
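/* randomize the initial pole tilt so episodes start falling in either direction */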
0.1f : -0.1f; 80 | b1->m_body->SetTransform(b2Vec2(0.0f, 0.32f), angle); 81 | b2->m_body->SetTransform(b2Vec2(0.0f, 0.0f), 0); 82 | 83 | b1->m_body->SetAngularVelocity(0); 84 | b1->m_body->SetLinearVelocity(b2Vec2(0.0f, 0.0f)); 85 | 86 | b2->m_body->SetAngularVelocity(0); 87 | b2->m_body->SetLinearVelocity(b2Vec2(0.0f, 0.0f)); 88 | } 89 | 90 | void Step(int action) 91 | { 92 | switch (action) 93 | { 94 | case 0: pushLeft(); break; 95 | case 1: pushRight(); break; 96 | default: 97 | break; 98 | } 99 | } 100 | 101 | void ApplyForceValue(float force) 102 | { 103 | b2->m_body->ApplyLinearImpulseToCenter(b2Vec2(force, 0), true); 104 | } 105 | 106 | ~Cart() { 107 | delete(b1); 108 | delete(b2); 109 | delete(b3); 110 | } 111 | 112 | void Draw() 113 | { 114 | b1->Draw(); 115 | b2->Draw(); 116 | b3->Draw(); 117 | } 118 | }; 119 | -------------------------------------------------------------------------------- /src/TCommon.c: -------------------------------------------------------------------------------- 1 | #include "TCommon.h" 2 | 3 | float DegToRad(float deg) { return M_PI * deg / 180.0f; } 4 | float RadToDeg(float rad) { return rad * (180.0f / M_PI); } 5 | float Lerp(float a, float b, float t) { return a - (a * t) + (b * t); } 6 | float InvLerp(float a, float b, float t) { return (t - a) / (b - a); } 7 | 8 | float Clamp(float d, float min, float max) 9 | { 10 | const float t = d < min ? min : d; 11 | return t > max ? max : t; 12 | } 13 | 14 | void InsertionSort(float* values, int n) { 15 | for (size_t i = 1; i < n; ++i) { 16 | float x = values[i]; 17 | size_t j = i; 18 | while (j > 0 && values[j - 1] > x) { 19 | values[j] = values[j - 1]; 20 | --j; 21 | } 22 | values[j] = x; 23 | } 24 | } 25 | 26 | float Mean(float* items, int n) 27 | { 28 | float sum = 0; 29 | for (int i = 0; i < n; i++) 30 | { 31 | sum += items[i]; 32 | } 33 | return sum / n; 34 | } 35 | 36 | float rngFloat() { return (float)(rand()) / (float)(RAND_MAX); } 37 | int rngInt(int min, int max) { int randNum = rand() % (max - min + 1) + min; return randNum; } 38 | float rngNormal() { 39 | float u = ((float)rand() / (RAND_MAX)) * 2.f - 1.f; 40 | float v = ((float)rand() / (RAND_MAX)) * 2.f - 1.f; 41 | float r = u * u + v * v; 42 | 43 | while (r == 0 || r > 1) 44 | { 45 | u = ((float)rand() / (RAND_MAX)) * 2.f - 1.f; 46 | v = ((float)rand() / (RAND_MAX)) * 2.f - 1.f; 47 | r = u * u + v * v; 48 | } 49 | 50 | float c = sqrtf(-2.f * logf(r) / r); 51 | return u * c; 52 | } 53 | 54 | OrnsteinUhlenbeckNoise initNoise(float mu, float sigma, float x0) 55 | { 56 | OrnsteinUhlenbeckNoise noise; 57 | noise.mu = mu; 58 | noise.sigma = sigma; 59 | noise.theta = 0.15f; 60 | noise.dt = 1e-2f; 61 | noise.x0 = x0; 62 | noise.x_prev = noise.x0; 63 | return noise; 64 | } 65 | 66 | //Ornstein-Uhlenbeck noise implemented by OpenAI 67 | //from https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py 68 | float getNoiseVal(OrnsteinUhlenbeckNoise* n) 69 | { 70 | float x = n->x_prev + n->theta * (n->mu - n->x_prev) * n->dt + n->sigma * sqrtf(n->dt) * rngNormal(); 71 | n->x_prev = x; 72 | return x; 73 | } 74 | 75 | //from: 76 | //https://www.geeksforgeeks.org/random-number-generator-in-arbitrary-probability-distribution-fashion/ 77 | int find_ceil(int* arr, int r, int l, int h) 78 | { 79 | int mid = 0; 80 | while (l < h) 81 | { 82 | mid = l + ((h - l) >> 1); 83 | (r > arr[mid]) ? (l = mid + 1) : (h = mid); 84 | } 85 | return (arr[l] >= r) ? 
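/* smallest index whose prefix sum is >= r, or -1 if r is above the last prefix sum */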
l : -1; 86 | } 87 | 88 | int rng_by_prob(float* prob, int n) 89 | { 90 | int* prefix = createIntArray(n); 91 | if (prefix != NULL) 92 | { 93 | prefix[0] = (int)(prob[0] * 100.f); 94 | for (int i = 1; i < n; ++i) 95 | prefix[i] = prefix[i - 1] + (int)(prob[i] * 100.f); 96 | int r = (rand() % prefix[n - 1]) + 1; 97 | int indexc = find_ceil(prefix, r, 0, n - 1); 98 | free(prefix); 99 | return indexc; 100 | } 101 | return 0; 102 | } 103 | 104 | float Derivative(float (*f)(float), float x0) 105 | { 106 | const float delta = 1.0e-6f; //small offset for the central difference 107 | float x1 = x0 - delta; 108 | float x2 = x0 + delta; 109 | float y1 = f(x1); 110 | float y2 = f(x2); 111 | return (y2 - y1) / (x2 - x1); 112 | } 113 | -------------------------------------------------------------------------------- /src/cmd/cuda_test.cu: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include "Model.h" 4 | #include "Optimizer.h" 5 | #include "Losses.h" 6 | 7 | #include "cuda_runtime.h" 8 | #include "device_launch_parameters.h" 9 | 10 | //Example: build and train a model of three fully connected layers with tanh activations on the GPU 11 | int main() 12 | { 13 | //Define the input data shape 14 | shape input = { 128,128,1 }; 15 | //Initialize the model on the GPU 16 | Model m = Model_CreateGPU(); 17 | //Add a GPU input layer to model m and keep a reference to it in inp 18 | Layer *inp = Model_AddLayer(&m, Input_CreateGPU(input)); 19 | //Add a GPU fully connected layer to model m 20 | Layer* l = Model_AddLayer(&m, Dense_CreateGPU(128, inp)); 21 | //Add a GPU tanh activation layer to model m 22 | l = Model_AddLayer(&m, TanhA_CreateGPU(l)); 23 | l = Model_AddLayer(&m, Dense_CreateGPU(128, l)); 24 | l = Model_AddLayer(&m, TanhA_CreateGPU(l)); 25 | l = Model_AddLayer(&m, Dense_CreateGPU(2, l)); 26 | 27 | //Test the model forward pass before optimization: 28 | printf("\nTest model forward pass:\n"); 29 | //Set the model input: 30 | Tensor test = Tensor_CreateGPU(input, 1.f); 31 | inp->input = &test; 32 | //Run the model forward pass: 33 | Model_ForwardGPU(&m); 34 | //The result is written to the model's output layer 35 | //Print the tensor to the console 36 | Tensor_PrintGPU(&l->output); 37 | 38 | //Initialize the optimization parameters: 39 | OptParams p = OptParams_Create(); 40 | //Set the learning rate 41 | p.learning_rate = 0.001f; 42 | //Set the optimization method 43 | p.method = NRMSPROP; 44 | //Prepare model m for training on the GPU with parameters p 45 | PrepareTDataGPU(&m, &p); 46 | 47 | //Create the two samples the model will be trained on 48 | Tensor x1 = Tensor_CreateGPU(input, 1.f); 49 | Tensor x2 = Tensor_CreateGPU(input, -1.f); 50 | 51 | //Create two tensors y1 and y2 with the expected model outputs for x1 and x2 52 | float data1[2] = {1.f, 0.f}; 53 | float data2[2] = { 0.f, 1.f }; 54 | Tensor y1 = Tensor_FromDataGPU({ 1,1,2 }, data1); 55 | Tensor y2 = Tensor_FromDataGPU({ 1,1,2 }, data2); 56 | 57 | //Optimization loop of 300 steps 58 | for (size_t i = 0; i < 300; i++) 59 | { 60 | //Training step for the first sample 61 | //Set the model input: 62 | inp->input = &x1; 63 | //Run the model forward pass: 64 | Model_ForwardGPU(&m); 65 | //Compute the loss for the current model output: 66 | float loss1 = MSE_LossGPU(&l->output, &y1); 67 | //Run the model backward pass and compute the gradients: 68 | Model_BackwardGPU(&m); 69 | //Perform an optimization step and apply the gradients to the model weights: 70 | OptimizeModelGPU(&m, &p); 71 |
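//MSE_LossGPU leaves the output gradient in l->output.dw on the device (mirroring the CPU path), which Model_BackwardGPU then propagates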
72 | //Training step for the second sample 73 | inp->input = &x2; 74 | Model_ForwardGPU(&m); 75 | float loss2 = MSE_LossGPU(&l->output, &y2); 76 | Model_BackwardGPU(&m); 77 | OptimizeModelGPU(&m, &p); 78 | 79 | //Average loss over the two samples at the current optimization step 80 | float total_loss = (loss1 + loss2) * 0.5f; 81 | printf("loss: %f\n", total_loss); 82 | } 83 | 84 | printf("\nTest model forward pass:\n"); 85 | //Test the model after optimization 86 | //Check the model output for the first sample 87 | inp->input = &x1; 88 | Model_ForwardGPU(&m); 89 | Tensor_PrintGPU(&l->output); 90 | 91 | //Check the model output for the second sample 92 | inp->input = &x2; 93 | Model_ForwardGPU(&m); 94 | Tensor_PrintGPU(&l->output); 95 | 96 | printf("\nPress enter to close..."); 97 | getchar(); 98 | return 0; 99 | } -------------------------------------------------------------------------------- /src/RL/ACBrain.c: -------------------------------------------------------------------------------- 1 | #include "ACBrain.h" 2 | 3 | ACBrain* ACBrain_Create(shape state_shape, int n_outputs) 4 | { 5 | ACBrain* brain = malloc(sizeof(ACBrain)); 6 | if (!brain) 7 | { 8 | return NULL; 9 | } 10 | brain->input_shape = (shape){ state_shape.w, state_shape.h, state_shape.d }; 11 | brain->num_outputs = n_outputs; 12 | //brain->net = ACBrain_CreateNet(brain->input_shape, brain->num_outputs); 13 | brain->gamma = 0.99f; 14 | brain->discount = 0.01f; 15 | //create net 16 | brain->ActorNet = Model_Create(); 17 | brain->inpA = Model_AddLayer(&brain->ActorNet, Input_Create(brain->input_shape)); 18 | Layer* l = Model_AddLayer(&brain->ActorNet, Dense_Create(64, R_XAVIER, brain->inpA)); 19 | l = Model_AddLayer(&brain->ActorNet, Dense_Create(64, R_XAVIER, l)); 20 | brain->actor = Model_AddLayer(&brain->ActorNet, Dense_Create(n_outputs, R_XAVIER, l)); 21 | 22 | brain->CriticNet = Model_Create(); 23 | brain->inpC = Model_AddLayer(&brain->CriticNet, Input_Create(brain->input_shape)); 24 | Layer* l2 = Model_AddLayer(&brain->CriticNet, Dense_Create(64, R_XAVIER, brain->inpC)); 25 | l2 = Model_AddLayer(&brain->CriticNet, Dense_Create(64, R_XAVIER, l2)); 26 | brain->critic = Model_AddLayer(&brain->CriticNet, Dense_Create(1, R_XAVIER, l2)); 27 | 28 | brain->par = OptParams_Create(); 29 | brain->par.method = ADAN; 30 | brain->par.learning_rate = 0.00001f; 31 | brain->I = 1.f; 32 | return brain; 33 | } 34 | 35 | Tensor ACBrain_Forward(ACBrain* brain, Tensor* state) 36 | { 37 | brain->inpA->input = state; 38 | Model_Forward(&brain->ActorNet); 39 | Tensor prop = SoftmaxProb(&brain->actor->output);//the caller must release it with Tensor_Free() after use 40 | return prop; 41 | } 42 | 43 | float ACBrain_TrainTrace(ACBrain* brain, Tensor* states, float* rewards, float* actions, int n) 44 | { 45 | float* adv_rewards = createFloatArray(n); 46 | float discounted_sum = 0.f; 47 | for (int i = n - 1; i >= 0; i--) 48 | { 49 | discounted_sum = rewards[i] + brain->gamma * discounted_sum; 50 | adv_rewards[i] = discounted_sum; 51 | } 52 | //NormalizeArray(adv_rewards, n); 53 | 54 | float total_actor_loss = 0; 55 | float total_critic_loss = 0; 56 | for (int i = 0; i < n; i++) 57 | { 58 | //setup critic 59 | brain->inpC->input = &states[i]; 60 | Model_Forward(&brain->CriticNet); 61 | float critic_value = brain->critic->output.w[0]; 62 | float advantage = adv_rewards[i] - critic_value; 63 | Tensor critic_true = Tensor_Create((shape) { 1, 1, 1 }, adv_rewards[i]); 64 | float critic_loss = MSE_Loss(&brain->critic->output, &critic_true); 65 | total_critic_loss += critic_loss; 66 | 
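//the critic regresses toward the discounted return; its error (the advantage)
//then scales the actor's policy gradient below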
Tensor_Free(&critic_true); 67 | Model_Backward(&brain->CriticNet); 68 | OptimizeModel(&brain->CriticNet, &brain->par); 69 | 70 | //setup actor 71 | brain->inpA->input = &states[i]; 72 | Model_Forward(&brain->ActorNet); 73 | float actor_loss = Cross_entropy_Loss(&brain->actor->output, (int)actions[i]); //actions are stored as float, the loss expects a class index 74 | total_actor_loss += actor_loss; 75 | 76 | Tensor prop = SoftmaxProb(&brain->actor->output); 77 | for (size_t j = 0; j < brain->actor->output.n; j++) 78 | { 79 | //float y_true = (j == (int)actions[i]) ? 1.f : 0.f; 80 | //float der = -(y_true - prop.w[j]); 81 | //brain->actor->output.dw[j] = brain->discount * advantage * der; 82 | brain->actor->output.dw[j] *= advantage*brain->I; 83 | } 84 | Tensor_Free(&prop); 85 | Model_Backward(&brain->ActorNet); 86 | //================== 87 | OptimizeModel(&brain->ActorNet, &brain->par); 88 | //brain->I *= 0.9999f; 89 | } 90 | total_actor_loss /= n; 91 | total_critic_loss /= n; 92 | 93 | printf("actor_loss: %f, critic_loss: %f\n", total_actor_loss, total_critic_loss); 94 | free(adv_rewards); 95 | return -1.f; 96 | } 97 | -------------------------------------------------------------------------------- /src/Dense.cu: -------------------------------------------------------------------------------- 1 | #include "Dense.h" 2 | #include <stdlib.h> 3 | 4 | #ifdef __NVCC__ 5 | Layer* Dense_CreateGPU(int num_neurons, Layer* in) 6 | { 7 | Layer* l = (Layer*)malloc(sizeof(Layer)); 8 | if (!l) 9 | { 10 | printf("Dense allocation error!"); 11 | return NULL; 12 | } 13 | l->type = LT_DENSE; 14 | int inn = in->out_shape.w * in->out_shape.h * in->out_shape.d; 15 | //common layer def 16 | l->out_shape = { 1, 1, num_neurons }; 17 | l->n_inputs = inn; 18 | l->output = Tensor_CreateGPU(l->out_shape, 0); 19 | l->input = &in->output; 20 | 21 | float bias = 0.0f; 22 | 23 | Dense* ld = (Dense*)malloc(sizeof(Dense)); 24 | if (ld) { 25 | ld->kernels = (Tensor*)malloc(sizeof(Tensor)); 26 | if (ld->kernels) { 27 | shape kernels_shape = { 1, inn, num_neurons };//each row is weight 28 | *ld->kernels = Tensor_CreateGPU(kernels_shape, 1.f); 29 | Tensor_Xavier_RandGPU(ld->kernels->w, ld->kernels->n); 30 | ld->biases = Tensor_CreateGPU({ 1, 1, num_neurons }, bias); 31 | } 32 | } 33 | else printf("Dense data allocation error\n"); 34 | l->aData = ld; 35 | printf("Dense GPU, output shape: [%d, %d, %d]\n", l->out_shape.w, l->out_shape.h, l->out_shape.d); 36 | return l; 37 | } 38 | 39 | __global__ void Dense_ForwardKernels(shape limit, float* x, float* k, float* out, shape s) 40 | { 41 | int h = (blockIdx.x * blockDim.x) + threadIdx.x; 42 | int d = (blockIdx.y * blockDim.y) + threadIdx.y; 43 | int w = 0; 44 | if (h < limit.h && d < limit.d && w < limit.w) { 45 | 46 | int id = ((s.w * h) + w) * s.d + d; 47 | 48 | float xi = x[h]; 49 | float wi = k[id]; 50 | float mul = xi * wi; 51 | 52 | atomicAdd(&out[d], mul); 53 | } 54 | } 55 | 56 | Tensor* Dense_ForwardGPU(Layer* l) 57 | { 58 | Dense* data = (Dense*)l->aData; 59 | //cudaMemset(l->output.w, 0, sizeof(float) * l->output.n); 60 | Tensor_CopyDataGPU(&l->output, &data->biases); 61 | //===================== 62 | int n = l->input->n; 63 | int nk = l->out_shape.d; 64 | 65 | int threadsPerBlockX = 16; 66 | int threadsPerBlockY = 64; 67 | 68 | dim3 gridDim((int)ceil(n / (float)threadsPerBlockX), (int)ceil(nk / (float)threadsPerBlockY), 1); 69 | dim3 blockDim(threadsPerBlockX, threadsPerBlockY, 1); 70 | 71 | Dense_ForwardKernels KERNEL_CALL(gridDim, blockDim) ({1,n,nk}, 72 | l->input->w, data->kernels->w, l->output.w, data->kernels->s); 73 | 
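//kernel launches are asynchronous: wait for the reduction into the output to finish before the host or the next layer reads it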
cudaDeviceSynchronize(); 74 | return &l->output; 75 | } 76 | 77 | __global__ void Dense_BackwardKernels(shape limit, float* xw, float* xdw, float* kw, float* kdw, float* bdw, float* outdw, shape s) 78 | { 79 | int h = (blockIdx.x * blockDim.x) + threadIdx.x; 80 | int d = (blockIdx.y * blockDim.y) + threadIdx.y; 81 | int w = 0; 82 | if (h < limit.h && d < limit.d && w < limit.w) { 83 | 84 | int id = ((s.w * h) + w) * s.d + d; 85 | 86 | float next_grad = outdw[d]; 87 | //grad wrt input data: threads along d share x[h], so accumulate atomically 88 | atomicAdd(&xdw[h], kw[id] * next_grad); 89 | //grad wrt params: each (h, d) pair owns one kernel element, no race 90 | kdw[id] += xw[h] * next_grad; 91 | //grad wrt bias: add the output gradient once per neuron 92 | if (h == 0) bdw[d] += next_grad; 93 | } 94 | } 95 | 96 | //NOTE: the body of Dense_BackwardKernels was garbled in this copy of the file; 97 | //it is reconstructed above to mirror the CPU path in Dense.c (Dense_Backward). 98 | 99 | void Dense_BackwardGPU(Layer* l) 100 | { 101 | Dense* data = (Dense*)l->aData; 102 | //===================== 103 | int n = l->input->n; 104 | int nk = l->out_shape.d; 105 | 106 | int threadsPerBlockX = 16; 107 | int threadsPerBlockY = 64; 108 | 109 | dim3 gridDim((int)ceil(n / (float)threadsPerBlockX), (int)ceil(nk / (float)threadsPerBlockY), 1); 110 | dim3 blockDim(threadsPerBlockX, threadsPerBlockY, 1); 111 | 112 | Dense_BackwardKernels KERNEL_CALL(gridDim, blockDim) ({1, n, nk}, 113 | l->input->w, l->input->dw, data->kernels->w, data->kernels->dw, data->biases.dw, l->output.dw, data->kernels->s); 114 | cudaDeviceSynchronize(); 115 | } 116 | #endif // __NVCC__ -------------------------------------------------------------------------------- /src/cJSON_Utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2009-2017 Dave Gamble and cJSON contributors 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #ifndef cJSON_Utils__h 24 | #define cJSON_Utils__h 25 | 26 | #ifdef __cplusplus 27 | extern "C" 28 | { 29 | #endif 30 | 31 | #include "cJSON.h" 32 | 33 | /* Implement RFC6901 (https://tools.ietf.org/html/rfc6901) JSON Pointer spec. */ 34 | CJSON_PUBLIC(cJSON *) cJSONUtils_GetPointer(cJSON * const object, const char *pointer); 35 | CJSON_PUBLIC(cJSON *) cJSONUtils_GetPointerCaseSensitive(cJSON * const object, const char *pointer); 36 | 37 | /* Implement RFC6902 (https://tools.ietf.org/html/rfc6902) JSON Patch spec. */ 38 | /* NOTE: This modifies objects in 'from' and 'to' by sorting the elements by their key */ 39 | CJSON_PUBLIC(cJSON *) cJSONUtils_GeneratePatches(cJSON * const from, cJSON * const to); 40 | CJSON_PUBLIC(cJSON *) cJSONUtils_GeneratePatchesCaseSensitive(cJSON * const from, cJSON * const to); 41 | /* Utility for generating patch array entries. */ 42 | CJSON_PUBLIC(void) cJSONUtils_AddPatchToArray(cJSON * const array, const char * const operation, const char * const path, const cJSON * const value); 43 | /* Returns 0 for success. 
*/ 44 | CJSON_PUBLIC(int) cJSONUtils_ApplyPatches(cJSON * const object, const cJSON * const patches); 45 | CJSON_PUBLIC(int) cJSONUtils_ApplyPatchesCaseSensitive(cJSON * const object, const cJSON * const patches); 46 | 47 | /* 48 | // Note that ApplyPatches is NOT atomic on failure. To implement an atomic ApplyPatches, use: 49 | //int cJSONUtils_AtomicApplyPatches(cJSON **object, cJSON *patches) 50 | //{ 51 | // cJSON *modme = cJSON_Duplicate(*object, 1); 52 | // int error = cJSONUtils_ApplyPatches(modme, patches); 53 | // if (!error) 54 | // { 55 | // cJSON_Delete(*object); 56 | // *object = modme; 57 | // } 58 | // else 59 | // { 60 | // cJSON_Delete(modme); 61 | // } 62 | // 63 | // return error; 64 | //} 65 | // Code not added to library since this strategy is a LOT slower. 66 | */ 67 | 68 | /* Implement RFC7386 (https://tools.ietf.org/html/rfc7396) JSON Merge Patch spec. */ 69 | /* target will be modified by patch. return value is new ptr for target. */ 70 | CJSON_PUBLIC(cJSON *) cJSONUtils_MergePatch(cJSON *target, const cJSON * const patch); 71 | CJSON_PUBLIC(cJSON *) cJSONUtils_MergePatchCaseSensitive(cJSON *target, const cJSON * const patch); 72 | /* generates a patch to move from -> to */ 73 | /* NOTE: This modifies objects in 'from' and 'to' by sorting the elements by their key */ 74 | CJSON_PUBLIC(cJSON *) cJSONUtils_GenerateMergePatch(cJSON * const from, cJSON * const to); 75 | CJSON_PUBLIC(cJSON *) cJSONUtils_GenerateMergePatchCaseSensitive(cJSON * const from, cJSON * const to); 76 | 77 | /* Given a root object and a target object, construct a pointer from one to the other. */ 78 | CJSON_PUBLIC(char *) cJSONUtils_FindPointerFromObjectTo(const cJSON * const object, const cJSON * const target); 79 | 80 | /* Sorts the members of the object into alphabetical order. 
*/ 81 | CJSON_PUBLIC(void) cJSONUtils_SortObject(cJSON * const object); 82 | CJSON_PUBLIC(void) cJSONUtils_SortObjectCaseSensitive(cJSON * const object); 83 | 84 | #ifdef __cplusplus 85 | } 86 | #endif 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /src/cmd/cartpole/cartpole.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include "linmath.h" 7 | #include 8 | 9 | #include "cart.h" 10 | #include "box2d/box2d.h" 11 | 12 | #include "TCommon.h" 13 | #include "agent.h" 14 | 15 | GLFWwindow* CreateGLFWindow(int w, int h) 16 | { 17 | GLFWwindow* window = nullptr; 18 | 19 | if (!glfwInit()) 20 | printf("Unable to initialize GLFW"); 21 | 22 | GLFWmonitor* monitor = glfwGetPrimaryMonitor(); 23 | const GLFWvidmode* mode = glfwGetVideoMode(monitor); 24 | int sWidth = mode->width; 25 | int sHeight = mode->height; 26 | 27 | window = glfwCreateWindow(w, h, "Env", NULL, NULL); 28 | if (!window) { 29 | glfwTerminate(); 30 | printf("Unable to create GLFW window"); 31 | } 32 | 33 | glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); 34 | glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2); 35 | glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); 36 | glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); 37 | 38 | glfwMakeContextCurrent(window); 39 | gladLoadGL(glfwGetProcAddress); 40 | glfwSwapInterval(1.0); 41 | 42 | //glEnable(GL_BLEND); 43 | //glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); 44 | glEnable(GL_MULTISAMPLE); // Enabled Multisample 45 | //glEnable(GL_LINE_WIDTH); 46 | //glEnable(GL_DEPTH_TEST); 47 | glEnable(GL_BLEND); 48 | glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); 49 | glBlendEquation(GL_FUNC_ADD); 50 | return window; 51 | } 52 | 53 | Cart* cartLink; 54 | Agent* agentLink; 55 | 56 | void key_callback(GLFWwindow* window, int key, int scancode, int action, int mods) 57 | { 58 | if (key == GLFW_KEY_R && action == GLFW_PRESS) { 59 | cartLink->Reset(); 60 | } 61 | if (key == GLFW_KEY_W && action == GLFW_PRESS) { 62 | 63 | } 64 | if (key == GLFW_KEY_S && action == GLFW_PRESS) { 65 | 66 | } 67 | if (key == GLFW_KEY_A && action == GLFW_PRESS) { 68 | //cartLink->pushLeft(); 69 | } 70 | if (key == GLFW_KEY_D && action == GLFW_PRESS) { 71 | //cartLink->pushRight(); 72 | } 73 | if (key == GLFW_KEY_F && action == GLFW_PRESS) { 74 | 75 | } 76 | if (key == GLFW_KEY_E && action == GLFW_PRESS) { 77 | printf("E pressed\n"); 78 | agentLink->phase = agentLink->phase == Agent::Phase::TRAIN ? 
Agent::Phase::TEST : Agent::Phase::TRAIN; 79 | if (agentLink->phase == Agent::Phase::TEST) { 80 | glfwSwapInterval(1); 81 | printf("TEST PHASE\n"); 82 | } 83 | else { 84 | glfwSwapInterval(0); //vsync off: train at full speed (glfwSwapInterval takes an int) 85 | printf("TRAIN PHASE\n"); 86 | } 87 | } 88 | } 89 | 90 | int main() 91 | { 92 | b2Vec2 gravity(0.0f, -9.8f); 93 | b2World world(gravity); 94 | 95 | int width = 640, height = 480; 96 | GLFWwindow* window = CreateGLFWindow(width, height); 97 | //int width, height; 98 | float aspect = (float)width / (float)height; 99 | float viewRectSize = 1.f; 100 | float right = viewRectSize * aspect, top = viewRectSize; 101 | float bottom = -viewRectSize, left = -viewRectSize * aspect; 102 | glClearColor(1.f, 1.f, 1.f, 0.0f); 103 | 104 | mat4x4 m, p, mvp; 105 | mat4x4_ortho(p, left, right, bottom, top, 1.f, -1.f); 106 | 107 | Cart cart(&world); 108 | cartLink = &cart; 109 | 110 | Agent agent(&cart); 111 | agentLink = &agent; 112 | 113 | glfwSetKeyCallback(window, key_callback); 114 | 115 | float lastFrame = 0; 116 | float deltaTime = 0; 117 | 118 | float time = 0; 119 | printf("=== CARTPOLE ENVIRONMENT ===\n"); 120 | printf("=== Controls: R-reset, E-toggle test/train mode ===\n"); 121 | while (!glfwWindowShouldClose(window)) 122 | { 123 | float currentFrame = (float)glfwGetTime(); 124 | deltaTime = currentFrame - lastFrame; 125 | lastFrame = currentFrame; 126 | time += deltaTime; 127 | 128 | glViewport(0, 0, width, height); 129 | glClear(GL_COLOR_BUFFER_BIT); 130 | glLoadMatrixf((const GLfloat*)p); 131 | 132 | agent.Discover(); 133 | cart.Draw(); 134 | 135 | //printf("pole angle: %f\n", cart.poleAngle()); 136 | //printf("cart pos x:%f\n", cart.cartPos()); 137 | //printf("cart velocity: %f\n", cart.cartVel()); 138 | //printf("pole velocity: %f\n", cart.poleVel()); 139 | 140 | glfwSwapBuffers(window); 141 | glfwPollEvents(); 142 | } 143 | 144 | glfwTerminate(); 145 | return 0; 146 | } -------------------------------------------------------------------------------- /src/cmd/qmaze/grid.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef GRID_H 3 | #define GRID_H 4 | 5 | #include "geometry/TVec3.h" 6 | #include "TCommon.h" 7 | #include "cell.h" 8 | #include <vector> 9 | #include "Tensor.h" 10 | 11 | class Grid{ 12 | public: 13 | float cellSize = 0.5f; 14 | std::vector<std::vector<Cell>> gridOfCells; 15 | int Rows = 1; 16 | int Cols = 1; 17 | float epsilon = 0.8f;//threshold for the random number generator 18 | 19 | Grid(int _Rows, int _Cols) 20 | { 21 | Rows = _Rows; 22 | Cols = _Cols; 23 | generateGrid(Rows, Cols); 24 | gridOfCells[9][9].quad.Color = { 1.f,0,0 }; 25 | //SimpleMaze(); 26 | } 27 | 28 | Tensor ToState(int curI, int curJ) 29 | { 30 | Tensor state = Tensor_Create({Rows, Cols, 1},0); 31 | for (size_t i = 0; i < Rows; i++) 32 | { 33 | for (size_t j = 0; j < Cols; j++) 34 | { 35 | Cell* c = &gridOfCells[i][j]; 36 | if (c->walkable == false) 37 | Tensor_Set(&state, i,j,0, -1.f); 38 | } 39 | } 40 | Tensor_Set(&state, curI, curJ, 0, 10.f); 41 | return state; 42 | } 43 | 44 | void generateGrid(int ic, int jc) 45 | { 46 | for (int i = 0; i < ic; i++) { 47 | std::vector<Cell> row; 48 | for (int j = 0; j < jc; j++) 49 | { 50 | TVec3 pos = {i * cellSize, j * cellSize, 0,}; 51 | Cell c(pos, cellSize, i, j, SimpleWall()); 52 | row.push_back(c); 53 | } 54 | gridOfCells.push_back(row); 55 | } 56 | } 57 | 58 | bool SimpleWall() 59 | { 60 | bool walkable = true; 61 | float f = rngFloat(); 62 | if (f > epsilon) 63 | walkable = false; 64 | return walkable; 65 | } 66 | 67 | void SimpleMaze() 68 | { 
69 | float placementThreshold = .6f; 70 | int rMax = Rows; 71 | int cMax = Cols; 72 | for (int i = 0; i < rMax; i++) 73 | { 74 | for (int j = 0; j < cMax; j++) 75 | { 76 | 77 | // outside wall 78 | /* 79 | if (i == 0 || j == 0 || i == rMax || j == cMax) 80 | { 81 | gridOfCells[i][j].SetWalkable(false); 82 | } 83 | else if (i == 1 || j == 1 || i == rMax-1 || j == cMax-1) 84 | { 85 | gridOfCells[i][j].SetWalkable(true); 86 | } 87 | // every other inside space 88 | else 89 | */ 90 | if (i % 2 == 0 && j % 2 == 0) 91 | { 92 | float f = rngFloat(); 93 | //float f = static_cast<float>(rand()) / static_cast<float>(RAND_MAX); 94 | if (f > placementThreshold) 95 | { 96 | (&gridOfCells[i][j])->SetWalkable(false); 97 | 98 | int a = rngFloat() < .5f ? 0 : (rngFloat() < .5f ? -1 : 1); //fresh draws: reusing f here would always pick the same offset, since f > placementThreshold 99 | int b = a != 0 ? 0 : (rngFloat() < .5f ? -1 : 1); 100 | if (i + a >= 0 && i + a < rMax && j + b >= 0 && j + b < cMax) (&gridOfCells[i+a][j+b])->SetWalkable(false); //keep the second wall cell inside the grid 101 | } 102 | } 103 | } 104 | } 105 | } 106 | 107 | struct Vec2 108 | { 109 | int x, y; 110 | }; 111 | 112 | Vec2 FindFreeRandomPosition() 113 | { 114 | int rMax = Rows - 1; 115 | int cMax = Cols - 1; 116 | Vec2 pos = {-1, -1}; 117 | //==0 free 118 | int isWall = 1; 119 | while (isWall > 0) 120 | { 121 | pos.x = rand() % (rMax - 0 + 1) + 0; 122 | pos.y = rand() % (cMax - 0 + 1) + 0; 123 | Cell c = gridOfCells[pos.x][pos.y]; 124 | isWall = c.walkable ? 0 : 1; 125 | } 126 | return pos; 127 | } 128 | 129 | void Draw() 130 | { 131 | for (size_t i = 0; i < Rows; i++) 132 | { 133 | for (size_t j = 0; j < Cols; j++) 134 | { 135 | gridOfCells[i][j].Draw(); 136 | } 137 | } 138 | } 139 | 140 | private: 141 | }; 142 | #endif -------------------------------------------------------------------------------- /src/cmd/cartpole/agent.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "geometry/TVec3.h" 4 | #include "TCommon.h" 5 | 6 | #include "Model.h" 7 | #include "tList.h" 8 | #include "fList.h" 9 | #include "Utils.h" 10 | 11 | #include "RL/ACBrain.h" 12 | #include "RL/RLBrain.h" 13 | 14 | #include "cart.h" 15 | 16 | struct Samples 17 | { 18 | tList states; 19 | fList rewards; 20 | fList actions; 21 | }; 22 | 23 | class Agent { 24 | public: 25 | enum Phase { TRAIN, TEST }; 26 | enum MoveDirection 27 | { 28 | Left = 0, Right = 1, Idle = 2 29 | }; 30 | 31 | Cart* cart; 32 | float epsilon = 0.8f; 33 | int n_actions = 3; 34 | Tensor state; 35 | Phase phase = Phase::TEST; 36 | ACBrain *brain; 37 | Samples sa; 38 | shape input_shape = { 1, 1, 4 }; 39 | 40 | float timeStep = 1.0f / 60.0f; 41 | int32 velocityIterations = 6; 42 | int32 positionIterations = 2; 43 | Agent(Cart* c) 44 | { 45 | cart = c; 46 | state = Tensor_Create(input_shape, 0); 47 | 48 | sa.states = tList_create(); 49 | sa.actions = fList_create(); 50 | sa.rewards = fList_create(); 51 | 52 | brain = ACBrain_Create(input_shape, n_actions); 53 | } 54 | 55 | ~Agent() 56 | { 57 | } 58 | 59 | int Policy(Tensor* s) 60 | { 61 | Tensor t = ACBrain_Forward(brain, s); 62 | shape max = T_Argmax(&t); 63 | int act = max.d; 64 | Tensor_Free(&t); 65 | return act; 66 | } 67 | 68 | int Act(Tensor* s) 69 | { 70 | Tensor prob = ACBrain_Forward(brain, s); 71 | int act = rng_by_prob(prob.w, prob.n); 72 | Tensor_Free(&prob); 73 | return act; 74 | } 75 | 76 | float GetReward() 77 | { 78 | float reward = 1.0f; 79 | return reward; 80 | } 81 | float trace_reward = 0; 82 | void Discover() 83 | { 84 | if (phase == TRAIN) { 85 | int a = Act(&state); 86 | 87 | Tensor next_state = Move((MoveDirection)a); 88 | float reward = GetReward(); 89 | trace_reward += reward; 90 | 91 | 
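//record the transition; once the pole falls or the cart leaves the track,
//the whole episode (trace) is replayed for training below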
tList_push(&sa.states, &state); 92 | fList_push(&sa.actions, (float)a); 93 | fList_push(&sa.rewards, (float)reward); 94 | 95 | Tensor_Copy(&state, &next_state); 96 | Tensor_Free(&next_state); 97 | if (cart->needToReset()) 98 | { 99 | printf("\nepsilon: %f trace reward: %f\n", epsilon, trace_reward); 100 | if (trace_reward > 500) 101 | sa.rewards.data[sa.rewards.length - 1] = 10.f; 102 | else 103 | sa.rewards.data[sa.rewards.length - 1] = -10.f; 104 | ACBrain_TrainTrace(brain, sa.states.data, sa.rewards.data, sa.actions.data, sa.states.length); 105 | 106 | tList_free(&sa.states); 107 | fList_free(&sa.actions); 108 | fList_free(&sa.rewards); 109 | 110 | sa.states = tList_create(); 111 | sa.actions = fList_create(); 112 | sa.rewards = fList_create(); 113 | 114 | if (trace_reward > 4000) 115 | { 116 | printf("Maximum score reached, set TEST phase\n"); 117 | glfwSwapInterval(1); 118 | phase = TEST; 119 | } 120 | trace_reward = 0; 121 | cart->Reset(); 122 | } 123 | } 124 | else 125 | { 126 | int action = Policy(&state); 127 | Tensor next_state = Move(MoveDirection(action)); 128 | Tensor_Free(&state); 129 | state = next_state; 130 | } 131 | } 132 | 133 | Tensor Move(MoveDirection d) 134 | { 135 | cart->Step((int)d); 136 | cart->world->Step(timeStep, velocityIterations, positionIterations); 137 | Tensor s = Tensor_Create(input_shape, 0); 138 | s.w[0] = cart->cartPos(); 139 | s.w[1] = cart->cartVel(); 140 | s.w[2] = cart->poleAngle(); 141 | s.w[3] = cart->poleVel(); 142 | return s; 143 | } 144 | 145 | private: 146 | }; -------------------------------------------------------------------------------- /src/cmd/mult_opt_test.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include "Model.h" 4 | #include "Optimizer.h" 5 | #include "Losses.h" 6 | #include "Utils.h" 7 | 8 | //Example: build and train a model with multiple outputs on the CPU 9 | int main() 10 | { 11 | //Define the input data shape 12 | shape input = { 128,128,1 }; 13 | //Create the two samples the model will be trained on 14 | Tensor x1 = Tensor_Create(input, 1.f); 15 | Tensor x2 = Tensor_Create(input, -1.f); 16 | 17 | printf("Create model structure:\n"); 18 | //Initialize the model on the CPU 19 | Model n = Model_Create(); 20 | //Add a CPU input layer to model n and keep a reference to it in inp 21 | Layer* inp = Model_AddLayer(&n, Input_Create(input)); 22 | //Add a CPU fully connected layer to model n 23 | Layer *l = Model_AddLayer(&n, Dense_Create(128, R_XAVIER, inp)); 24 | //Add a CPU tanh activation layer to model n 25 | l = Model_AddLayer(&n, TanhA_Create(l)); 26 | l = Model_AddLayer(&n, Dense_Create(64, R_XAVIER, l)); 27 | l = Model_AddLayer(&n, TanhA_Create(l)); 28 | 29 | //Create the first branch off the model's tanh layer, with output out1 30 | Layer *out1 = Model_AddLayer(&n, Dense_Create(32, R_XAVIER, l)); 31 | out1 = Model_AddLayer(&n, TanhA_Create(out1)); 32 | out1 = Model_AddLayer(&n, Dense_Create(2, R_XAVIER, out1)); 33 | 34 | //Create the second branch off the model's tanh layer, with output out2 35 | Layer* out2 = Model_AddLayer(&n, Dense_Create(32, R_XAVIER, l)); 36 | out2 = Model_AddLayer(&n, TanhA_Create(out2)); 37 | out2 = Model_AddLayer(&n, Dense_Create(2, R_XAVIER, out2)); 38 | 39 | //Create two tensors t1 and t2 with the expected outputs of the second branch (out2) for x1 and x2 40 | Tensor t1 = Tensor_Create(out2->out_shape, 0); t1.w[0] = 10.f; 41 | Tensor t2 = Tensor_Create(out2->out_shape, 0); t2.w[1] = 10.f; 42 | 43 | 
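//Both output branches share the trunk up to the last tanh layer:
//Model_Backward accumulates the gradients coming from out1 (cross-entropy)
//and out2 (MSE) into the shared weights, so one optimizer step trains both.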
//====optimization===== 44 | //Initialize the optimization parameters: 45 | OptParams p = OptParams_Create(); 46 | //Set the learning rate 47 | p.learning_rate = 0.01f; 48 | //Set the optimization method 49 | p.method = ADAN; 50 | 51 | printf("Optimizer: ADAN, lr = 0.01, loss1 = cross_entropy, loss2 = mse\n"); 52 | //Optimization loop of 500 steps 53 | for (size_t i = 0; i < 500; i++) 54 | { 55 | //Training step for the first sample 56 | //Set the model input: 57 | inp->input = &x1; 58 | //Run the model forward pass: 59 | Model_Forward(&n); 60 | //Compute the losses for model outputs out1 and out2: 61 | float loss1_1 = Cross_entropy_Loss(&out1->output, 0); 62 | float loss1_2 = MSE_Loss(&out2->output, &t1); 63 | //Run the model backward pass and compute the gradients: 64 | Model_Backward(&n); 65 | //Perform an optimization step and apply the gradients to the model weights: 66 | OptimizeModel(&n, &p); 67 | 68 | //Training step for the second sample 69 | inp->input = &x2; 70 | Model_Forward(&n); 71 | float loss2_1 = Cross_entropy_Loss(&out1->output, 1); 72 | float loss2_2 = MSE_Loss(&out2->output, &t2); 73 | Model_Backward(&n); 74 | OptimizeModel(&n, &p); 75 | 76 | //Average loss over the two samples at the current step, reported separately for out1 and out2 77 | float total_loss1 = (loss1_1 + loss2_1) * 0.5f; 78 | float total_loss2 = (loss1_2 + loss2_2) * 0.5f; 79 | printf("loss_1: %f, loss_2: %f\n", total_loss1, total_loss2); 80 | } 81 | 82 | printf("\nTest model forward pass:\n"); 83 | printf("\nSample 1:"); 84 | //Test the model after optimization 85 | //Check the model output for the first sample 86 | inp->input = &x1; 87 | Model_Forward(&n); 88 | //Since Cross_entropy_Loss was used for the first output, apply softmax to out1 to map the results into the [0,1] range 89 | Tensor o1 = SoftmaxProb(&out1->output); 90 | PrintArray(o1.w, o1.n); 91 | PrintArray(out2->output.w, out2->output.n); 92 | //Free the tensor holding the softmax results, since it is no longer needed
93 | Tensor_Free(&o1); 94 | 95 | printf("\nSample 2:"); 96 | //Check the model output for the second sample 97 | inp->input = &x2; 98 | Model_Forward(&n); 99 | Tensor o2 = SoftmaxProb(&out1->output); 100 | PrintArray(o2.w, o2.n); 101 | PrintArray(out2->output.w, out2->output.n); 102 | Tensor_Free(&o2); 103 | 104 | printf("\nPress enter to close..."); 105 | getchar(); 106 | return 0; 107 | } -------------------------------------------------------------------------------- /src/Tensor.cu: -------------------------------------------------------------------------------- 1 | #include "Tensor.h" 2 | #include <stdio.h> 3 | #include <stdlib.h> 4 | #include <math.h> 5 | #include <curand.h> 6 | #include <curand_kernel.h> 7 | 8 | #ifdef __NVCC__ 9 | Tensor Tensor_FromDataGPU(shape s, const float* data) 10 | { 11 | Tensor t = Tensor_CreateGPU(s, 0.f); 12 | cudaMemcpy(t.w, data, sizeof(float)*t.n, cudaMemcpyHostToDevice); 13 | return t; 14 | } 15 | 16 | __global__ void Tensor_FillKernel(int limit, float *w, float v) 17 | { 18 | int i = (blockIdx.x * blockDim.x) + threadIdx.x; 19 | if (i < limit) w[i] = v; 20 | } 21 | 22 | void Tensor_FillGPU(Tensor* v, float c) 23 | { 24 | int threadsPerBlockX = 32; 25 | if (v->n < threadsPerBlockX) threadsPerBlockX = 1; 26 | 27 | dim3 gridDim(ceil(v->n / (float)threadsPerBlockX), 1, 1); 28 | dim3 blockDim(threadsPerBlockX, 1, 1); 29 | Tensor_FillKernel KERNEL_CALL(gridDim, blockDim) (v->n, v->w, c); 30 | cudaDeviceSynchronize(); 31 | } 32 | 33 | void Tensor_FillArrayGPU(float* v, int n, float c) 34 | { 35 | int threadsPerBlockX = 32; 36 | if (n < threadsPerBlockX) threadsPerBlockX = 1; 37 | dim3 gridDim(ceil(n / (float)threadsPerBlockX), 1, 1); 38 | dim3 blockDim(threadsPerBlockX, 1, 1); 39 | Tensor_FillKernel KERNEL_CALL(gridDim, blockDim) (n, v, c); 40 | cudaDeviceSynchronize(); 41 | } 42 | 43 | Tensor Tensor_CreateGPU(shape s, float c) 44 | { 45 | Tensor v; 46 | v.s.w = s.w; 47 | v.s.h = s.h; 48 | v.s.d = s.d; 49 | v.n = s.w * s.h * s.d; 50 | v.sumdw = 0; 51 | 52 | v.w = NULL; 53 | v.dw = NULL; 54 | //v.vt = NULL; 55 | v.tData = NULL; 56 | 57 | if (cudaMalloc((void**)&v.w, v.n * sizeof(float)) != cudaSuccess) printf("Tensor weights allocation error\n"); 58 | else Tensor_FillGPU(&v, c); 59 | if (cudaMalloc((void**)&v.dw, v.n * sizeof(float)) != cudaSuccess) printf("Tensor grads allocation error\n"); 60 | else cudaMemset(v.dw, 0, sizeof(float) * v.n); 61 | //if (cudaMalloc((void**)&v.vt, v.n * sizeof(float)) != cudaSuccess) printf("Tensor additions allocation error\n"); 62 | //else cudaMemset(v.vt, 0, sizeof(float) * v.n); 63 | 64 | return v; 65 | } 66 | void Tensor_FreeGPU(Tensor* v) 67 | { 68 | if (cudaFree(v->w) != cudaSuccess) printf("Tensor weights free error\n"); 69 | else v->w = NULL; 70 | if (cudaFree(v->dw) != cudaSuccess) printf("Tensor grads free error\n"); 71 | else v->dw = NULL; 72 | //if (cudaFree(v->vt) != cudaSuccess) printf("Tensor additions free error\n"); 73 | //else v->vt = NULL; 74 | } 75 | 76 | void Tensor_CopyDataGPU(Tensor* dst, Tensor* src) 77 | { 78 | cudaMemcpy(dst->w, src->w, sizeof(float) * src->n, cudaMemcpyDeviceToDevice); 79 | } 80 | #endif 81 | 82 | //print weights 83 | #ifdef __NVCC__ 84 | __global__ void TPrintKernel(float* w, int n) 85 | { 86 | printf("["); 87 | for (int i = 0; i < n; i++) 88 | printf("%f, ", w[i]); 89 | printf("]\n"); 90 | } 91 | void Tensor_PrintGPU(Tensor* v) 92 | { 93 | TPrintKernel KERNEL_CALL_ONCE(v->w, v->n); 94 | cudaDeviceSynchronize(); 95 | } 96 | void Tensor_PrintArrayGPU(float* v, int n) 97 | { 98 | TPrintKernel KERNEL_CALL_ONCE(v, n); 99 | cudaDeviceSynchronize(); 100 | } 101 | #endif 102 | 103 | //random weights 104 | #ifdef __NVCC__ 105 | __global__ void xavier_rand_kernel(void* globalState, float* w, int n) 106 | { 107 | int i = 
(blockIdx.x * blockDim.x) + threadIdx.x; 108 | if (i < n) { 109 | //calculate the range for the weights 110 | float lower = -(1.0f / sqrtf(n)); 111 | float upper = (1.0f / sqrtf(n)); 112 | curandState localState = ((curandState*)globalState)[i]; 113 | float num = curand_uniform(&localState); 114 | //scale to the desired range 115 | float scaled = lower + num * (upper - lower); 116 | w[i] = scaled; 117 | ((curandState*)globalState)[i] = localState; 118 | } 119 | } 120 | 121 | __global__ void setup_rng_kernel(int limit, void* state) 122 | { 123 | int id = (blockIdx.x * blockDim.x) + threadIdx.x; 124 | if (id < limit) { 125 | curandState* ls = (curandState*)state; 126 | curand_init(clock(), id, 0, &ls[id]); 127 | } 128 | } 129 | void Tensor_Xavier_RandGPU(float *w, int n) 130 | { 131 | curandState* devStates; 132 | cudaMalloc(&devStates, n * sizeof(curandState)); 133 | setup_rng_kernel KERNEL_CALL(n, 1) (n, devStates); 134 | cudaDeviceSynchronize(); 135 | 136 | int threadsPerBlockX = 32; 137 | 138 | dim3 gridDim(ceil(n / (float)threadsPerBlockX), 1, 1); 139 | dim3 blockDim(threadsPerBlockX, 1, 1); 140 | xavier_rand_kernel KERNEL_CALL(gridDim, blockDim) (devStates, w, n); 141 | cudaDeviceSynchronize(); 142 | 143 | cudaFree(devStates); 144 | } 145 | #endif 146 | //============================================================================================ 147 | 148 | #ifdef __NVCC__ 149 | #endif 150 | -------------------------------------------------------------------------------- /src/Dense.c: -------------------------------------------------------------------------------- 1 | #include "Dense.h" 2 | #include 3 | 4 | Layer* Dense_Create(int num_neurons, RandType weightInit, Layer* in) 5 | { 6 | Layer* l = (Layer*)malloc(sizeof(Layer)); 7 | if (!l) 8 | { 9 | printf("Dense allocation error!"); 10 | return NULL; 11 | } 12 | l->type = LT_DENSE; 13 | int inn = in->out_shape.w * in->out_shape.h * in->out_shape.d; 14 | //common layer def 15 | l->out_shape = (shape){ 1, 1, num_neurons }; 16 | l->n_inputs = inn; 17 | l->output = Tensor_Create(l->out_shape, 0); 18 | l->input = &in->output; 19 | 20 | float bias = 0.0f; 21 | 22 | Dense *ld = (Dense*)malloc(sizeof(Dense)); 23 | if (ld) { 24 | //ld->activation = act; 25 | ld->n = num_neurons; 26 | ld->kernels = (Tensor*)malloc(sizeof(Tensor) * num_neurons); 27 | if (ld->kernels) { 28 | //shape kernels_shape = { 1, 1, inn }; 29 | for (size_t i = 0; i < num_neurons; i++) 30 | { 31 | ld->kernels[i] = Tensor_Create(in->out_shape, 0.f); 32 | 33 | switch (weightInit) 34 | { 35 | case R_XAVIER: 36 | Tensor_Xavier_Rand(ld->kernels[i].w, ld->kernels[i].n); 37 | break; 38 | case R_HE: 39 | Tensor_He_Rand(ld->kernels[i].w, ld->kernels[i].n); 40 | break; 41 | default: 42 | break; 43 | } 44 | } 45 | ld->biases = Tensor_Create((shape){ 1, 1, num_neurons }, bias); 46 | } 47 | else printf("Kernels allocation error\n"); 48 | } 49 | else printf("Dense data allocation error\n"); 50 | l->aData = ld; 51 | printf("Dense, output shape: [%d, %d, %d]\n", l->out_shape.w, l->out_shape.h, l->out_shape.d); 52 | return l; 53 | } 54 | 55 | Tensor* Dense_Forward(Layer* l) 56 | { 57 | Tensor* x = l->input; 58 | Dense* data = (Dense*)l->aData; 59 | for (int d = 0; d < l->out_shape.d; d++) //foreach kernel 60 | { 61 | float wsum = 0; 62 | for (int i = 0; i < x->n; i++) 63 | { 64 | //int id = tIdx(data->kernels.s, 0, i, d); 65 | //float wi = Tensor_Get(&data->kernels, 0, i, d); 66 | wsum += x->w[i] * data->kernels[d].w[i]; 67 | } 68 | wsum += data->biases.w[d]; 69 | 70 | //if (data->activation 
== A_RELU) 71 | // if (wsum < 0) wsum = 0.0001f * wsum; 72 | 73 | l->output.w[d] = wsum; 74 | } 75 | return &l->output; 76 | } 77 | 78 | void Dense_Backward(Layer* l) 79 | { 80 | Dense* data = (Dense*)l->aData; 81 | Tensor* x = l->input; 82 | for (int d = 0; d < l->out_shape.d; d++) 83 | { 84 | float next_grad = l->output.dw[d]; 85 | //if(data->activation == A_RELU) 86 | // if (l->output.w[d] < 0) chain_grad = 0.0001f * chain_grad; // threshold 87 | for (int h = 0; h < l->n_inputs; h++) 88 | { 89 | //int idx = tIdx(ke->s, 0, h, d); 90 | x->dw[h] += data->kernels[d].w[h] * next_grad; // grad wrt input data 91 | data->kernels[d].dw[h] += x->w[h] * next_grad; // grad wrt params 92 | } 93 | data->biases.dw[d] += next_grad; 94 | } 95 | } 96 | 97 | cJSON* Dense_To_JSON(Dense* d) 98 | { 99 | cJSON* Data = cJSON_CreateObject(); 100 | cJSON* fi = cJSON_CreateArray(); 101 | 102 | cJSON_AddNumberToObject(Data, "nk", d->biases.n); 103 | 104 | for (int i = 0; i < d->biases.n; i++) 105 | { 106 | cJSON_AddItemToArray(fi, Tensor_To_JSON(&d->kernels[i])); 107 | } 108 | cJSON_AddItemToObject(Data, "kernels", fi); 109 | cJSON_AddItemToObject(Data, "biases", Tensor_To_JSON(&d->biases)); 110 | return Data; 111 | } 112 | 113 | void Dense_Load_JSON(Dense* d, cJSON* node) 114 | { 115 | cJSON* nk = cJSON_GetObjectItem(node, "nk"); 116 | cJSON* kernels = cJSON_GetObjectItem(node, "kernels");//array 117 | cJSON* biases = cJSON_GetObjectItem(node, "biases"); 118 | //load biases 119 | Tensor_Load_JSON(&d->biases, biases); 120 | //load kernels 121 | int n = nk->valueint; 122 | 123 | int i = 0; 124 | cJSON* kernel = NULL; 125 | cJSON_ArrayForEach(kernel, kernels) 126 | { 127 | Tensor_Load_JSON(&d->kernels[i], kernel); 128 | i++; 129 | } 130 | /* 131 | for (int i = 0; i < n; i++) 132 | { 133 | cJSON* f = cJSON_GetArrayItem(kernels, i); 134 | Tensor_Load_JSON(&d->kernels[i], f); 135 | }*/ 136 | } 137 | 138 | void Dense_Free(Layer* l) 139 | { 140 | Dense* data = (Dense*)l->aData; 141 | for (int i = 0; i < data->n; i++) 142 | { 143 | Tensor_Free(&data->kernels[i]); 144 | } 145 | Tensor_Free(&data->biases); 146 | free(data); 147 | Tensor_Free(&l->output); 148 | free(l); 149 | } 150 | 151 | void Dense_GetGrads(Dense* l, dList* grads) 152 | { 153 | for (size_t i = 0; i < l->n; i++) 154 | { 155 | //add kernel 156 | dList_push(grads, &l->kernels[i]); 157 | } 158 | //add bias 159 | dList_push(grads, &l->biases); 160 | } -------------------------------------------------------------------------------- /src/cmd/cartpole_cont/cartpole.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include "linmath.h" 7 | #include 8 | 9 | #include "cart.h" 10 | #include "box2d/box2d.h" 11 | 12 | #include "TCommon.h" 13 | #include "agent.h" 14 | 15 | GLFWwindow* CreateGLFWindow(int w, int h) 16 | { 17 | GLFWwindow* window = nullptr; 18 | 19 | if (!glfwInit()) 20 | printf("Unable to initialize GLFW"); 21 | 22 | GLFWmonitor* monitor = glfwGetPrimaryMonitor(); 23 | const GLFWvidmode* mode = glfwGetVideoMode(monitor); 24 | int sWidth = mode->width; 25 | int sHeight = mode->height; 26 | 27 | window = glfwCreateWindow(w, h, "Env", NULL, NULL); 28 | if (!window) { 29 | glfwTerminate(); 30 | printf("Unable to create GLFW window"); 31 | } 32 | 33 | glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); 34 | glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2); 35 | glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); 36 | glfwWindowHint(GLFW_OPENGL_PROFILE, 
GLFW_OPENGL_CORE_PROFILE); 37 | 38 | glfwMakeContextCurrent(window); 39 | gladLoadGL(glfwGetProcAddress); 40 | glfwSwapInterval(1); 41 | 42 | //glEnable(GL_BLEND); 43 | //glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); 44 | glEnable(GL_MULTISAMPLE); // Enable multisampling 45 | //glEnable(GL_LINE_WIDTH); 46 | //glEnable(GL_DEPTH_TEST); 47 | glEnable(GL_BLEND); 48 | glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); 49 | glBlendEquation(GL_FUNC_ADD); 50 | return window; 51 | } 52 | 53 | Cart* cartLink; 54 | Agent* agentLink; 55 | 56 | void key_callback(GLFWwindow* window, int key, int scancode, int action, int mods) 57 | { 58 | if (key == GLFW_KEY_R && action == GLFW_PRESS) { 59 | cartLink->Reset(); 60 | } 61 | if (key == GLFW_KEY_W && action == GLFW_PRESS) { 62 | 63 | } 64 | if (key == GLFW_KEY_S && action == GLFW_PRESS) { 65 | 66 | } 67 | if (key == GLFW_KEY_A && action == GLFW_PRESS) { 68 | //cartLink->pushLeft(); 69 | printf("Push the cart left\n"); 70 | cartLink->b2->m_body->ApplyLinearImpulseToCenter(b2Vec2(-0.2f, 0), true); 71 | } 72 | if (key == GLFW_KEY_D && action == GLFW_PRESS) { 73 | //cartLink->pushRight(); 74 | printf("Push the cart right\n"); 75 | cartLink->b2->m_body->ApplyLinearImpulseToCenter(b2Vec2(0.2f, 0), true); 76 | } 77 | if (key == GLFW_KEY_F && action == GLFW_PRESS) { 78 | 79 | } 80 | if (key == GLFW_KEY_E && action == GLFW_PRESS) { 81 | printf("E pressed\n"); 82 | agentLink->phase = agentLink->phase == Agent::Phase::TRAIN ? Agent::Phase::TEST : Agent::Phase::TRAIN; 83 | if (agentLink->phase == Agent::Phase::TEST) { 84 | glfwSwapInterval(1); 85 | printf("TEST PHASE\n"); 86 | } 87 | else { 88 | glfwSwapInterval(0); //vsync off: train at full speed (glfwSwapInterval takes an int) 89 | printf("TRAIN PHASE\n"); 90 | } 91 | } 92 | } 93 | 94 | int main() 95 | { 96 | b2Vec2 gravity(0.0f, -9.8f); 97 | b2World world(gravity); 98 | 99 | int width = 640, height = 480; 100 | GLFWwindow* window = CreateGLFWindow(width, height); 101 | //int width, height; 102 | float aspect = (float)width / (float)height; 103 | float viewRectSize = 1.f; 104 | float right = viewRectSize * aspect, top = viewRectSize; 105 | float bottom = -viewRectSize, left = -viewRectSize * aspect; 106 | glClearColor(1.f, 1.f, 1.f, 0.0f); 107 | 108 | mat4x4 m, p, mvp; 109 | mat4x4_ortho(p, left, right, bottom, top, 1.f, -1.f); 110 | 111 | Cart cart(&world); 112 | cartLink = &cart; 113 | 114 | Agent agent(&cart); 115 | agentLink = &agent; 116 | 117 | glfwSetKeyCallback(window, key_callback); 118 | 119 | float lastFrame = 0; 120 | float deltaTime = 0; 121 | 122 | float time = 0; 123 | //on Linux, building GLFW apps needs the X11 dev headers: sudo apt-get install xorg-dev 124 | printf("=== CONTINUOUS CARTPOLE ENVIRONMENT ===\n"); 125 | printf("=== Controls: R-reset, E-toggle test/train mode ===\n"); 126 | printf("=== Controls: A-push the cart left, D-push the cart right ===\n"); 127 | while (!glfwWindowShouldClose(window)) 128 | { 129 | float currentFrame = (float)glfwGetTime(); 130 | deltaTime = currentFrame - lastFrame; 131 | lastFrame = currentFrame; 132 | time += deltaTime; 133 | 134 | glViewport(0, 0, width, height); 135 | glClear(GL_COLOR_BUFFER_BIT); 136 | glLoadMatrixf((const GLfloat*)p); 137 | 138 | agent.Discover(); 139 | cart.Draw(); 140 | 141 | //printf("pole angle: %f\n", cart.poleAngle()); 142 | //printf("cart pos x:%f\n", cart.cartPos()); 143 | //printf("cart velocity: %f\n", cart.cartVel()); 144 | //printf("pole velocity: %f\n", cart.poleVel()); 145 | 146 | glfwSwapBuffers(window); 147 | glfwPollEvents(); 148 | } 149 | 150 | glfwTerminate(); 151 | return 0; 152 | }
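/* Sketch of the continuous-control loop this environment supports (assuming the
   DDPG/TD3-style agent in the local agent.h): read [cartPos, cartVel, poleAngle, poleVel],
   let the actor emit one scalar force, then advance the physics:
       float force = ...;               //actor output, e.g. a value in [-0.2f, 0.2f]
       cart.ApplyForceValue(force);     //continuous action instead of the discrete Step(0|1)
       world.Step(1.0f / 60.0f, 6, 2);  //same solver settings the discrete agent uses
*/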
-------------------------------------------------------------------------------- /src/cmd/cartpole_td3/cartpole.cpp: -------------------------------------------------------------------------------- 
1 | #include <glad/gl.h>
2 | #include <GLFW/glfw3.h>
3 | 
4 | #include <stdio.h>
5 | #include <stdlib.h>
6 | #include "linmath.h"
7 | #include <math.h>
8 | 
9 | #include "cmd/cartpole_cont/cart.h"
10 | #include "box2d/box2d.h"
11 | 
12 | #include "TCommon.h"
13 | #include "agent.h"
14 | 
15 | GLFWwindow* CreateGLFWindow(int w, int h)
16 | {
17 | GLFWwindow* window = nullptr;
18 | 
19 | if (!glfwInit())
20 | printf("Unable to initialize GLFW");
21 | 
22 | GLFWmonitor* monitor = glfwGetPrimaryMonitor();
23 | const GLFWvidmode* mode = glfwGetVideoMode(monitor);
24 | int sWidth = mode->width;
25 | int sHeight = mode->height;
26 | 
27 | glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); //hints must be set before glfwCreateWindow
28 | glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
29 | glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
30 | glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
31 | 
32 | window = glfwCreateWindow(w, h, "Env", NULL, NULL);
33 | if (!window) {
34 | glfwTerminate();
35 | printf("Unable to create GLFW window");
36 | }
37 | 
38 | glfwMakeContextCurrent(window);
39 | gladLoadGL(glfwGetProcAddress);
40 | glfwSwapInterval(1);
41 | 
42 | //glEnable(GL_BLEND);
43 | //glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
44 | glEnable(GL_MULTISAMPLE); //enable multisampling
45 | //glEnable(GL_LINE_WIDTH);
46 | //glEnable(GL_DEPTH_TEST);
47 | glEnable(GL_BLEND);
48 | glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
49 | glBlendEquation(GL_FUNC_ADD);
50 | return window;
51 | }
52 | 
53 | Cart* cartLink;
54 | Agent* agentLink;
55 | 
56 | void key_callback(GLFWwindow* window, int key, int scancode, int action, int mods)
57 | {
58 | if (key == GLFW_KEY_R && action == GLFW_PRESS) {
59 | cartLink->Reset();
60 | }
61 | if (key == GLFW_KEY_W && action == GLFW_PRESS) {
62 | 
63 | }
64 | if (key == GLFW_KEY_S && action == GLFW_PRESS) {
65 | 
66 | }
67 | if (key == GLFW_KEY_A && action == GLFW_PRESS) {
68 | //cartLink->pushLeft();
69 | printf("Push the cart left\n");
70 | cartLink->b2->m_body->ApplyLinearImpulseToCenter(b2Vec2(-0.2f, 0), true);
71 | }
72 | if (key == GLFW_KEY_D && action == GLFW_PRESS) {
73 | //cartLink->pushRight();
74 | printf("Push the cart right\n");
75 | cartLink->b2->m_body->ApplyLinearImpulseToCenter(b2Vec2(0.2f, 0), true);
76 | }
77 | if (key == GLFW_KEY_F && action == GLFW_PRESS) {
78 | 
79 | }
80 | if (key == GLFW_KEY_E && action == GLFW_PRESS) {
81 | printf("E pressed\n");
82 | agentLink->phase = agentLink->phase == Agent::Phase::TRAIN ? Agent::Phase::TEST : Agent::Phase::TRAIN;
83 | if (agentLink->phase == Agent::Phase::TEST) {
84 | glfwSwapInterval(1);
85 | printf("TEST PHASE\n");
86 | }
87 | else {
88 | glfwSwapInterval(0); //vsync off while training
89 | printf("TRAIN PHASE\n");
90 | }
91 | }
92 | }
93 | 
94 | int main()
95 | {
96 | b2Vec2 gravity(0.0f, -9.8f);
97 | b2World world(gravity);
98 | 
99 | int width = 640, height = 480;
100 | GLFWwindow* window = CreateGLFWindow(width, height);
101 | //int width, height;
102 | float aspect = (float)width / (float)height;
103 | float viewRectSize = 1.f;
104 | float right = viewRectSize * aspect, top = viewRectSize;
105 | float bottom = -viewRectSize, left = -viewRectSize * aspect;
106 | glClearColor(1.f, 1.f, 1.f, 0.0f);
107 | 
108 | mat4x4 p;
109 | mat4x4_ortho(p, left, right, bottom, top, 1.f, -1.f);
110 | 
111 | Cart cart(&world);
112 | cartLink = &cart;
113 | 
114 | Agent agent(&cart);
115 | agentLink = &agent;
116 | 
117 | glfwSetKeyCallback(window, key_callback);
118 | 
119 | float lastFrame = 0;
120 | float deltaTime = 0;
121 | 
122 | float time = 0;
123 | //sudo apt-get install xorg-dev
124 | printf("=== CARTPOLE ENVIRONMENT ===\n");
125 | printf("=== Controls: R-reset, E-toggle test/train mode ===\n");
126 | printf("=== Controls: A-push the cart left, D-push the cart right ===\n");
127 | while (!glfwWindowShouldClose(window))
128 | {
129 | float currentFrame = (float)glfwGetTime();
130 | deltaTime = currentFrame - lastFrame;
131 | lastFrame = currentFrame;
132 | time += deltaTime;
133 | 
134 | glViewport(0, 0, width, height);
135 | glClear(GL_COLOR_BUFFER_BIT);
136 | glLoadMatrixf((const GLfloat*)p);
137 | 
138 | agent.Discover();
139 | cart.Draw();
140 | 
141 | //printf("pole angle: %f\n", cart.poleAngle());
142 | //printf("cart pos x:%f\n", cart.cartPos());
143 | //printf("cart velocity: %f\n", cart.cartVel());
144 | //printf("pole velocity: %f\n", cart.poleVel());
145 | 
146 | glfwSwapBuffers(window);
147 | glfwPollEvents();
148 | }
149 | 
150 | glfwTerminate();
151 | return 0;
152 | } 
-------------------------------------------------------------------------------- /src/cmd/qmaze/qmaze.cpp: -------------------------------------------------------------------------------- 
1 | #include <glad/gl.h>
2 | #include <GLFW/glfw3.h>
3 | #include "linmath.h"
4 | 
5 | #include <stdio.h>
6 | #include <stdlib.h>
7 | #include <math.h>
8 | 
9 | #include "grid.h"
10 | #include "agent.h"
11 | 
12 | Agent* link;
13 | 
14 | void key_callback(GLFWwindow* window, int key, int scancode, int action, int mods)
15 | {
16 | if (key == GLFW_KEY_R && action == GLFW_PRESS) {
17 | printf("RESET pressed\n");
18 | link->epsilon = 0.5f;
19 | link->Reset();
20 | link->SetRandomPos();
21 | }
22 | if (key == GLFW_KEY_E && action == GLFW_PRESS) {
23 | printf("E pressed\n");
24 | link->phase = link->phase == Agent::Phase::TRAIN ? Agent::Phase::TEST : Agent::Phase::TRAIN;
25 | if (link->phase == Agent::Phase::TEST) {
26 | glfwSwapInterval(1);
27 | printf("TEST PHASE\n");
28 | }
29 | else {
30 | glfwSwapInterval(0); //vsync off while training
31 | printf("TRAIN PHASE\n");
32 | }
33 | }
34 | }
35 | 
36 | void mouse_callback(GLFWwindow* window, int button, int action, int mods)
37 | {
38 | if (button == GLFW_MOUSE_BUTTON_LEFT && action == GLFW_PRESS) {
39 | double x;
40 | double y;
41 | int stepX = 64;
42 | glfwGetCursorPos(window, &x, &y);
43 | //printf("MX: %f, MY: %f\n", x, y);
44 | int wX = (int)(x / stepX);
45 | int wY = (int)(y / stepX);
46 | link->Reset();
47 | link->SetPos(wX, wY);
48 | 
49 | printf("MX: %d, MY: %d\n", wX, wY);
50 | }
51 | }
52 | 
53 | GLFWwindow* CreateGLFWindow(int w, int h)
54 | {
55 | GLFWwindow* window = nullptr;
56 | 
57 | if (!glfwInit())
58 | printf("Unable to initialize GLFW");
59 | 
60 | GLFWmonitor* monitor = glfwGetPrimaryMonitor();
61 | const GLFWvidmode* mode = glfwGetVideoMode(monitor);
62 | int sWidth = mode->width;
63 | int sHeight = mode->height;
64 | 
65 | glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); //hints must be set before glfwCreateWindow
66 | glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
67 | glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
68 | glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
69 | 
70 | window = glfwCreateWindow(w, h, "Env", NULL, NULL);
71 | if (!window) {
72 | glfwTerminate();
73 | printf("Unable to create GLFW window");
74 | }
75 | 
76 | glfwMakeContextCurrent(window);
77 | gladLoadGL(glfwGetProcAddress);
78 | glfwSwapInterval(0); //vsync off by default: training mode
79 | 
80 | //glEnable(GL_BLEND);
81 | //glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
82 | glEnable(GL_MULTISAMPLE); //enable multisampling
83 | //glEnable(GL_DEPTH_TEST);
84 | glEnable(GL_BLEND);
85 | glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
86 | glBlendEquation(GL_FUNC_ADD);
87 | return window;
88 | }
89 | 
90 | int main()
91 | {
92 | GLFWwindow* window = CreateGLFWindow(640, 640);
93 | //int width, height;
94 | float loss = 0;
95 | float right = 5.f, top = 5.f;
96 | float bottom = 0.f, left = 0.f;
97 | glClearColor(1.f, 1.f, 1.f, 0.0f);
98 | 
99 | int numAgents = 1;
100 | int numItems = 0;
101 | int gridSize = 10;
102 | 
103 | Grid grid(gridSize, gridSize);
104 | 
105 | Agent agent(&grid);
106 | link = &agent;
107 | 
108 | printf("Agents count: %d\n", numAgents);
109 | printf("Grid size: [%d; %d]\n", gridSize, gridSize);
110 | printf("== Start training ==\n");
111 | 
112 | mat4x4 p;
113 | float ratio;
114 | int width, height;
115 | 
116 | glfwGetFramebufferSize(window, &width, &height);
117 | ratio = (float)width / (float)height;
118 | mat4x4_ortho(p, left, right, top, bottom, 1.f, -1.f);
119 | 
120 | glfwSetKeyCallback(window, key_callback);
121 | glfwSetMouseButtonCallback(window, mouse_callback);
122 | printf("=== GRID ENVIRONMENT ===\n");
123 | printf("=== Controls: R-reset, E-toggle test/train mode, LMB-set agent position ===\n");
124 | while (!glfwWindowShouldClose(window))
125 | {
126 | glViewport(0, 0, width, height);
127 | glClear(GL_COLOR_BUFFER_BIT);
128 | glLoadMatrixf((const GLfloat*)p);
129 | //draw axis line
130 | glColor3f(0.7, 0.7, 0.7);
131 | glBegin(GL_LINES);
132 | glVertex3f(0, 0, 0.0);
133 | glVertex3f(right, 0, 0.0);
134 | glVertex3f(0, 0, 0.0);
135 | glVertex3f(left, 0, 0.0);
136 | glVertex3f(0, 0, 0.0);
137 | glVertex3f(0, top, 0.0);
138 | glVertex3f(0, 0, 0.0);
139 | glVertex3f(0, bottom, 0.0);
140 | glEnd();
141 | glColor3f(1.0, 0.0, 0.0);
142 | glLineWidth(1.2f);
143 | 
144 | grid.Draw();
145 | 
146 | agent.Discover();
147 | agent.Draw();
148 | 
149 | glfwSwapBuffers(window);
150 | glfwPollEvents();
151 | }
152 | return 0;
153 | } 
-------------------------------------------------------------------------------- /src/MaxPool2d.cu: -------------------------------------------------------------------------------- 
1 | #include "MaxPool2d.h"
2 | #include <float.h> //for FLT_MAX
3 | 
4 | #ifdef __NVCC__
5 | Layer* MaxPool2d_CreateGPU(shape2 k_size, shape2 stride, int pad, Layer* in)
6 | {
7 | //input shape depth must be == 1
8 | Layer* l = (Layer*)malloc(sizeof(Layer));
9 | if (!l)
10 | {
11 | printf("MaxPool2d allocation error!");
12 | return NULL;
13 | }
14 | l->type = LT_MAXPOOL;
15 | int inn = in->out_shape.w * in->out_shape.h * in->out_shape.d;
16 | //calculate output shape
17 | l->out_shape.d = in->out_shape.d;
18 | l->out_shape.w = (int)((in->out_shape.w - k_size.w + pad * 2) / stride.w + 1);
19 | l->out_shape.h = (int)((in->out_shape.h - k_size.h + pad * 2) / stride.h + 1);
20 | printf("MaxPool2d output shape: [%d, %d, %d]\n", l->out_shape.w, l->out_shape.h, l->out_shape.d);
21 | 
22 | l->n_inputs = inn;
23 | l->output = Tensor_CreateGPU(l->out_shape, 0);
24 | l->input = &in->output;
25 | 
26 | MaxPool2d* ld = (MaxPool2d*)malloc(sizeof(MaxPool2d));
27 | if (ld) {
28 | ld->pad = pad;
29 | ld->stride.w = stride.w; ld->stride.h = stride.h;
30 | ld->k_size.w = k_size.w; ld->k_size.h = k_size.h;
31 | }
32 | else printf("MaxPool2d data allocation error\n");
33 | l->aData = ld;
34 | return l;
35 | }
36 | 
37 | __global__ void MaxPool2d_ForwardKernels(shape limit, float* xw, float* outw, shape ishape, shape oshape, shape2 k_size, shape2 stride, int pad)
38 | {
39 | int w = (blockIdx.x * blockDim.x) + threadIdx.x;
40 | int h = (blockIdx.y * blockDim.y) + threadIdx.y;
41 | int d = (blockIdx.z * blockDim.z) + threadIdx.z;
42 | if (w < limit.w && h < limit.h && d < limit.d) {
43 | 
44 | float maxk = -FLT_MAX;
45 | for (int kh = 0; kh < k_size.h; kh++)
46 | {
47 | int cury = (h * stride.h - pad) + kh;
48 | for (int kw = 0; kw < k_size.w; kw++)
49 | {
50 | int curx = (w * stride.w - pad) + kw;
51 | if (curx >= 0 && cury >= 0 && curx < ishape.w && cury < ishape.h)
52 | {
53 | int xwi = ((ishape.w * cury) + curx) * ishape.d + d;
54 | float val = xw[xwi];
55 | if (val > maxk) maxk = val;
56 | }
57 | }
58 | }
59 | int owi = ((oshape.w * h) + w) * oshape.d + d;
60 | outw[owi] = maxk;
61 | }
62 | }
63 | 
64 | Tensor* MaxPool2d_ForwardGPU(Layer* l)
65 | {
66 | MaxPool2d* data = (MaxPool2d*)l->aData;
67 | 
68 | int w = l->out_shape.w;
69 | int h = l->out_shape.h;
70 | int d = l->out_shape.d;
71 | 
72 | int threadsPerBlockX = 4;
73 | int threadsPerBlockY = 4;
74 | int threadsPerBlockZ = 64;
75 | 
76 | dim3 gridDim((int)ceil(w / (float)threadsPerBlockX), (int)ceil(h / (float)threadsPerBlockY), (int)ceil(d / (float)threadsPerBlockZ));
77 | dim3 blockDim(threadsPerBlockX, threadsPerBlockY, threadsPerBlockZ);
78 | 
79 | MaxPool2d_ForwardKernels KERNEL_CALL(gridDim, blockDim) ({w,h,d},
80 | l->input->w,
81 | l->output.w,
82 | l->input->s,
83 | l->output.s,
84 | data->k_size,
85 | data->stride,
86 | data->pad);
87 | cudaDeviceSynchronize();
88 | return &l->output;
89 | }
90 | 
91 | __global__ void MaxPool2d_BackwardKernels(shape limit, float* xw, float* xdw, float* outdw, shape ishape, shape oshape, shape2 k_size, shape2 stride, int pad)
92 | {
93 | int w = (blockIdx.x * blockDim.x) + threadIdx.x;
94 | int h = (blockIdx.y * blockDim.y) + threadIdx.y;
95 | int d = (blockIdx.z * blockDim.z) + threadIdx.z;
96 | 
97 | if (w < limit.w && h < limit.h && d < limit.d) {
98 | 
99 | float maxk = -FLT_MAX;
100 | int khm = 0, kwm = 0;
101 | for (int kh = 0; kh < k_size.h; kh++)
102 | {
103 | int cury = (h * stride.h - pad) + kh;
104 | for (int kw = 0; kw < k_size.w; kw++)
105 | {
106 | int curx = (w * stride.w - pad) + kw;
107 | if (curx >= 0 && cury >= 0 && curx < ishape.w && cury < ishape.h)
108 | {
109 | int xwi = ((ishape.w * cury) + curx) * ishape.d + d;
110 | float val = xw[xwi];
111 | if (val > maxk) { maxk = val; kwm = curx; khm = cury; }
112 | }
113 | }
114 | }
115 | int odwi = ((oshape.w * h) + w) * oshape.d + d;
116 | float chain_grad = outdw[odwi];
117 | //route the gradient to the argmax input element
118 | int id = ((ishape.w * khm) + kwm) * ishape.d + d;
119 | //xdw[id] += chain_grad;
120 | atomicAdd(&xdw[id], chain_grad);
121 | }
122 | }
123 | 
124 | void MaxPool2d_BackwardGPU(Layer* l)
125 | {
126 | MaxPool2d* data = (MaxPool2d*)l->aData;
127 | 
128 | int w = l->out_shape.w;
129 | int h = l->out_shape.h;
130 | int d = l->out_shape.d;
131 | 
132 | int threadsPerBlockX = 4;
133 | int threadsPerBlockY = 4;
134 | int threadsPerBlockZ = 64;
135 | 
136 | dim3 gridDim((int)ceil(w / (float)threadsPerBlockX), (int)ceil(h / (float)threadsPerBlockY), (int)ceil(d / (float)threadsPerBlockZ));
137 | dim3 blockDim(threadsPerBlockX, threadsPerBlockY, threadsPerBlockZ);
138 | 
139 | MaxPool2d_BackwardKernels KERNEL_CALL(gridDim, blockDim) ({w,h,d},
140 | l->input->w,
141 | l->input->dw,
142 | l->output.dw,
143 | l->input->s,
144 | l->output.s,
145 | data->k_size,
146 | data->stride,
147 | data->pad);
148 | cudaDeviceSynchronize();
149 | }
150 | #endif // __NVCC__
-------------------------------------------------------------------------------- /src/Optimizer.cu: -------------------------------------------------------------------------------- 
1 | #include "Optimizer.h"
2 | #include <stdio.h>
3 | #include <stdlib.h>
4 | 
5 | #ifdef __NVCC__
6 | void CreateAdanDataGPU(Tensor* t)
7 | {
8 | adanTData* data = (adanTData*)malloc(sizeof(adanTData));
9 | if (!data) { printf("Adan GPU data allocation error!\n"); return; }
10 | data->gprev = createFloatArrayGPU(t->n);
11 | data->mk = createFloatArrayGPU(t->n);
12 | data->nk = createFloatArrayGPU(t->n);
13 | data->vk = createFloatArrayGPU(t->n);
14 | 
15 | if (data->mk != NULL && data->vk != NULL && data->nk != NULL && data->gprev != NULL) {
16 | t->tData = data;
17 | }
18 | else printf("Adan GPU data allocation error!\n");
19 | }
20 | 
21 | void CreateAdamDataGPU(Tensor* t)
22 | {
23 | adamTData* data = (adamTData*)malloc(sizeof(adamTData));
24 | if (!data) { printf("Adam GPU data allocation error!\n"); return; }
25 | data->vt = createFloatArrayGPU(t->n);
26 | data->mt = createFloatArrayGPU(t->n);
27 | 
28 | if (data->vt != NULL && data->mt != NULL) {
29 | t->tData = data;
30 | }
31 | else printf("Adam GPU data allocation error!\n");
32 | }
33 | 
34 | void CreateMomentumDataGPU(Tensor* t)
35 | {
36 | momentumTData* data = (momentumTData*)malloc(sizeof(momentumTData));
37 | if (!data) { printf("Momentum GPU data allocation error!\n"); return; }
38 | data->vk = createFloatArrayGPU(t->n);
39 | if (data->vk != NULL) {
40 | t->tData = data;
41 | }
42 | else printf("Momentum GPU data allocation error!\n");
43 | }
44 | 
45 | __global__ void NRMSProp_GradKernel(float* w, float* dw, float* vt, float* bw, float* bdw, float* bvt, float lr, shape s)
46 | {
47 | int y = (blockIdx.x * blockDim.x) + threadIdx.x;
48 | int z = (blockIdx.y * blockDim.y) + threadIdx.y;
49 | int x = (blockIdx.z * blockDim.z) + threadIdx.z;
50 | 
51 | if (y < s.h && z < s.d && x < s.w) {
52 | int i = ((s.w * y) + x) * s.d + z;
53 | 
54 | float b = 0.9f;
55 | float clip = 1e10f;
56 | 
57 | float dwij = dw[i];
58 | //NRMSProp
59 | if (dwij > clip)
60 | dwij = clip;
61 | if (dwij < -clip)
62 | dwij = -clip;
63 | 
64 | float dx = vt[i];
65 | vt[i] = vt[i] * b + lr * dwij;
66 | dx = b * dx + (1.f - b) * vt[i];
67 | w[i] += -dx;
68 | 
69 | dw[i] = 0;
70 | 
71 | if (y == 0) //biases
72 | {
73 | float dwijb = bdw[i];
74 | //NRMSProp
75 | if (dwijb > clip)
76 | dwijb = clip;
77 | if (dwijb < -clip)
78 | dwijb = -clip;
79 | 
80 | float dxb = bvt[i];
81 | bvt[i] = bvt[i] * b + lr * dwijb;
82 | dxb = b * dxb + (1.f - b) * bvt[i];
83 | bw[i] += -dxb;
84 | 
85 | bdw[i] = 0;
86 | }
87 | }
88 | }
89 | 
90 | void Change_GradGPU(OptParams* par, Tensor* k, Tensor* b, bool norm)
91 | {
92 | int w = k->s.w;
93 | int h = k->s.h;
94 | int d = k->s.d;
95 | 
96 | int threadsPerBlockX = 4;
97 | int threadsPerBlockY = 64;
98 | int threadsPerBlockZ = 4;
99 | dim3 gridDim((int)ceil(h / (float)threadsPerBlockX), (int)ceil(d / (float)threadsPerBlockY), (int)ceil(w / (float)threadsPerBlockZ));
100 | dim3 blockDim(threadsPerBlockX, threadsPerBlockY, threadsPerBlockZ);
101 | 
102 | //========================================
103 | switch (par->method)
104 | {
105 | //case ADAGRAD: AdagradOpt(v, par); break;
106 | //case RMSPROP: NRMSPropOpt(v, par); break;
107 | case NRMSPROP:
108 | {
109 | momentumTData* kdata = (momentumTData*)k->tData;
110 | momentumTData* bdata = (momentumTData*)b->tData;
111 | 
112 | NRMSProp_GradKernel KERNEL_CALL(gridDim, blockDim) (
113 | k->w, k->dw, kdata->vk,
114 | b->w, b->dw, bdata->vk,
115 | par->learning_rate, k->s);
116 | }break;
117 | //case SGD: SGDOpt(v, par); break;
118 | //case ADAN: AdanOpt(v, par); break;
119 | //case ADAM: AdamOpt(v, par); break;
120 | default: printf("Currently only NRMSPROP available on GPU\n"); break;
121 | }
122 | cudaDeviceSynchronize();
123 | }
124 | 
125 | void PrepareTensorGPU(Tensor *v, OptParams* par)
126 | {
127 | if (v->tData == NULL)
128 | switch (par->method)
129 | {
130 | case ADAN: CreateAdanDataGPU(v); break;
131 | case ADAM: CreateAdamDataGPU(v); break;
132 | case SGD: break; //no data for simple sgd
133 | default: CreateMomentumDataGPU(v); break;
134 | }
135 | }
136 | 
137 | void PrepareTDataGPU(Model* n, OptParams* par)
138 | {
139 | for (int i = 0; i < n->n_layers; i++)
140 | {
141 | switch (n->Layers[i]->type)
142 | {
143 | case LT_DENSE: {
144 | Dense* data = (Dense*)n->Layers[i]->aData;
145 | PrepareTensorGPU(data->kernels, par);
146 | PrepareTensorGPU(&data->biases, par);
147 | }break;
148 | case LT_CONV: {
149 | printf("Currently Conv2d layer unavailable for optimization on GPU\n");
150 | }break;
151 | default: break;
152 | }
153 | }
154 | }
155 | 
156 | void OptimizeModelGPU(Model* n, OptParams* par)
157 | {
158 | for (int i = 0; i < n->n_layers; i++)
159 | {
160 | switch (n->Layers[i]->type)
161 | {
162 | case LT_DENSE: {
163 | Dense* data = (Dense*)n->Layers[i]->aData;
164 | Layer* l = n->Layers[i];
165 | Change_GradGPU(par, data->kernels, &data->biases, false);
166 | //clear the layer's chain gradients
167 | Tensor* out = &l->output;
168 | cudaMemset(out->dw, 0, sizeof(float) * out->n);
169 | }break;
170 | case LT_CONV: {
171 | Tensor* out = &n->Layers[i]->output;
172 | cudaMemset(out->dw, 0, sizeof(float) * out->n);
173 | }break;
174 | default: {
175 | Tensor* out = &n->Layers[i]->output;
176 | cudaMemset(out->dw, 0, sizeof(float) * out->n);
177 | }
178 | break;
179 | }
180 | }
181 | }
182 | #endif // __NVCC__ 
-------------------------------------------------------------------------------- /src/cmd/particles/particles.cpp: -------------------------------------------------------------------------------- 
1 | #include <glad/gl.h>
2 | #include <GLFW/glfw3.h>
3 | #include "linmath.h"
4 | 
5 | #include <stdio.h>
6 | #include <stdlib.h>
7 | #include <math.h>
8 | #include "box2d/box2d.h"
9 | #include "shape.h"
10 | #include "agent.h"
11 | 
12 | #include "TCommon.h"
13 | 
14 | GLFWwindow* CreateGLFWindow(int w, int h)
15 | {
16 | GLFWwindow* window = nullptr;
17 | 
18 | if (!glfwInit())
19 | printf("Unable to initialize GLFW");
20 | 
21 | GLFWmonitor* monitor = glfwGetPrimaryMonitor();
22 | const GLFWvidmode* mode = glfwGetVideoMode(monitor);
23 | int sWidth = mode->width;
24 | int sHeight = mode->height;
25 | 
26 | glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); //hints must be set before glfwCreateWindow
27 | glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
28 | glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
29 | glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
30 | 
31 | window = glfwCreateWindow(w, h, "Env", NULL, NULL);
32 | if (!window) {
33 | glfwTerminate();
34 | printf("Unable to create GLFW window");
35 | }
36 | 
37 | glfwMakeContextCurrent(window);
38 | gladLoadGL(glfwGetProcAddress);
39 | glfwSwapInterval(0); //vsync off by default: training mode
40 | 
41 | glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
42 | glBlendEquation(GL_FUNC_ADD);
43 | glEnable(GL_BLEND);
44 | glEnable(GL_MULTISAMPLE);
45 | //double buffering is a window property, not a glEnable capability
46 | glEnable(GL_DEPTH_TEST);
47 | return window;
48 | }
49 | 
50 | int stepsCount = 0;
51 | float total_reward = 0;
52 | float alpha = 0.8f;
53 | int trained_steps = 0;
54 | 
55 | Agent* a1, *a2;
56 | float viewRectSize = 10.f;
57 | 
58 | void key_callback(GLFWwindow* window, int key, int scancode, int action, int mods)
59 | {
60 | if (key == GLFW_KEY_R && action == GLFW_PRESS) {
61 | a1->ResetPosition();
62 | a1->target->SetRandomPos(viewRectSize);
63 | }
64 | if (key == GLFW_KEY_E && action == GLFW_PRESS) {
65 | printf("E pressed\n");
66 | a1->phase = a1->phase == Agent::Phase::TRAIN ? Agent::Phase::TEST : Agent::Phase::TRAIN;
67 | if (a1->phase == Agent::Phase::TEST) {
68 | glfwSwapInterval(1);
69 | printf("TEST PHASE\n");
70 | }
71 | else {
72 | glfwSwapInterval(0); //vsync off while training
73 | printf("TRAIN PHASE\n");
74 | }
75 | }
76 | }
77 | 
78 | int main()
79 | {
80 | b2Vec2 gravity(0.0f, 0.0f);
81 | b2World world(gravity);
82 | float timeStep = 1.0f / 60.0f;
83 | int32 velocityIterations = 6;
84 | int32 positionIterations = 2;
85 | 
86 | int width = 640, height = 640;
87 | GLFWwindow* window = CreateGLFWindow(width, height);
88 | 
89 | float aspect = (float)width / (float)height;
90 | 
91 | float right = viewRectSize * aspect, top = viewRectSize;
92 | float bottom = -viewRectSize, left = -viewRectSize * aspect;
93 | glClearColor(1.f, 1.f, 1.f, 0.0f);
94 | 
95 | mat4x4 p;
96 | mat4x4_ortho(p, left, right, bottom, top, 1.f, -1.f);
97 | 
98 | glfwSetKeyCallback(window, key_callback);
99 | 
100 | float lastFrame = 0;
101 | float deltaTime = 0;
102 | 
103 | float time = 0;
104 | 
105 | printf("=== MA PARTICLES ENVIRONMENT ===\n");
106 | printf("=== Controls: R-reset, E-toggle test/train mode ===\n");
107 | 
108 | //TDCircle circle(&world, 0, 0, 0.5f, 1.f, false);
109 | //TDCircle circle2(&world, 0, 0, 0.5f, 1.f, false);
110 | 
111 | TDCircle Target1(&world, 5.f, -5.f, 1.5f, 1.f, true);
112 | TDCircle Target2(&world, -5.f, 5.f, 1.0f, 1.f, true);
113 | 
114 | Agent agent1(&world, viewRectSize, &Target1);
115 | Agent agent2(&world, viewRectSize, &Target2);
116 | 
117 | a1 = &agent1;
118 | a2 = &agent2;
119 | 
120 | while (!glfwWindowShouldClose(window))
121 | {
122 | glViewport(0, 0, width, height);
123 | glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
124 | glLoadMatrixf((const GLfloat*)p);
125 | //draw axis
line 126 | glColor3f(0.7, 0.7, 0.7); 127 | glBegin(GL_LINES); 128 | glVertex3f(0, 0, 0.0); 129 | glVertex3f(right, 0, 0.0); 130 | glVertex3f(0, 0, 0.0); 131 | glVertex3f(left, 0, 0.0); 132 | glVertex3f(0, 0, 0.0); 133 | glVertex3f(0, top, 0.0); 134 | glVertex3f(0, 0, 0.0); 135 | glVertex3f(0, bottom, 0.0); 136 | glEnd(); 137 | 138 | agent1.Discover(); 139 | //agent2.Discover(); 140 | //circle.ApplyForce(rngNormal(), rngNormal()); 141 | //circle2.ApplyForce(rngNormal(), rngNormal()); 142 | 143 | //circle.Draw(); 144 | //circle2.Draw(); 145 | agent1.Draw(); 146 | agent2.Draw(); 147 | //if (circle.needToReset(viewRectSize) || circle2.needToReset(viewRectSize)) { circle.SetRandomPos(viewRectSize); circle2.SetRandomPos(viewRectSize); 148 | //} 149 | //if (circle2.needToReset(viewRectSize)) circle2.SetRandomPos(viewRectSize); 150 | 151 | Target1.Draw(); 152 | Target2.Draw(); 153 | glfwSwapBuffers(window); 154 | glfwPollEvents(); 155 | } 156 | glfwTerminate(); 157 | return 0; 158 | } -------------------------------------------------------------------------------- /src/cmd/cartpole_cont/agent.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "geometry/TVec3.h" 4 | #include "TCommon.h" 5 | 6 | #include "Model.h" 7 | #include "tList.h" 8 | #include "fList.h" 9 | #include "Utils.h" 10 | 11 | #include "RL/DDPG.h" 12 | 13 | #include "cart.h" 14 | #include "RL/SimpleDeque.h" 15 | 16 | struct Samples 17 | { 18 | Samples(shape s) 19 | { 20 | states = tList_create(); 21 | probs = tList_create(); 22 | actions = fList_create(); 23 | rewards = fList_create(); 24 | last_state = Tensor_Create(s, 0); 25 | } 26 | 27 | void AddSample(Tensor* state, Tensor* prob, float action, float reward) 28 | { 29 | tList_push(&states, state); 30 | tList_push(&probs, prob); 31 | fList_push(&actions, action); 32 | fList_push(&rewards, reward); 33 | } 34 | 35 | tList states; 36 | tList probs; 37 | fList rewards; 38 | fList actions; 39 | Tensor last_state; 40 | }; 41 | 42 | void freeSample(void* s) 43 | { 44 | Samples* sa = (Samples*)s; 45 | tList_free(&sa->states); 46 | tList_free(&sa->probs); 47 | fList_free(&sa->actions); 48 | fList_free(&sa->rewards); 49 | Tensor_Free(&sa->last_state); 50 | } 51 | 52 | class Agent { 53 | public: 54 | enum Phase { TRAIN, TEST }; 55 | 56 | Cart* cart; 57 | float epsilon = 0.9f; 58 | int n_actions = 1; 59 | Tensor state; 60 | Phase phase = Phase::TEST; 61 | DDPG* brain; 62 | Samples* sa; 63 | shape input_shape = { 1, 1, 4 }; 64 | 65 | float timeStep = 1.0f / 60.0f; 66 | int32 velocityIterations = 6; 67 | int32 positionIterations = 2; 68 | 69 | //replay buffer: 70 | SimpleDeque* history = createDeque(20000); 71 | int batch_size = 64; 72 | 73 | Agent(Cart* c) 74 | { 75 | cart = c; 76 | state = MoveByForce(0.f); //Tensor_Create(input_shape, 0); 77 | sa = new Samples(input_shape); 78 | brain = DDPG_Create(input_shape, n_actions); 79 | } 80 | 81 | ~Agent() 82 | { 83 | printf("Clean up actor's data...\n"); 84 | freeDeque(history, freeSample); 85 | } 86 | 87 | float Policy(Tensor* s) 88 | { 89 | Tensor t = DDPG_Forward(brain, s); 90 | float force = t.w[0]; 91 | Tensor_Free(&t); 92 | return force; 93 | } 94 | 95 | Tensor Act(Tensor* s) 96 | { 97 | Tensor prob = DDPG_SelectAction(brain, s, epsilon); 98 | if (epsilon > 0.05f) 99 | epsilon *= 0.99999f; 100 | return prob; 101 | } 102 | 103 | float GetReward() 104 | { 105 | float reward = 1.0f; 106 | return reward; 107 | } 108 | 109 | float trace_reward = 0; 110 | int counter = 0; 111 | void 
Discover()
112 | {
113 | if (phase == TRAIN) {
114 | Tensor a = Act(&state);
115 | 
116 | float force = a.w[0];
117 | 
118 | Tensor next_state = MoveByForce(force);
119 | float reward = GetReward();
120 | trace_reward += reward;
121 | 
122 | sa->AddSample(&state, &a, force, reward);
123 | 
124 | Tensor_Copy(&state, &next_state);
125 | Tensor_Free(&next_state);
126 | Tensor_Free(&a);
127 | if (cart->needToReset() || sa->states.length > 5000)
128 | {
129 | if (sa->states.length < 5000)
130 | sa->rewards.data[sa->rewards.length - 1] = -1;
131 | printf("\ntrace reward: %f eps: %f\n", trace_reward, epsilon);
132 | if (history->length > batch_size) {
133 | for (int i = 0; i < batch_size; i++)
134 | {
135 | int k = rngInt(0, history->length - 1);
136 | Samples* s = (Samples*)history->data[k].elem;
137 | DDPG_TrainTrace(brain, s->states.data, &s->last_state, s->probs.data, s->rewards.data, s->states.length, counter);
138 | counter++;
139 | }
140 | }
141 | Tensor_CopyData(&sa->last_state, &state);
142 | dequeAppend(history, sa, freeSample);
143 | sa = new Samples(input_shape);
144 | 
145 | if (trace_reward > 5000)
146 | {
147 | printf("Maximum score reached, set TEST phase\n");
148 | glfwSwapInterval(1);
149 | phase = TEST;
150 | }
151 | 
152 | trace_reward = 0;
153 | cart->Reset();
154 | }
155 | }
156 | else
157 | {
158 | float force = Policy(&state);
159 | Tensor next_state = MoveByForce(force);
160 | Tensor_Free(&state);
161 | state = next_state;
162 | }
163 | }
164 | 
165 | Tensor MoveByForce(float force)
166 | {
167 | cart->ApplyForceValue(force * 0.1f);
168 | cart->world->Step(timeStep, velocityIterations, positionIterations);
169 | Tensor s = Tensor_Create(input_shape, 0);
170 | s.w[0] = cart->cartPos();
171 | s.w[1] = cart->cartVel();
172 | s.w[2] = cart->poleAngle();
173 | s.w[3] = cart->poleVel();
174 | return s;
175 | }
176 | 
177 | private:
178 | };
179 | 
-------------------------------------------------------------------------------- /src/cmd/cartpole_td3/agent.h: -------------------------------------------------------------------------------- 
1 | #pragma once
2 | 
3 | #include "geometry/TVec3.h"
4 | #include "TCommon.h"
5 | 
6 | #include "Model.h"
7 | #include "tList.h"
8 | #include "fList.h"
9 | #include "Utils.h"
10 | 
11 | #include "RL/TD3.h"
12 | 
13 | #include "cmd/cartpole_cont/cart.h"
14 | #include "RL/SimpleDeque.h"
15 | 
16 | struct Samples
17 | {
18 | Samples(shape s)
19 | {
20 | states = tList_create();
21 | probs = tList_create();
22 | actions = fList_create();
23 | rewards = fList_create();
24 | last_state = Tensor_Create(s, 0);
25 | }
26 | 
27 | void AddSample(Tensor *state, Tensor *prob, float action, float reward)
28 | {
29 | tList_push(&states, state);
30 | tList_push(&probs, prob);
31 | fList_push(&actions, action);
32 | fList_push(&rewards, reward);
33 | }
34 | 
35 | tList states;
36 | tList probs;
37 | fList rewards;
38 | fList actions;
39 | Tensor last_state;
40 | };
41 | 
42 | void freeSample(void* s)
43 | {
44 | Samples* sa = (Samples*)s;
45 | tList_free(&sa->states);
46 | tList_free(&sa->probs);
47 | fList_free(&sa->actions);
48 | fList_free(&sa->rewards);
49 | Tensor_Free(&sa->last_state);
50 | }
51 | 
52 | class Agent {
53 | public:
54 | enum Phase { TRAIN, TEST };
55 | 
56 | Cart* cart;
57 | float epsilon = 0.8f;
58 | int n_actions = 1;
59 | Tensor state;
60 | Phase phase = Phase::TEST;
61 | TD3 *brain;
62 | Samples *sa;
63 | shape input_shape = { 1, 1, 4 };
64 | 
65 | float timeStep = 1.0f / 60.0f;
66 | int32 velocityIterations = 6;
67 | int32 positionIterations = 2;
68 | 
69 | //replay buffer:
70 | SimpleDeque* history = createDeque(20000);
71 | int batch_size = 64;
72 | 
73 | Agent(Cart* c)
74 | {
75 | cart = c;
76 | state = MoveByForce(0.f); //Tensor_Create(input_shape, 0);
77 | sa = new Samples(input_shape);
78 | brain = TD3_Create(input_shape, n_actions);
79 | }
80 | 
81 | ~Agent()
82 | {
83 | printf("Clean up actor's data...\n");
84 | freeDeque(history, freeSample);
85 | }
86 | 
87 | float Policy(Tensor* s)
88 | {
89 | Tensor t = TD3_Forward(brain, s);
90 | float force = t.w[0];
91 | Tensor_Free(&t);
92 | return force;
93 | }
94 | 
95 | Tensor Act(Tensor* s)
96 | {
97 | Tensor prob = TD3_SelectAction(brain, s, epsilon);
98 | if (epsilon > 0.05f)
99 | epsilon *= 0.99999f;
100 | return prob;
101 | }
102 | 
103 | float GetReward()
104 | {
105 | float reward = 1.0f;
106 | return reward;
107 | }
108 | 
109 | float trace_reward = 0;
110 | int counter = 0;
111 | void Discover()
112 | {
113 | if (phase == TRAIN) {
114 | Tensor a = Act(&state);
115 | 
116 | float force = a.w[0];
117 | 
118 | Tensor next_state = MoveByForce(force);
119 | float reward = GetReward();
120 | trace_reward += reward;
121 | 
122 | sa->AddSample(&state, &a, force, reward);
123 | 
124 | Tensor_Copy(&state, &next_state);
125 | Tensor_Free(&next_state);
126 | Tensor_Free(&a);
127 | if (cart->needToReset() || sa->states.length > 5000)
128 | {
129 | sa->rewards.data[sa->rewards.length - 1] = -1;
130 | if (sa->states.length > 5000)
131 | sa->rewards.data[sa->rewards.length - 1] = 1;
132 | printf("\ntrace reward: %f eps: %f\n", trace_reward, epsilon);
133 | if (history->length > batch_size) {
134 | for (int i = 0; i < batch_size; i++)
135 | {
136 | int k = rngInt(0, history->length - 1);
137 | Samples* s = (Samples*)history->data[k].elem;
138 | TD3_TrainTrace(brain, s->states.data, &s->last_state, s->probs.data, s->rewards.data, s->states.length, counter);
139 | counter++;
140 | }
141 | }
142 | Tensor_CopyData(&sa->last_state, &state);
143 | dequeAppend(history, sa, freeSample);
144 | sa = new Samples(input_shape);
145 | 
146 | if (trace_reward > 5000)
147 | {
148 | printf("Maximum score reached, set TEST phase\n");
149 | glfwSwapInterval(1);
150 | phase = TEST;
151 | }
152 | 
153 | trace_reward = 0;
154 | cart->Reset();
155 | }
156 | }
157 | else
158 | {
159 | float force = Policy(&state);
160 | Tensor next_state = MoveByForce(force);
161 | Tensor_Free(&state);
162 | state = next_state;
163 | }
164 | }
165 | 
166 | Tensor MoveByForce(float force)
167 | {
168 | cart->ApplyForceValue(force*0.1f);
169 | cart->world->Step(timeStep, velocityIterations, positionIterations);
170 | Tensor s = Tensor_Create(input_shape, 0);
171 | s.w[0] = cart->cartPos();
172 | s.w[1] = cart->cartVel();
173 | s.w[2] = cart->poleAngle();
174 | s.w[3] = cart->poleVel();
175 | return s;
176 | }
177 | 
178 | };
179 | 
-------------------------------------------------------------------------------- /src/Conv2d.c: -------------------------------------------------------------------------------- 
1 | #include "Conv2d.h"
2 | #include <stdlib.h>
3 | 
4 | Layer* Conv2d_Create(int num_kernels, shape2 k_size, shape2 stride, int pad, RandType weightInit, Layer* in)
5 | {
6 | //input shape depth must be == 1
7 | Layer* l = (Layer*)malloc(sizeof(Layer));
8 | if (!l)
9 | {
10 | printf("Conv2d allocation error!");
11 | return NULL;
12 | }
13 | l->type = LT_CONV;
14 | int inn = in->out_shape.w * in->out_shape.h * in->out_shape.d;
15 | //calculate output shape
16 | l->out_shape.d = num_kernels;
17 | l->out_shape.w = (int)((in->out_shape.w - k_size.w + pad * 2) / stride.w + 1);
18 | l->out_shape.h = (int)((in->out_shape.h - k_size.h + pad * 2) / stride.h + 1);
19 | printf("Conv2d, output shape: [%d, %d, %d] pad: %d\n", l->out_shape.w, l->out_shape.h, l->out_shape.d, pad);
20 | 
21 | l->n_inputs = inn;
22 | l->output = Tensor_Create(l->out_shape, 0);
23 | l->input = &in->output;
24 | 
25 | float bias = 0.0f;
26 | Conv2d *ld = (Conv2d*)malloc(sizeof(Conv2d));
27 | if (ld) {
28 | ld->n = num_kernels;
29 | ld->pad = pad;
30 | ld->stride.w = stride.w; ld->stride.h = stride.h;
31 | ld->k_size.w = k_size.w; ld->k_size.h = k_size.h;
32 | 
33 | //create kernels
34 | ld->kernels = (Tensor*)malloc(sizeof(Tensor) * num_kernels);
35 | if (ld->kernels) {
36 | shape ks = { k_size.w, k_size.h, in->out_shape.d };
37 | for (int i = 0; i < num_kernels; i++)
38 | {
39 | ld->kernels[i] = Tensor_Create(ks, 1.f); //assume that n input channels = 1 for now
40 | //Tensor_He_Rand(ld->kernels[i].w, ld->kernels[i].n);
41 | switch (weightInit)
42 | {
43 | case R_XAVIER:
44 | Tensor_Xavier_Rand(ld->kernels[i].w, ld->kernels[i].n);
45 | break;
46 | case R_HE:
47 | Tensor_He_Rand(ld->kernels[i].w, ld->kernels[i].n);
48 | break;
49 | default:
50 | break;
51 | }
52 | }
53 | ld->biases = Tensor_Create((shape){ 1, 1, num_kernels }, bias);
54 | }
55 | }
56 | else printf("Conv2d data allocation error\n");
57 | l->aData = ld;
58 | return l;
59 | }
60 | 
61 | Tensor* Conv2d_Forward(Layer* l)
62 | {
63 | Tensor* inp = l->input;
64 | Conv2d* data = (Conv2d*)l->aData;
65 | 
66 | int pad = data->pad;
67 | for (int d = 0; d < l->out_shape.d; d++)
68 | {
69 | for (int h = 0; h < l->out_shape.h; h++)
70 | {
71 | for (int w = 0; w < l->out_shape.w; w++)
72 | {
73 | float ksum = 0;
74 | //iterate kernels by size
75 | for (int kh = 0; kh < data->k_size.h; kh++)
76 | {
77 | int cury = (h * data->stride.h - pad) + kh;
78 | for (int kw = 0; kw < data->k_size.w; kw++)
79 | {
80 | int curx = (w * data->stride.w - pad) + kw;
81 | //for image depth
82 | for (int imd = 0; imd < inp->s.d; imd++)
83 | {
84 | if (curx >= 0 && cury >= 0 && curx < inp->s.w && cury < inp->s.h)
85 | {
86 | int imi = ((l->input->s.w * cury) + curx) * l->input->s.d + imd;
87 | int ki = ((data->kernels[d].s.w * kh) + kw) * data->kernels[d].s.d + imd;
88 | ksum += data->kernels[d].w[ki] * inp->w[imi];
89 | }
90 | }
91 | }
92 | }
93 | ksum += data->biases.w[d];
94 | Tensor_Set(&l->output, w, h, d, ksum);
95 | }
96 | }
97 | }
98 | return &l->output;
99 | }
100 | 
101 | void Conv2d_Backward(Layer* l)
102 | {
103 | Tensor* inp = l->input;
104 | Conv2d* data = (Conv2d*)l->aData;
105 | 
106 | int pad = data->pad;
107 | for (int d = 0; d < l->out_shape.d; d++)
108 | {
109 | for (int h = 0; h < l->out_shape.h; h++)
110 | {
111 | for (int w = 0; w < l->out_shape.w; w++)
112 | {
113 | int idx = tIdx(l->output.s, w, h, d);
114 | float next_grad = l->output.dw[idx];
115 | //iterate kernels by size
116 | for (int kh = 0; kh < data->k_size.h; kh++)
117 | {
118 | int cury = (h * data->stride.h - pad) + kh;
119 | for (int kw = 0; kw < data->k_size.w; kw++)
120 | {
121 | int curx = (w * data->stride.w - pad) + kw;
122 | for (int imd = 0; imd < inp->s.d; imd++)
123 | {
124 | if (curx >= 0 && cury >= 0 && curx < inp->s.w && cury < inp->s.h)
125 | {
126 | int imi = ((l->input->s.w * cury) + curx) * l->input->s.d + imd;
127 | int ki = ((data->kernels[d].s.w * kh) + kw) * data->kernels[d].s.d + imd;
128 | data->kernels[d].dw[ki] += inp->w[imi] * next_grad;
129 | inp->dw[imi] += data->kernels[d].w[ki] * next_grad;
130 | }
131 | }
132 | }
133 | }
134 | data->biases.dw[d] += next_grad;
135 | }
136 | }
137 | }
138 | }
139 | 
140 | cJSON* Conv2d_To_JSON(Conv2d* d)
141 | {
142 | cJSON* Data = cJSON_CreateObject();
143 | cJSON* fi = cJSON_CreateArray();
144 | 
145 | cJSON_AddNumberToObject(Data, "nk", d->biases.n);
146 | 
147 | for (int i = 0; i < d->biases.n; i++)
148 | {
149 | cJSON_AddItemToArray(fi, Tensor_To_JSON(&d->kernels[i]));
150 | }
151 | cJSON_AddItemToObject(Data, "kernels", fi);
152 | cJSON_AddItemToObject(Data, "biases", Tensor_To_JSON(&d->biases));
153 | return Data;
154 | }
155 | 
156 | void Conv2d_Load_JSON(Conv2d* d, cJSON* node)
157 | {
158 | cJSON* nk = cJSON_GetObjectItem(node, "nk");
159 | cJSON* kernels = cJSON_GetObjectItem(node, "kernels"); //array
160 | cJSON* biases = cJSON_GetObjectItem(node, "biases");
161 | //load biases
162 | Tensor_Load_JSON(&d->biases, biases);
163 | //load kernels
164 | int n = nk->valueint;
165 | int i = 0;
166 | cJSON* kernel = NULL;
167 | cJSON_ArrayForEach(kernel, kernels)
168 | {
169 | Tensor_Load_JSON(&d->kernels[i], kernel);
170 | i++;
171 | }
172 | /*
173 | for (int i = 0; i < n; i++)
174 | {
175 | cJSON* f = cJSON_GetArrayItem(kernels, i);
176 | Tensor_Load_JSON(&d->kernels[i], f);
177 | }
178 | */
179 | }
180 | 
181 | void Conv2d_Free(Layer* l)
182 | {
183 | Conv2d* data = (Conv2d*)l->aData;
184 | for (int i = 0; i < data->n; i++)
185 | {
186 | Tensor_Free(&data->kernels[i]);
187 | }
188 | Tensor_Free(&data->biases);
189 | free(data);
190 | Tensor_Free(&l->output);
191 | free(l);
192 | } 
-------------------------------------------------------------------------------- /src/Conv2d.cu: -------------------------------------------------------------------------------- 
1 | #include "Conv2d.h"
2 | #include <stdlib.h>
3 | 
4 | #ifdef __NVCC__
5 | Layer* Conv2d_CreateGPU(int num_kernels, shape2 k_size, shape2 stride, int pad, Layer* in)
6 | {
7 | //input shape depth must be == 1
8 | Layer* l = (Layer*)malloc(sizeof(Layer));
9 | if (!l)
10 | {
11 | printf("Conv2d allocation error!");
12 | return NULL;
13 | }
14 | l->type = LT_CONV;
15 | int inn = in->out_shape.w * in->out_shape.h * in->out_shape.d;
16 | //calculate output shape
17 | l->out_shape.d = num_kernels;
18 | l->out_shape.w = (int)((in->out_shape.w - k_size.w + pad * 2) / stride.w + 1);
19 | l->out_shape.h = (int)((in->out_shape.h - k_size.h + pad * 2) / stride.h + 1);
20 | printf("Conv2d_GPU, output shape: [%d, %d, %d] pad: %d\n", l->out_shape.w, l->out_shape.h, l->out_shape.d, pad);
21 | 
22 | l->n_inputs = inn;
23 | l->output = Tensor_CreateGPU(l->out_shape, 0);
24 | l->input = &in->output;
25 | 
26 | float bias = 0.0f;
27 | Conv2dGPU* ld = (Conv2dGPU*)malloc(sizeof(Conv2dGPU));
28 | if (ld) {
29 | ld->pad = pad;
30 | ld->stride.w = stride.w; ld->stride.h = stride.h;
31 | ld->k_size.w = k_size.w; ld->k_size.h = k_size.h;
32 | 
33 | shape4 ks = { k_size.w, k_size.h, in->out_shape.d, num_kernels };
34 | ld->kernels = Tensor4_CreateGPU(ks, 1.f);
35 | Tensor_Xavier_RandGPU(ld->kernels.w, ld->kernels.n);
36 | 
37 | ld->biases = Tensor_CreateGPU({ 1, 1, num_kernels }, bias);
38 | }
39 | else printf("Conv2d data allocation error\n");
40 | l->aData = ld;
41 | return l;
42 | }
43 | 
44 | __global__ void Conv2d_ForwardKernels(shape limit, float* xw, float* kerw, float* bw, float* outw, shape ishape, shape4 kshape, shape oshape, shape2 k_size, shape2 stride, int pad)
45 | {
46 | int w = (blockIdx.x * blockDim.x) + threadIdx.x;
47 | int h = (blockIdx.y * blockDim.y) + threadIdx.y;
48 | int d = (blockIdx.z * blockDim.z) + threadIdx.z;
49
| if (w < limit.w && h < limit.h && d < limit.d) { 50 | 51 | float ksum = 0; 52 | for (int kh = 0; kh < k_size.h; kh++) 53 | { 54 | int cury = (h * stride.h - pad) + kh; 55 | for (int kw = 0; kw < k_size.w; kw++) 56 | { 57 | int curx = (w * stride.w - pad) + kw; 58 | for (int imd = 0; imd < ishape.d; imd++) 59 | { 60 | if (curx >= 0 && cury >= 0 && curx < ishape.w && cury < ishape.h) 61 | { 62 | int xwi = ((ishape.w * cury) + curx) * ishape.d + imd; 63 | int kwi = (((kshape.w * kh) + kw) * kshape.d + imd) * kshape.b + d; 64 | 65 | ksum += xw[xwi] * kerw[kwi]; 66 | } 67 | } 68 | } 69 | } 70 | ksum += bw[d]; 71 | int owi = ((oshape.w * h) + w) * oshape.d + d; 72 | outw[owi] = ksum; 73 | //printf("KSUM: %f\n", ksum); 74 | } 75 | } 76 | 77 | Tensor* Conv2d_ForwardGPU(Layer* l) 78 | { 79 | Conv2dGPU* data = (Conv2dGPU*)l->aData; 80 | //Tensor_CopyDataGPU(&l->output, &data->biases); 81 | //===================== 82 | int w = l->out_shape.w; 83 | int h = l->out_shape.h; 84 | int d = l->out_shape.d; 85 | 86 | int threadsPerBlockX = 4; 87 | int threadsPerBlockY = 4; 88 | int threadsPerBlockZ = 64; 89 | 90 | dim3 gridDim((int)ceil(w / (float)threadsPerBlockX), (int)ceil(h / (float)threadsPerBlockY), (int)ceil(d / (float)threadsPerBlockZ)); 91 | dim3 blockDim(threadsPerBlockX, threadsPerBlockY, threadsPerBlockZ); 92 | 93 | Conv2d_ForwardKernels KERNEL_CALL(gridDim, blockDim) ({w,h,d}, 94 | l->input->w, data->kernels.w, data->biases.w, 95 | l->output.w, l->input->s, data->kernels.s, l->output.s, data->k_size, data->stride, data->pad); 96 | cudaDeviceSynchronize(); 97 | return &l->output; 98 | } 99 | 100 | __global__ void Conv2d_BackwardKernels(shape limit, float* xw, float* xdw, float* kerw, float* kerdw, float* outdw, float* bdw, shape ishape, shape4 kshape, shape oshape, shape2 k_size, shape2 stride, int pad) 101 | { 102 | int w = (blockIdx.x * blockDim.x) + threadIdx.x; 103 | int h = (blockIdx.y * blockDim.y) + threadIdx.y; 104 | int d = (blockIdx.z * blockDim.z) + threadIdx.z; 105 | if (w < limit.w && h < limit.h && d < limit.d) { 106 | 107 | int owi = ((oshape.w * h) + w) * oshape.d + d; 108 | float chain_grad = outdw[owi]; 109 | 110 | for (int kh = 0; kh < k_size.h; kh++) 111 | { 112 | int cury = (h * stride.h - pad) + kh; 113 | for (int kw = 0; kw < k_size.w; kw++) 114 | { 115 | int curx = (w * stride.w - pad) + kw; 116 | for (int imd = 0; imd < ishape.d; imd++) 117 | { 118 | if (curx >= 0 && cury >= 0 && curx < ishape.w && cury < ishape.h) 119 | { 120 | int xwi = ((ishape.w * cury) + curx) * ishape.d + imd; 121 | int kwi = (((kshape.w * kh) + kw) * kshape.d + imd) * kshape.b + d; 122 | 123 | //kerdw[kwi] += xw[xwi] * chain_grad; 124 | atomicAdd(&kerdw[kwi], xw[xwi] * chain_grad); 125 | //xdw[xwi] += kerw[kwi] * chain_grad; 126 | float xdwi = kerw[kwi] * chain_grad; 127 | atomicAdd(&xdw[xwi], xdwi); 128 | } 129 | } 130 | } 131 | } 132 | 133 | //if(w==0&&h==0) 134 | // bdw[d] += chain_grad; 135 | atomicAdd(&bdw[d], chain_grad); 136 | } 137 | } 138 | 139 | void Conv2d_BackwardGPU(Layer* l) 140 | { 141 | Conv2dGPU* data = (Conv2dGPU*)l->aData; 142 | //Tensor_CopyDataGPU(&l->output, &data->biases); 143 | //===================== 144 | int w = l->out_shape.w; 145 | int h = l->out_shape.h; 146 | int d = l->out_shape.d; 147 | 148 | int threadsPerBlockX = 4; 149 | int threadsPerBlockY = 4; 150 | int threadsPerBlockZ = 64; 151 | 152 | dim3 gridDim((int)ceil(w / (float)threadsPerBlockX), (int)ceil(h / (float)threadsPerBlockY), (int)ceil(d / (float)threadsPerBlockZ)); 153 | dim3 blockDim(threadsPerBlockX, 
threadsPerBlockY, threadsPerBlockZ);
154 | 
155 | Conv2d_BackwardKernels KERNEL_CALL(gridDim, blockDim) ({w,h,d},
156 | l->input->w, l->input->dw,
157 | data->kernels.w, data->kernels.dw,
158 | l->output.dw,
159 | data->biases.dw,
160 | l->input->s,
161 | data->kernels.s,
162 | l->output.s,
163 | data->k_size,
164 | data->stride,
165 | data->pad);
166 | cudaDeviceSynchronize();
167 | }
168 | #endif // __NVCC__
169 | 
-------------------------------------------------------------------------------- /src/Model.c: -------------------------------------------------------------------------------- 
1 | #include "Model.h"
2 | #include <stdlib.h>
3 | //Creates a Model object with default parameters. Call this function to initialize a model.
4 | Model Model_Create()
5 | {
6 | Model n;
7 | n.Layers = NULL;
8 | n.n_layers = 0;
9 | n.NetForward = NULL;
10 | n.NetBackward = NULL;
11 | return n;
12 | }
13 | //Adds the new layer l to the Layers array of model n and returns its address.
14 | Layer* Model_AddLayer(Model* n, Layer* l)
15 | {
16 | int cnt = n->n_layers + 1;
17 | Layer** tmp = (Layer**)realloc(n->Layers, sizeof(Layer*) * cnt);
18 | if (!tmp) {
19 | free(n->Layers);
20 | n->Layers = NULL;
21 | return NULL;
22 | }
23 | n->n_layers = cnt;
24 | n->Layers = tmp;
25 | n->Layers[cnt - 1] = l;
26 | return n->Layers[cnt - 1];
27 | }
28 | //Generic entry point for a layer's backward pass. Dispatches to the backward function that matches the layer type.
29 | void Backward_Layer(Layer* l)
30 | {
31 | switch (l->type)
32 | {
33 | case LT_INPUT: Input_Backward(l); break;
34 | case LT_DENSE: Dense_Backward(l); break;
35 | //case LT_SOFTMAX: break;
36 | case LT_RELU: Relu_Backward(l); break;
37 | //case LT_REGRESSION: Regression_Backward(l, y); break;
38 | //case LT_MSE: MSE_Backward(l,y); break;
39 | case LT_CONV: Conv2d_Backward(l); break;
40 | case LT_MAXPOOL: MaxPool2d_Backward(l); break;
41 | case LT_TANHA: TanhA_Backward(l); break;
42 | case LT_CONC: Conc_Backward(l); break;
43 | default:
44 | break;
45 | }
46 | }
47 | //Generic entry point for a layer's forward pass. Dispatches to the forward function that matches the layer type.
48 | Tensor *Forward_Layer(Layer* l)
49 | {
50 | Tensor* y = NULL;
51 | switch (l->type)
52 | {
53 | case LT_INPUT: y = Input_Forward(l); break;
54 | case LT_DENSE: y = Dense_Forward(l); break;
55 | case LT_SOFTMAX: break;
56 | case LT_RELU: y = Relu_Forward(l); break;
57 | case LT_REGRESSION: y = Regression_Forward(l); break;
58 | case LT_MSE: y = MSE_Forward(l); break;
59 | case LT_TANHA: y = TanhA_Forward(l); break;
60 | case LT_CONV: y = Conv2d_Forward(l); break;
61 | case LT_MAXPOOL: y = MaxPool2d_Forward(l); break;
62 | case LT_CONC: y = Conc_Forward(l); break;
63 | default: break;
64 | }
65 | return y;
66 | }
67 | //Loads data from the cJSON object node into layer t.
68 | void Layer_Load_JSON(Layer* t, cJSON* node)
69 | {
70 | cJSON* output_shape = cJSON_GetObjectItem(node, "os"); //shape
71 | cJSON* layer_type = cJSON_GetObjectItem(node, "lt"); //type
72 | cJSON* num_inputs = cJSON_GetObjectItem(node, "ni"); //num_inputs
73 | cJSON* jData = cJSON_GetObjectItem(node, "d"); //Layer additional data
74 | 
75 | if (!cJSON_IsNull(jData))
76 | //Load layer data
77 | switch (t->type)
78 | {
79 | case LT_DENSE: {
80 | Dense* data = (Dense*)t->aData;
81 | Dense_Load_JSON(data, jData);
82 | }break;
83 | case LT_CONV: {
84 | Conv2d* data = (Conv2d*)t->aData;
85 | Conv2d_Load_JSON(data, jData);
86 | }break;
87 | default:
88 | break;
89 | }
90 | }
91 | //Serializes layer l into a cJSON object.
92 | cJSON* Layer_To_JSON(Layer* l)
93 | {
94 | cJSON* Layer = cJSON_CreateObject();
95 | cJSON_AddItemToObject(Layer, "os", Shape_To_JSON(l->out_shape));
96 | cJSON_AddNumberToObject(Layer, "ni", l->n_inputs);
97 | cJSON_AddNumberToObject(Layer, "lt", l->type);
98 | 
99 | switch (l->type)
100 | {
101 | case LT_DENSE: {
102 | Dense* data = (Dense*)l->aData;
103 | cJSON_AddItemToObject(Layer, "d", Dense_To_JSON(data));
104 | }break;
105 | case LT_CONV:
106 | {
107 | Conv2d* data = (Conv2d*)l->aData;
108 | cJSON_AddItemToObject(Layer, "d", Conv2d_To_JSON(data));
109 | }break;
110 | default:
111 | break;
112 | }
113 | return Layer;
114 | }
115 | //Serializes model n into a cJSON object.
116 | cJSON* Model_To_JSON(Model* n)
117 | {
118 | cJSON* jNet = cJSON_CreateObject();
119 | cJSON* jLayers = cJSON_CreateArray();
120 | cJSON_AddNumberToObject(jNet, "n_layers", n->n_layers);
121 | for (int i = 0; i < n->n_layers; i++)
122 | {
123 | cJSON* jLayer = Layer_To_JSON(n->Layers[i]);
124 | cJSON_AddItemToArray(jLayers, jLayer);
125 | }
126 | cJSON_AddItemToObject(jNet, "Layers", jLayers);
127 | return jNet;
128 | }
129 | //Loads data from the cJSON object node into model t.
130 | void Model_Load_JSON(Model* t, cJSON* node)
131 | {
132 | cJSON* Layers = cJSON_GetObjectItem(node, "Layers"); //Layers
133 | cJSON* n_layers = cJSON_GetObjectItem(node, "n_layers"); //num_layers
134 | int n = n_layers->valueint;
135 | 
136 | int i = 0;
137 | cJSON* layer = NULL;
138 | cJSON_ArrayForEach(layer, Layers)
139 | {
140 | Layer_Load_JSON(t->Layers[i], layer);
141 | i++;
142 | }
143 | /*
144 | for (int i = 0; i < n; i++)
145 | {
146 | cJSON* l = cJSON_GetArrayItem(Layers, i);
147 | Layer_Load_JSON(t->Layers[i], l);
148 | }
149 | */
150 | }
151 | //Runs the forward pass of every layer in model n.
152 | void Model_Forward(Model* n)
153 | {
154 | for (int i = 0; i < n->n_layers; i++)
155 | {
156 | Forward_Layer(n->Layers[i]);
157 | }
158 | }
159 | //Runs the backward pass of every layer in model n, in reverse order.
160 | void Model_Backward(Model* n)
161 | {
162 | int N = n->n_layers;
163 | for (int i = N - 1; i >= 0; i--)
164 | {
165 | Layer* l = n->Layers[i];
166 | Backward_Layer(l);
167 | }
168 | }
169 | 
170 | void Model_CLearGrads(Model* m)
171 | {
172 | //clear parameters grads
173 | dList props = Model_getGradients(m);
174 | for (int i = 0; i < props.length; i++)
175 | {
176 | Tensor* target = (Tensor*)props.data[i].e;
177 | memset(target->dw, 0, sizeof(float) * target->n);
178 | }
179 | //clear chain grads
180 | for (int i = 0; i < m->n_layers; i++)
181 | {
182 | Tensor* out = &m->Layers[i]->output;
183 | memset(out->dw, 0, sizeof(float) * out->n);
184 | }
185 | dList_free(&props);
186 | }
187 | 
188 | //Returns a dynamic list of the tensors that are trained by the optimizer.
189 | dList Model_getGradients(Model* n)
190 | {
191 | dList grads = dList_create();
192 | for (int i = 0; i < n->n_layers; i++)
193 | {
194 | Layer* l = n->Layers[i];
195 | switch (l->type)
196 | {
197 | case LT_DENSE:
198 | Dense_GetGrads((Dense*)l->aData, &grads);
199 | break;
200 | default: break;
201 | }
202 | }
203 | return grads;
204 | } 
--------------------------------------------------------------------------------
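For reference, a minimal usage sketch of the Model API in Model.c above. Conv2d_Create matches the definition in Conv2d.c; the Input_Create and Relu_Create signatures are assumptions inferred from the Softmax_Create(Layer *in) pattern in Softmax.h, so check Input.h and Relu.h for the exact prototypes.

#include "Model.h"

int main(void)
{
    Model m = Model_Create();
    //assumed creator: an input layer producing a 28x28x1 tensor (see Input.h)
    Layer* in = Model_AddLayer(&m, Input_Create((shape){ 28, 28, 1 }));
    //8 kernels of 3x3, stride 1, padding 1, He initialization (signature from Conv2d.c)
    shape2 k = { 3, 3 }, s = { 1, 1 };
    Layer* conv = Model_AddLayer(&m, Conv2d_Create(8, k, s, 1, R_HE, in));
    //assumed creator, following the Softmax_Create(Layer *in) pattern
    Model_AddLayer(&m, Relu_Create(conv));

    Model_Forward(&m);    //runs Forward_Layer on each layer in order
    Model_Backward(&m);   //walks the layers in reverse, accumulating gradients
    Model_CLearGrads(&m); //zeroes parameter gradients and layer chain gradients
    return 0;
}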