├── Utils
├── ModelTest
│ ├── CMakeLists.txt
│ └── ModelTest.cpp
├── Models
│ ├── README.md
│ ├── BossWN-nano.nam
│ ├── BossLSTM-2x8.nam
│ └── BossLSTM-1x16.nam
└── CMakeLists.txt
├── CMakeLists.txt
├── .gitmodules
├── NeuralAudioCAPI
├── CMakeLists.txt
├── NeuralAudioCApi.h
└── NeuralAudioCApi.cpp
├── NeuralAudio
├── RTNeuralLoader.h
├── TemplateHelper.h
├── Activation.h
├── NAMModel.h
├── NeuralModel.h
├── RTNeuralLoader.cpp
├── CMakeLists.txt
├── LSTMDynamic.h
├── LSTM.h
├── NeuralModel.cpp
├── InternalModel.h
├── WaveNetDynamic.h
├── RTNeuralModel.h
└── WaveNet.h
├── CREDITS.md
├── NeuralAudioCSharp
├── NeuralAudioTest
│ ├── Program.cs
│ └── NeuralAudioTest.csproj
├── NeuralAudio
│ ├── NeuralAudio.csproj
│ ├── NativeApi.cs
│ └── NeuralModel.cs
└── NeuralAudioCSharp.sln
├── LICENSE
├── deps
└── RTNeural-NAM
│ ├── LICENSE
│ └── wavenet
│ ├── arena.hpp
│ ├── wavenet_model.hpp
│ ├── wavenet_layer.hpp
│ └── wavenet_layer_array.hpp
├── .github
└── workflows
│ └── build.yml
├── .gitignore
└── README.md
/Utils/ModelTest/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | create_util(ModelTest)
3 |
--------------------------------------------------------------------------------
/Utils/Models/README.md:
--------------------------------------------------------------------------------
1 | Licensing note:
2 |
3 | The "tw40_blues_deluxe_deerinkstudios.json" model was created by Deer Ink Studios and is under the CC BY-NC-ND 4.0 license.
4 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.5)
2 |
3 | project(NeuralAudio VERSION 0.0.1)
4 |
5 | add_subdirectory(NeuralAudio)
6 | add_subdirectory(NeuralAudioCAPI)
7 |
8 | option(BUILD_UTILS "Build NeuralAudio utils" OFF)
9 | if(BUILD_UTILS)
10 | message(STATUS "Building NeuralAudio Utils")
11 | add_subdirectory(Utils)
12 | endif()
13 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "deps/NeuralAmpModelerCore"]
2 | path = deps/NeuralAmpModelerCore
3 | url = https://github.com/mikeoliphant/NeuralAmpModelerCore
4 | [submodule "deps/RTNeural"]
5 | path = deps/RTNeural
6 | url = https://github.com/mikeoliphant/RTNeural
7 | [submodule "deps/math_approx"]
8 | path = deps/math_approx
9 | url = https://github.com/Chowdhury-DSP/math_approx
10 |
--------------------------------------------------------------------------------
/NeuralAudioCAPI/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | set(CMAKE_CXX_STANDARD 20)
2 |
3 | set(SOURCES NeuralAudioCApi.h NeuralAudioCApi.cpp)
4 |
5 | add_library(NeuralAudioCAPI SHARED ${SOURCES})
6 |
7 | include_directories(NeuralAudioCAPI ../NeuralAudio)
8 |
9 | target_link_libraries(NeuralAudioCAPI PRIVATE NeuralAudio)
10 |
11 | source_group(NeuralAudioCAPI ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCES})
12 |
--------------------------------------------------------------------------------
/NeuralAudio/RTNeuralLoader.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "NeuralModel.h"
4 | 
5 | namespace NeuralAudio
6 | {
7 | 	// Registers the statically-compiled RTNeural model definitions (implemented in RTNeuralLoader.cpp).
8 | 	extern void EnsureRTNeuralModelDefsAreLoaded();
9 | 	// Factory functions: each returns a heap-allocated model built from the given JSON
10 | 	// (caller owns the pointer), or nullptr if the configuration is not supported.
11 | 	extern NeuralModel* RTNeuralLoadNAMWaveNet(const nlohmann::json& modelJson);
12 | 	extern NeuralModel* RTNeuralLoadNAMLSTM(const nlohmann::json& modelJson);
13 | 	extern NeuralModel* RTNeuralLoadKeras(const nlohmann::json& modelJson);
14 | }
--------------------------------------------------------------------------------
/Utils/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | function(create_util util_name)
2 | message(STATUS "Configuring util: ${util_name}")
3 | add_executable(${util_name} ${util_name}.cpp)
4 | target_include_directories(${util_name} PUBLIC ${CMAKE_SOURCE_DIR})
5 | target_link_libraries(${util_name} PUBLIC NeuralAudio)
6 | target_compile_features(${util_name} PRIVATE cxx_std_17)
7 | endfunction()
8 |
9 | add_subdirectory(ModelTest)
10 |
11 | file(COPY Models DESTINATION ./)
12 |
--------------------------------------------------------------------------------
/CREDITS.md:
--------------------------------------------------------------------------------
1 | [GitHub contributors](https://github.com/mikeoliphant/NeuralAudio/graphs/contributors)
2 |
3 | Depending on how it is compiled, this repository uses code from a number of sources. Please see the individual repositories for license information.
4 |
5 | https://github.com/sdatkinson/NeuralAmpModelerCore
6 |
7 | https://github.com/jatinchowdhury18/RTNeural
8 |
9 | https://github.com/jatinchowdhury18/RTNeural-NAM
10 |
11 | https://gitlab.com/libeigen/eigen
12 |
13 | https://github.com/nlohmann/json
14 |
--------------------------------------------------------------------------------
/NeuralAudioCSharp/NeuralAudioTest/Program.cs:
--------------------------------------------------------------------------------
1 | namespace NeuralAudioTest
2 | {
3 | using NeuralAudio;
4 |
5 | internal class Program
6 | {
7 | static void Main(string[] args)
8 | {
9 | NeuralModel.SetWaveNetModelLoadMode(NeuralModel.EModelLoadMode.Internal);
10 |
11 | NeuralModel model = NeuralModel.FromFile("BossWN-standard.nam");
12 |
13 | var input = new float[1024];
14 | var output = new float[1024];
15 |
16 | model.Process(input, output, 1024);
17 | }
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/NeuralAudioCSharp/NeuralAudio/NeuralAudio.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net8.0
5 | enable
6 | enable
7 | True
8 |
9 |
10 |
11 |
12 | PreserveNewest
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/NeuralAudio/TemplateHelper.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <cstddef>
4 | #include <utility>
5 | 
6 | namespace NeuralAudio
7 | {
8 | 	// Invokes f(std::integral_constant<std::size_t, I>{}) for each I in the index sequence.
9 | 	// Allows compile-time dispatch over a fixed set of indices (used to select static model sizes).
10 | 	template <typename F, std::size_t... Is>
11 | 	void ForEachIndex(std::index_sequence<Is...>, F&& f)
12 | 	{
13 | 		int dummy[] = { 0, /* Handles empty Is; the following cast guards against an evil overloaded comma operator */
14 | 			(static_cast<void>(f(std::integral_constant<std::size_t, Is>())), 0)... };
15 | 		static_cast<void>(dummy); // avoid warning for unused variable
16 | 	}
17 | 
18 | 	// Invokes f once for each index in [0, N).
19 | 	template <std::size_t N, typename F>
20 | 	void ForEachIndex(F&& f)
21 | 	{
22 | 		ForEachIndex(std::make_index_sequence<N>(), std::forward<F>(f));
23 | 	}
24 | }
--------------------------------------------------------------------------------
/NeuralAudioCSharp/NeuralAudioTest/NeuralAudioTest.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net8.0
6 | enable
7 | enable
8 |
9 |
10 |
11 |
12 | PreserveNewest
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Mike Oliphant
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/NeuralAudioCAPI/NeuralAudioCApi.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <stddef.h>	// size_t
4 | #include <stdbool.h>	// bool for plain-C consumers
5 | 
6 | #ifdef __cplusplus
7 | extern "C" {
8 | #endif
9 | 
10 | #ifdef _MSC_VER
11 | #define NA_EXTERN extern __declspec(dllexport)
12 | #else
13 | #define NA_EXTERN extern
14 | #endif
15 | 
16 | /* Opaque handle wrapping a NeuralAudio::NeuralModel instance.
17 |    The typedef makes the bare name "NeuralModel" valid from C as well as C++. */
18 | typedef struct NeuralModel NeuralModel;
19 | 
20 | /* Loads a model from a file path; release the returned handle with DeleteModel. */
21 | NA_EXTERN NeuralModel* CreateModelFromFile(const wchar_t* modelPath);
22 | 
23 | NA_EXTERN void DeleteModel(NeuralModel* model);
24 | 
25 | /* loadMode values correspond to NeuralAudio::EModelLoadMode (Internal/RTNeural/NAMCore). */
26 | NA_EXTERN void SetLSTMLoadMode(int loadMode);
27 | 
28 | NA_EXTERN void SetWaveNetLoadMode(int loadMode);
29 | 
30 | NA_EXTERN void SetAudioInputLevelDBu(float audioDBu);
31 | 
32 | NA_EXTERN void SetDefaultMaxAudioBufferSize(int maxSize);
33 | 
34 | NA_EXTERN int GetLoadMode(NeuralModel* model);
35 | 
36 | NA_EXTERN bool IsStatic(NeuralModel* model);
37 | 
38 | NA_EXTERN void SetMaxAudioBufferSize(NeuralModel* model, int maxSize);
39 | 
40 | NA_EXTERN float GetRecommendedInputDBAdjustment(NeuralModel* model);
41 | 
42 | NA_EXTERN float GetRecommendedOutputDBAdjustment(NeuralModel* model);
43 | 
44 | NA_EXTERN float GetSampleRate(NeuralModel* model);
45 | 
46 | /* Processes numSamples samples from input into output. */
47 | NA_EXTERN void Process(NeuralModel* model, float* input, float* output, size_t numSamples);
48 | 
49 | #ifdef __cplusplus
50 | }
51 | #endif
--------------------------------------------------------------------------------
/deps/RTNeural-NAM/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2024, jatinchowdhury18
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/NeuralAudioCSharp/NeuralAudioCSharp.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.13.35806.99
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NeuralAudioTest", "NeuralAudioTest\NeuralAudioTest.csproj", "{9BC1E0F6-360A-0836-DD8E-C5B24C9DFBF0}"
7 | EndProject
8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NeuralAudio", "NeuralAudio\NeuralAudio.csproj", "{861DAACF-7A17-46AC-9416-3FE206F1449A}"
9 | EndProject
10 | Global
11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | Debug|Any CPU = Debug|Any CPU
13 | Release|Any CPU = Release|Any CPU
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {9BC1E0F6-360A-0836-DD8E-C5B24C9DFBF0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17 | {9BC1E0F6-360A-0836-DD8E-C5B24C9DFBF0}.Debug|Any CPU.Build.0 = Debug|Any CPU
18 | {9BC1E0F6-360A-0836-DD8E-C5B24C9DFBF0}.Release|Any CPU.ActiveCfg = Release|Any CPU
19 | {9BC1E0F6-360A-0836-DD8E-C5B24C9DFBF0}.Release|Any CPU.Build.0 = Release|Any CPU
20 | {861DAACF-7A17-46AC-9416-3FE206F1449A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21 | {861DAACF-7A17-46AC-9416-3FE206F1449A}.Debug|Any CPU.Build.0 = Debug|Any CPU
22 | {861DAACF-7A17-46AC-9416-3FE206F1449A}.Release|Any CPU.ActiveCfg = Release|Any CPU
23 | {861DAACF-7A17-46AC-9416-3FE206F1449A}.Release|Any CPU.Build.0 = Release|Any CPU
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | GlobalSection(ExtensibilityGlobals) = postSolution
29 | SolutionGuid = {DE4D2541-2A2F-4F6E-852E-80980B5B2862}
30 | EndGlobalSection
31 | EndGlobal
32 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 |
3 | on:
4 | [workflow_dispatch, push, pull_request]
5 |
6 | env:
7 | BUILD_TYPE: Release
8 |
9 | jobs:
10 | build:
11 | name: Build Windows
12 | runs-on: windows-latest
13 | steps:
14 | - uses: actions/checkout@v3.3.0
15 | with:
16 | submodules: recursive
17 |
18 | - name: Build
19 | working-directory: ${{github.workspace}}/build
20 | run: |
21 | cmake -G "Visual Studio 17 2022" -A x64 -DBUILD_UTILS=ON -DBUILD_NAMCORE=ON -DBUILD_STATIC_RTNEURAL=ON -T ClangCL ..
22 | cmake --build . --config=release -j4
23 |
24 | - name: Run ModelTest
25 | working-directory: ${{github.workspace}}/build/Utils/ModelTest/Release
26 | run: |
27 | ./ModelTest.exe
28 | ./ModelTest.exe ..\..\Models\tw40_blues_deluxe_deerinkstudios.json
29 |
30 | - name: Upload binary
31 | uses: actions/upload-artifact@v4
32 | with:
33 | name: ModelTest-Windows
34 | path: ${{github.workspace}}/build/Utils
35 |
36 | build-linux-x64:
37 | name: Build Linux x64
38 | runs-on: ubuntu-24.04
39 | steps:
40 | - uses: actions/checkout@v3.3.0
41 | with:
42 | submodules: recursive
43 |
44 | - name: Build
45 | working-directory: ${{github.workspace}}/build
46 | env:
47 | CXX: g++-13
48 | run: |
49 | cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBUILD_UTILS=ON -DBUILD_NAMCORE=ON -DBUILD_STATIC_RTNEURAL=ON
50 | cmake --build . --config $BUILD_TYPE -j4
51 |
52 | - name: Run ModelTest
53 | working-directory: ${{github.workspace}}/build/Utils/ModelTest
54 | run: |
55 | ./ModelTest
56 | ./ModelTest ../Models/tw40_blues_deluxe_deerinkstudios.json
57 |
58 | - name: Upload ModelTest
59 | uses: actions/upload-artifact@v4
60 | with:
61 | name: ModelTest-Linux-x64
62 | path: ${{github.workspace}}/build/Utils
63 |
--------------------------------------------------------------------------------
/NeuralAudioCAPI/NeuralAudioCApi.cpp:
--------------------------------------------------------------------------------
1 | #include "NeuralAudioCApi.h"
2 | #include "NeuralModel.h"
3 | 
4 | // Plain-C wrapper around NeuralAudio::NeuralModel; see NeuralAudioCApi.h for the exported API.
5 | 
6 | // Opaque wrapper struct: owns the underlying C++ model pointer.
7 | struct NeuralModel
8 | {
9 | 	NeuralAudio::NeuralModel* model;
10 | };
11 | 
12 | NeuralModel* CreateModelFromFile(const wchar_t* modelPath)
13 | {
14 | 	NeuralModel* model = new NeuralModel();
15 | 
16 | 	model->model = NeuralAudio::NeuralModel::CreateFromFile(modelPath);
17 | 
18 | 	return model;
19 | }
20 | 
21 | void DeleteModel(NeuralModel* model)
22 | {
23 | 	// Tolerate NULL, matching free()-style C API conventions.
24 | 	if (model == nullptr)
25 | 		return;
26 | 
27 | 	delete model->model;
28 | 	delete model;
29 | }
30 | 
31 | void SetLSTMLoadMode(int loadMode)
32 | {
33 | 	NeuralAudio::NeuralModel::SetLSTMLoadMode((NeuralAudio::EModelLoadMode)loadMode);
34 | }
35 | 
36 | void SetWaveNetLoadMode(int loadMode)
37 | {
38 | 	NeuralAudio::NeuralModel::SetWaveNetLoadMode((NeuralAudio::EModelLoadMode)loadMode);
39 | }
40 | 
41 | void SetAudioInputLevelDBu(float audioDBu)
42 | {
43 | 	NeuralAudio::NeuralModel::SetAudioInputLevelDBu(audioDBu);
44 | }
45 | 
46 | void SetDefaultMaxAudioBufferSize(int maxSize)
47 | {
48 | 	NeuralAudio::NeuralModel::SetDefaultMaxAudioBufferSize(maxSize);
49 | }
50 | 
51 | int GetLoadMode(NeuralModel* model)
52 | {
53 | 	return model->model->GetLoadMode();
54 | }
55 | 
56 | bool IsStatic(NeuralModel* model)
57 | {
58 | 	return model->model->IsStatic();
59 | }
60 | 
61 | void SetMaxAudioBufferSize(NeuralModel* model, int maxSize)
62 | {
63 | 	model->model->SetMaxAudioBufferSize(maxSize);
64 | }
65 | 
66 | float GetRecommendedInputDBAdjustment(NeuralModel* model)
67 | {
68 | 	return model->model->GetRecommendedInputDBAdjustment();
69 | }
70 | 
71 | float GetRecommendedOutputDBAdjustment(NeuralModel* model)
72 | {
73 | 	return model->model->GetRecommendedOutputDBAdjustment();
74 | }
75 | 
76 | float GetSampleRate(NeuralModel* model)
77 | {
78 | 	return model->model->GetSampleRate();
79 | }
80 | 
81 | void Process(NeuralModel* model, float* input, float* output, size_t numSamples)
82 | {
83 | 	model->model->Process(input, output, numSamples);
84 | }
--------------------------------------------------------------------------------
/NeuralAudioCSharp/NeuralAudio/NativeApi.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.VisualBasic;
2 | using System;
3 | using System.Runtime.InteropServices;
4 |
5 | namespace NeuralAudio
6 | {
7 | static class NativeApi
8 | {
9 | public const string NEURAL_AUDIO_LIB_NAME = "NeuralAudioCAPI";
10 |
11 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
12 | public static extern IntPtr CreateModelFromFile([MarshalAs(UnmanagedType.LPWStr)]string modelPath);
13 |
14 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
15 | public static extern void DeleteModel(IntPtr model);
16 |
17 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
18 | public static extern void SetLSTMLoadMode(int loadMode);
19 |
20 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
21 | public static extern void SetWaveNetLoadMode(int loadMode);
22 |
23 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
24 | public static extern void SetAudioInputLevelDBu(float audioDBu);
25 |
26 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
27 | public static extern void SetDefaultMaxAudioBufferSize(int maxSize);
28 |
29 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
30 | public static extern int GetLoadMode(IntPtr model);
31 |
32 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
33 | public static extern bool IsStatic(IntPtr model);
34 |
35 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
36 | public static extern void SetMaxAudioBufferSize(IntPtr model, int maxSize);
37 |
38 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
39 | public static extern float GetRecommendedInputDBAdjustment(IntPtr model);
40 |
41 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
42 | public static extern float GetRecommendedOutputDBAdjustment(IntPtr model);
43 |
44 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
45 | public static extern float GetSampleRate(IntPtr model);
46 |
47 | [DllImport(NEURAL_AUDIO_LIB_NAME)]
48 | public static extern unsafe void Process(IntPtr model, float* input, float* output, uint numSamples);
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/NeuralAudio/Activation.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <cmath>
4 | #include <cstddef>
5 | #include <Eigen/Core>	// NOTE(review): include names were lost in extraction; Eigen::numext usage below requires an Eigen header - confirm against upstream
6 | 
7 | namespace NeuralAudio
8 | {
9 | // Default activation math implementation used by the LSTM and WaveNet models.
10 | #ifndef LSTM_MATH
11 | #define LSTM_MATH FastMath
12 | #endif
13 | 
14 | #ifndef WAVENET_MATH
15 | #define WAVENET_MATH FastMath
16 | #endif
17 | 
18 | 	// Reference implementation using the standard library.
19 | 	struct StdMath
20 | 	{
21 | 		// Applies Tanh element-wise, in place, over the matrix's contiguous data.
22 | 		template <typename Matrix>
23 | 		static auto Tanh(Matrix& x)
24 | 		{
25 | 			float* data = x.data();
26 | 			size_t size = x.rows() * x.cols();
27 | 
28 | 			for (size_t pos = 0; pos < size; pos++)
29 | 			{
30 | 				data[pos] = Tanh(data[pos]);
31 | 			}
32 | 
33 | 			return x;
34 | 		}
35 | 
36 | 		static inline float Tanh(const float x)
37 | 		{
38 | 			return std::tanh(x);
39 | 		}
40 | 
41 | 		static inline float Sigmoid(float x)
42 | 		{
43 | 			return 1.0f / (1.0f + std::exp(-x));
44 | 		}
45 | 	};
46 | 
47 | 	// Fast rational approximation of tanh/sigmoid (the default via LSTM_MATH/WAVENET_MATH).
48 | 	struct FastMath
49 | 	{
50 | 		// Applies the fast Tanh element-wise, in place, over the matrix's contiguous data.
51 | 		template <typename Matrix>
52 | 		static auto Tanh(Matrix& x)
53 | 		{
54 | 			float* data = x.data();
55 | 			size_t size = x.rows() * x.cols();
56 | 
57 | 			for (size_t pos = 0; pos < size; pos++)
58 | 			{
59 | 				data[pos] = Tanh(data[pos]);
60 | 			}
61 | 
62 | 			return x;
63 | 		}
64 | 
65 | 		static inline float Tanh(const float x)
66 | 		{
67 | 			//return std::tanh(x);
68 | 
69 | 			//return math_approx::tanh<5>(x);
70 | 
71 | 			const float ax = fabsf(x);
72 | 
73 | 			const float x2 = x * x;
74 | 
75 | 			// Rational polynomial approximation of tanh.
76 | 			return (x * (2.45550750702956f + 2.45550750702956f * ax + (0.893229853513558f + 0.821226666969744f * ax) * x2)
77 | 				/ (2.44506634652299f + (2.44506634652299f + x2) * fabsf(x + 0.814642734961073f * x * ax)));
78 | 		}
79 | 
80 | 		static inline float Sigmoid(float x)
81 | 		{
82 | 			//return math_approx::sigmoid_exp<5>(x);
83 | 
84 | 			//return 1.0f / (1.0f + std::exp(-x));
85 | 			return 0.5f * (Tanh(x * 0.5f) + 1);	// sigmoid(x) == 0.5 * (tanh(x/2) + 1)
86 | 		}
87 | 	};
88 | 
89 | 	// Eigen-based activations (vectorized array expression for the matrix overload).
90 | 	struct EigenMath
91 | 	{
92 | 		template <typename Matrix>
93 | 		static auto Tanh(const Matrix& x)
94 | 		{
95 | 			return x.array().tanh();
96 | 		}
97 | 
98 | 		static inline float Tanh(const float x)
99 | 		{
100 | 			return Eigen::numext::tanh(x);
101 | 		}
102 | 
103 | 		static inline float Sigmoid(float x)
104 | 		{
105 | 			return 0.5f * (Tanh(x * 0.5f) + 1);
106 | 		}
107 | 	};
108 | }
--------------------------------------------------------------------------------
/NeuralAudioCSharp/NeuralAudio/NeuralModel.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.CompilerServices;
3 | 
4 | namespace NeuralAudio
5 | {
6 |     /// <summary>Managed wrapper around a native NeuralAudio model handle.</summary>
7 |     public class NeuralModel
8 |     {
9 |         // Mirrors NeuralAudio::EModelLoadMode on the native side.
10 |         public enum EModelLoadMode
11 |         {
12 |             Internal,
13 |             RTNeural,
14 |             NAMCore
15 |         };
16 | 
17 |         IntPtr nativeModel;
18 | 
19 |         public static void SetLSTMModelLoadMode(EModelLoadMode mode)
20 |         {
21 |             NativeApi.SetLSTMLoadMode((int)mode);
22 |         }
23 | 
24 |         public static void SetWaveNetModelLoadMode(EModelLoadMode mode)
25 |         {
26 |             NativeApi.SetWaveNetLoadMode((int)mode);
27 |         }
28 | 
29 |         public static void SetDefaultMaxAudioBufferSize(int bufferSize)
30 |         {
31 |             NativeApi.SetDefaultMaxAudioBufferSize(bufferSize);
32 |         }
33 | 
34 |         public bool IsStatic { get { return NativeApi.IsStatic(nativeModel); } }
35 |         public EModelLoadMode LoadMode { get { return (EModelLoadMode)NativeApi.GetLoadMode(nativeModel); } }
36 |         public float SampleRate { get { return NativeApi.GetSampleRate(nativeModel); } }
37 |         public float RecommendedInputDBAdjustment { get { return NativeApi.GetRecommendedInputDBAdjustment(nativeModel); } }
38 |         public float RecommendedOutputDBAdjustment { get { return NativeApi.GetRecommendedOutputDBAdjustment(nativeModel); } }
39 | 
40 |         /// <summary>Loads a model from a file via the native library.</summary>
41 |         public static NeuralModel FromFile(string modelPath)
42 |         {
43 |             NeuralModel model = new NeuralModel();
44 | 
45 |             IntPtr nativeModel = NativeApi.CreateModelFromFile(modelPath);
46 | 
47 |             model.nativeModel = nativeModel;
48 | 
49 |             return model;
50 |         }
51 | 
52 |         public void SetMaxAudioBufferSize(int bufferSize)
53 |         {
54 |             NativeApi.SetMaxAudioBufferSize(nativeModel, bufferSize);
55 |         }
56 | 
57 |         /// <summary>Processes numSamples samples from input into output.</summary>
58 |         public unsafe void Process(ReadOnlySpan<float> input, Span<float> output, uint numSamples)
59 |         {
60 |             fixed (float* inputPtr = input)
61 |             {
62 |                 fixed (float* outputPtr = output)
63 |                 {
64 |                     NativeApi.Process(nativeModel, inputPtr, outputPtr, numSamples);
65 |                 }
66 |             }
67 |         }
68 |     }
69 | }
--------------------------------------------------------------------------------
/NeuralAudio/NAMModel.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "NeuralModel.h"
4 | // NOTE(review): header names below were lost in extraction; reconstructed from NeuralAmpModelerCore usage - confirm against upstream
5 | #include <NAM/activations.h>
6 | #include <NAM/dsp.h>
7 | #include <NAM/lstm.h>
8 | #include <NAM/wavenet.h>
9 | 
10 | namespace NeuralAudio
11 | {
12 | 	// NeuralModel implementation backed by the NeuralAmpModelerCore (NAM) DSP engine.
13 | 	class NAMModel : public NeuralModel
14 | 	{
15 | 	public:
16 | 		NAMModel()
17 | 		{
18 | 			nam::activations::Activation::enable_fast_tanh();
19 | 		}
20 | 
21 | 		~NAMModel()
22 | 		{
23 | 			if (namModel)
24 | 				namModel.reset();
25 | 		}
26 | 
27 | 		EModelLoadMode GetLoadMode() override
28 | 		{
29 | 			return EModelLoadMode::NAMCore;
30 | 		}
31 | 
32 | 		// Builds a NAM WaveNet or LSTM model from NAM-format JSON; always returns true.
33 | 		bool LoadFromJson(const nlohmann::json& modelJson)
34 | 		{
35 | 			if (namModel)
36 | 				namModel.reset();
37 | 
38 | 			ReadNAMConfig(modelJson);
39 | 
40 | 			std::string arch = modelJson.at("architecture");
41 | 
42 | 			nlohmann::json config = modelJson.at("config");
43 | 
44 | 			std::vector<float> weights = modelJson.at("weights");
45 | 
46 | 			if (arch == "WaveNet")
47 | 			{
48 | 				std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
49 | 
50 | 				for (size_t i = 0; i < config.at("layers").size(); i++)
51 | 				{
52 | 					nlohmann::json layerConfig = config.at("layers").at(i);
53 | 
54 | 					layer_array_params.push_back(
55 | 						nam::wavenet::LayerArrayParams(layerConfig.at("input_size"), layerConfig.at("condition_size"), layerConfig.at("head_size"),
56 | 							layerConfig.at("channels"), layerConfig.at("kernel_size"), layerConfig.at("dilations"),
57 | 							layerConfig.at("activation"), layerConfig.at("gated"), layerConfig.at("head_bias")));
58 | 				}
59 | 
60 | 				const bool with_head = !config.at("head").is_null();
61 | 				const float head_scale = config.at("head_scale");
62 | 
63 | 				namModel = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, sampleRate);
64 | 			}
65 | 			else if (arch == "LSTM")
66 | 			{
67 | 				const int num_layers = config.at("num_layers");
68 | 				const int input_size = config.at("input_size");
69 | 				const int hidden_size = config.at("hidden_size");
70 | 
71 | 				namModel = std::make_unique<nam::lstm::LSTM>(num_layers, input_size, hidden_size, weights, sampleRate);
72 | 			}
73 | 
74 | 			return true;
75 | 		}
76 | 
77 | 		void Process(float* input, float* output, size_t numSamples) override
78 | 		{
79 | 			namModel->process(input, output, (int)numSamples);
80 | 		}
81 | 
82 | 		void Prewarm() override
83 | 		{
84 | 			namModel->prewarm();
85 | 		}
86 | 
87 | 	private:
88 | 		std::unique_ptr<nam::DSP> namModel = nullptr;
89 | 	};
90 | }
--------------------------------------------------------------------------------
/NeuralAudio/NeuralModel.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <algorithm>
4 | #include <filesystem>
5 | #include <vector>
6 | #include "json.hpp"
7 | 
8 | namespace NeuralAudio
9 | {
10 | 	// Which backend a model is (or should be) loaded with.
11 | 	enum EModelLoadMode
12 | 	{
13 | 		Internal,
14 | 		RTNeural,
15 | 		NAMCore
16 | 	};
17 | 
18 | 	// Abstract base class for all neural models; concrete backends override Process/Prewarm/etc.
19 | 	class NeuralModel
20 | 	{
21 | 	public:
22 | 		// Factory methods; the caller owns the returned pointer.
23 | 		static NeuralModel* CreateFromFile(std::filesystem::path modelPath);
24 | 		static NeuralModel* CreateFromStream(std::basic_istream<char>& stream, std::filesystem::path extension);
25 | 
26 | 		virtual ~NeuralModel()
27 | 		{
28 | 		}
29 | 
30 | 		// Sets the preferred backend for LSTM models; returns false if that backend is unsupported.
31 | 		static bool SetLSTMLoadMode(EModelLoadMode val)
32 | 		{
33 | 			if (!SupportsLSTMLoadMode(val))
34 | 				return false;
35 | 
36 | 			lstmLoadMode = val;
37 | 
38 | 			return true;
39 | 		}
40 | 
41 | 		// Sets the preferred backend for WaveNet models; returns false if that backend is unsupported.
42 | 		static bool SetWaveNetLoadMode(EModelLoadMode val)
43 | 		{
44 | 			if (!SupportsWaveNetLoadMode(val))
45 | 				return false;
46 | 
47 | 			wavenetLoadMode = val;
48 | 
49 | 			return true;
50 | 		}
51 | 
52 | 		static bool SupportsWaveNetLoadMode(EModelLoadMode mode);
53 | 		static bool SupportsLSTMLoadMode(EModelLoadMode mode);
54 | 
55 | 		static void SetAudioInputLevelDBu(float audioDBu)
56 | 		{
57 | 			audioInputLevelDBu = audioDBu;
58 | 		}
59 | 
60 | 		static void SetDefaultMaxAudioBufferSize(int maxSize)
61 | 		{
62 | 			defaultMaxAudioBufferSize = maxSize;
63 | 		}
64 | 
65 | 		virtual EModelLoadMode GetLoadMode()
66 | 		{
67 | 			return EModelLoadMode::Internal;
68 | 		}
69 | 
70 | 		virtual bool IsStatic()
71 | 		{
72 | 			return false;
73 | 		}
74 | 
75 | 		virtual void SetMaxAudioBufferSize(int maxSize)
76 | 		{
77 | 			(void)maxSize;
78 | 		}
79 | 
80 | 		virtual float GetRecommendedInputDBAdjustment()
81 | 		{
82 | 			return audioInputLevelDBu - modelInputLevelDBu;
83 | 		}
84 | 
85 | 		virtual float GetRecommendedOutputDBAdjustment()
86 | 		{
87 | 			return -18 - modelLoudnessDB;
88 | 		}
89 | 
90 | 		virtual float GetSampleRate()
91 | 		{
92 | 			return sampleRate;
93 | 		}
94 | 
95 | 		virtual int GetReceptiveFieldSize()
96 | 		{
97 | 			return -1; // No fixed receptive field size (ie: for LSTM)
98 | 		}
99 | 
100 | 		// Default no-op; concrete backends override.
101 | 		virtual void Process(float* input, float* output, size_t numSamples)
102 | 		{
103 | 			(void)input;
104 | 			(void)output;
105 | 			(void)numSamples;
106 | 		}
107 | 
108 | 		virtual void Prewarm()
109 | 		{
110 | 		}
111 | 
112 | 	protected:
113 | 		void ReadNAMConfig(const nlohmann::json& modelJson);
114 | 		void ReadKerasConfig(const nlohmann::json& modelJson);
115 | 
116 | 		float modelInputLevelDBu = 12;
117 | 		float modelOutputLevelDBu = 12;
118 | 		float modelLoudnessDB = -18;
119 | 		float sampleRate = 48000;
120 | 
121 | 		inline static float audioInputLevelDBu = 12;
122 | 		inline static EModelLoadMode lstmLoadMode = EModelLoadMode::Internal;
123 | 		inline static EModelLoadMode wavenetLoadMode = EModelLoadMode::Internal;
124 | 		inline static int defaultMaxAudioBufferSize = 128;
125 | 
126 | 		// Runs zeroed audio through the model in blockSize chunks to settle internal state.
127 | 		void Prewarm(size_t numSamples, size_t blockSize)
128 | 		{
129 | 			std::vector<float> input;
130 | 			input.resize(blockSize);
131 | 			std::fill(input.begin(), input.end(), 0.0f);
132 | 
133 | 			std::vector<float> output;
134 | 			output.resize(blockSize);
135 | 
136 | 			for (size_t block = 0; block < (numSamples / blockSize); block++)
137 | 			{
138 | 				Process(input.data(), output.data(), blockSize);
139 | 			}
140 | 		}
141 | 	};
142 | }
--------------------------------------------------------------------------------
/NeuralAudio/RTNeuralLoader.cpp:
--------------------------------------------------------------------------------
1 | #include <list>
2 | #include "RTNeuralModel.h"
3 | 
4 | namespace NeuralAudio
5 | {
6 | #ifdef BUILD_STATIC_RTNEURAL
7 | 	std::list<RTNeuralLSTMDefinitionBase*> rtNeuralLSTMModelDefs;
8 | 	std::list<RTNeuralWaveNetDefinitionBase*> rtNeuralWaveNetModelDefs;
9 | 
10 | 	// Registers the statically-compiled RTNeural model configurations.
11 | 	// Guarded so repeated calls don't duplicate entries.
12 | 	void EnsureRTNeuralModelDefsAreLoaded()
13 | 	{
14 | 		if (!rtNeuralLSTMModelDefs.empty())
15 | 			return;
16 | 
17 | 		rtNeuralLSTMModelDefs.push_back(new RTNeuralLSTMDefinitionT<1, 8>);
18 | 		rtNeuralLSTMModelDefs.push_back(new RTNeuralLSTMDefinitionT<1, 12>);
19 | 		rtNeuralLSTMModelDefs.push_back(new RTNeuralLSTMDefinitionT<1, 16>);
20 | 		rtNeuralLSTMModelDefs.push_back(new RTNeuralLSTMDefinitionT<1, 24>);
21 | 		rtNeuralLSTMModelDefs.push_back(new RTNeuralLSTMDefinitionT<2, 8>);
22 | 		rtNeuralLSTMModelDefs.push_back(new RTNeuralLSTMDefinitionT<2, 12>);
23 | 		rtNeuralLSTMModelDefs.push_back(new RTNeuralLSTMDefinitionT<2, 16>);
24 | 
25 | 		rtNeuralWaveNetModelDefs.push_back(new RTNeuralWaveNetDefinitionT<16, 8>); // Standard
26 | 		rtNeuralWaveNetModelDefs.push_back(new RTNeuralWaveNetDefinitionT<12, 6>); // Lite
27 | 		rtNeuralWaveNetModelDefs.push_back(new RTNeuralWaveNetDefinitionT<8, 4>); // Feather
28 | 		rtNeuralWaveNetModelDefs.push_back(new RTNeuralWaveNetDefinitionT<4, 2>); // Nano
29 | 	}
30 | 
31 | 	// Finds a statically-compiled LSTM definition matching the layer/hidden-size combination, or nullptr.
32 | 	RTNeuralLSTMDefinitionBase* FindRTNeuralLSTMDefinition(size_t numLayers, size_t hiddenSize)
33 | 	{
34 | 		for (auto const& model : rtNeuralLSTMModelDefs)
35 | 		{
36 | 			if ((numLayers == model->GetNumLayers()) && (hiddenSize == model->GetHiddenSize()))
37 | 				return model;
38 | 		}
39 | 
40 | 		return nullptr;
41 | 	}
42 | 
43 | 	// Finds a statically-compiled WaveNet definition matching the channel/head-size combination, or nullptr.
44 | 	RTNeuralWaveNetDefinitionBase* FindRTNeuralWaveNetDefinition(size_t numChannels, size_t headSize)
45 | 	{
46 | 		for (auto const& model : rtNeuralWaveNetModelDefs)
47 | 		{
48 | 			if ((numChannels == model->GetNumChannels()) && (headSize == model->GetHeadSize()))
49 | 				return model;
50 | 		}
51 | 
52 | 		return nullptr;
53 | 	}
54 | 
55 | 	NeuralModel* RTNeuralLoadNAMWaveNet(const nlohmann::json& modelJson)
56 | 	{
57 | 		nlohmann::json config = modelJson.at("config");
58 | 
59 | 		nlohmann::json firstLayerConfig = config.at("layers").at(0);
60 | 		// NOTE(review): unused, but kept for its side effect of throwing if a second layer array is missing.
61 | 		nlohmann::json secondLayerConfig = config.at("layers").at(1);
62 | 
63 | 		auto modelDef = FindRTNeuralWaveNetDefinition(firstLayerConfig.at("channels"), firstLayerConfig.at("head_size"));
64 | 
65 | 		if (modelDef != nullptr)
66 | 		{
67 | 			auto model = modelDef->CreateModel();
68 | 
69 | 			model->LoadFromNAMJson(modelJson);
70 | 
71 | 			return model;
72 | 		}
73 | 
74 | 		return nullptr;
75 | 	}
76 | 
77 | 	NeuralModel* RTNeuralLoadNAMLSTM(const nlohmann::json& modelJson)
78 | 	{
79 | 		nlohmann::json config = modelJson.at("config");
80 | 
81 | 		auto modelDef = FindRTNeuralLSTMDefinition(config.at("num_layers"), config.at("hidden_size"));
82 | 
83 | 		if (modelDef != nullptr)
84 | 		{
85 | 			RTNeuralModel* model = modelDef->CreateModel();
86 | 
87 | 			if (model != nullptr)
88 | 			{
89 | 				model->LoadFromNAMJson(modelJson);
90 | 
91 | 				return model;
92 | 			}
93 | 		}
94 | 
95 | 		// If we didn't have a static model that matched, use RTNeural's dynamic model
96 | 		// (previously this fallback sat in unreachable code after an unconditional return).
97 | 		RTNeuralModelDyn* dynModel = new RTNeuralModelDyn;
98 | 		dynModel->LoadFromNAMJson(modelJson);
99 | 
100 | 		return dynModel;
101 | 	}
102 | 
103 | 	NeuralModel* RTNeuralLoadKeras(const nlohmann::json& modelJson)
104 | 	{
105 | 		const auto layers = modelJson.at("layers");
106 | 		const size_t numLayers = layers.size() - 1;
107 | 		const std::string modelType = layers.at(0).at("type");
108 | 		const int hiddenSize = layers.at(0).at("shape").back();
109 | 
110 | 		auto modelDef = FindRTNeuralLSTMDefinition(numLayers, hiddenSize);
111 | 
112 | 		if (modelDef != nullptr)
113 | 		{
114 | 			RTNeuralModel* model = modelDef->CreateModel();
115 | 
116 | 			model->LoadFromKerasJson(modelJson);
117 | 
118 | 			return model;
119 | 		}
120 | 
121 | 		return nullptr;
122 | 	}
123 | #endif
124 | }
--------------------------------------------------------------------------------
/NeuralAudio/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build configuration for the NeuralAudio static library.
set(CMAKE_CXX_STANDARD 20)

# Both the NAM core and the internal DSP operate on 32-bit float samples.
add_definitions(-DNAM_SAMPLE_FLOAT)
add_definitions(-DDSP_SAMPLE_FLOAT)

# Optional backends/implementations; each option gates a matching -D define
# consumed by the C++ sources.
option(BUILD_STATIC_RTNEURAL "Build Static RTNeural" OFF)
if(BUILD_STATIC_RTNEURAL)
	message(STATUS "Building static RTNeural models")
	add_definitions(-DBUILD_STATIC_RTNEURAL)
else()
	message(STATUS "NOT Building static RTNeural models")
endif()

option(BUILD_NAMCORE "Build NAM Core" OFF)
if(BUILD_NAMCORE)
	message(STATUS "Building NAM Core implementation")
	add_definitions(-DBUILD_NAMCORE)
else()
	message(STATUS "NOT Building NAM Core implementation")
endif()

option(BUILD_INTERNAL_STATIC_WAVENET "Build Internal static WaveNet models" ON)
if(BUILD_INTERNAL_STATIC_WAVENET)
	message(STATUS "Building Internal static WaveNet models")
	add_definitions(-DBUILD_INTERNAL_STATIC_WAVENET)
else()
	message(STATUS "NOT Building Internal static WaveNet models")
endif()

option(BUILD_INTERNAL_STATIC_LSTM "Build Internal static LSTM models" ON)
if(BUILD_INTERNAL_STATIC_LSTM)
	message(STATUS "Building Internal static LSTM models")
	add_definitions(-DBUILD_INTERNAL_STATIC_LSTM)
else()
	message(STATUS "NOT Building Internal static LSTM models")
endif()

# Math-function implementations used by the LSTM/WaveNet kernels; the cache
# value is injected verbatim as the LSTM_MATH / WAVENET_MATH macro.
set(LSTM_MATH "FastMath" CACHE STRING "LSTM math functions")
add_definitions(-DLSTM_MATH=${LSTM_MATH})
message(STATUS "LSTM math is: ${LSTM_MATH}")

set(WAVENET_MATH "FastMath" CACHE STRING "WaveNet math functions")
add_definitions(-DWAVENET_MATH=${WAVENET_MATH})
message(STATUS "WaveNet math is: ${WAVENET_MATH}")

set(WAVENET_FRAMES "64" CACHE STRING "WaveNet frame size")

add_definitions(-DWAVENET_MAX_NUM_FRAMES=${WAVENET_FRAMES})

message(STATUS "WaveNet frame size is: ${WAVENET_FRAMES}")

# Heavily-templated static models overflow MSVC's default object-file
# section limit; /bigobj raises it.
if (MSVC)
	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
endif()

set(SOURCES
	NeuralModel.h
	NeuralModel.cpp
	NAMModel.h
	RTNeuralModel.h
	RTNeuralLoader.cpp
	RTNeuralLoader.h
	Activation.h
	WaveNet.h
	WaveNetDynamic.h
	LSTM.h
	LSTMDynamic.h
	InternalModel.h
	TemplateHelper.h)

if(BUILD_NAMCORE)
	set(NAM_SOURCES ../deps/NeuralAmpModelerCore/NAM/activations.h
		../deps/NeuralAmpModelerCore/NAM/activations.cpp
		../deps/NeuralAmpModelerCore/NAM/lstm.h
		../deps/NeuralAmpModelerCore/NAM/lstm.cpp
		../deps/NeuralAmpModelerCore/NAM/dsp.h
		../deps/NeuralAmpModelerCore/NAM/dsp.cpp
		../deps/NeuralAmpModelerCore/NAM/wavenet.cpp
		../deps/NeuralAmpModelerCore/NAM/wavenet.h)
endif()

if(BUILD_STATIC_RTNEURAL)
	set(RTNEURAL_WN_SOURCES ../deps/RTNeural-NAM/wavenet/wavenet_layer.hpp
		../deps/RTNeural-NAM/wavenet/wavenet_layer_array.hpp
		../deps/RTNeural-NAM/wavenet/wavenet_model.hpp
		../deps/RTNeural-NAM/wavenet/arena.hpp)
endif()

add_library(NeuralAudio STATIC ${SOURCES} ${NAM_SOURCES} ${RTNEURAL_WN_SOURCES})

# Dependency headers are marked SYSTEM so their warnings don't pollute builds.
target_include_directories(NeuralAudio PUBLIC ..)
target_include_directories(NeuralAudio SYSTEM PRIVATE ../deps/NeuralAmpModelerCore)
target_include_directories(NeuralAudio SYSTEM PRIVATE ../deps/RTNeural)
target_include_directories(NeuralAudio SYSTEM PRIVATE ../deps/math_approx)
target_include_directories(NeuralAudio SYSTEM PRIVATE ../deps/RTNeural-NAM/wavenet)
target_include_directories(NeuralAudio SYSTEM PRIVATE ../deps/RTNeural/modules/Eigen)
target_include_directories(NeuralAudio SYSTEM PRIVATE ../deps/RTNeural/modules/json)

# PIC so the static library can be linked into shared objects (e.g. the C API).
set_property(TARGET NeuralAudio PROPERTY POSITION_INDEPENDENT_CODE ON)

add_subdirectory(../deps/RTNeural RTNeural)
add_subdirectory(../deps/math_approx math_approx)
target_link_libraries(NeuralAudio LINK_PUBLIC RTNeural math_approx)

source_group(NeuralAudio ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCES})
source_group(NAM ${CMAKE_CURRENT_SOURCE_DIR} FILES ${NAM_SOURCES})
source_group(RTNeural-NAM ${CMAKE_CURRENT_SOURCE_DIR} FILES ${RTNEURAL_WN_SOURCES})
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 |
4 | # User-specific files
5 | *.suo
6 | *.user
7 | *.userosscache
8 | *.sln.docstates
9 |
10 | # User-specific files (MonoDevelop/Xamarin Studio)
11 | *.userprefs
12 |
13 | # Build results
14 | [Dd]ebug/
15 | [Dd]ebugPublic/
16 | [Rr]elease/
17 | [Rr]eleases/
18 | x64/
19 | x86/
20 | bld/
21 | [Bb]in/
22 | [Oo]bj/
23 | [Ll]og/
24 |
25 | # Visual Studio 2015 cache/options directory
26 | .vs/
27 | # Uncomment if you have tasks that create the project's static files in wwwroot
28 | #wwwroot/
29 |
30 | # MSTest test Results
31 | [Tt]est[Rr]esult*/
32 | [Bb]uild[Ll]og.*
33 |
34 | # NUNIT
35 | *.VisualState.xml
36 | TestResult.xml
37 |
38 | # Build Results of an ATL Project
39 | [Dd]ebugPS/
40 | [Rr]eleasePS/
41 | dlldata.c
42 |
43 | # DNX
44 | project.lock.json
45 | artifacts/
46 |
47 | *_i.c
48 | *_p.c
49 | *_i.h
50 | *.ilk
51 | *.meta
52 | *.obj
53 | *.pch
54 | *.pdb
55 | *.pgc
56 | *.pgd
57 | *.rsp
58 | *.sbr
59 | *.tlb
60 | *.tli
61 | *.tlh
62 | *.tmp
63 | *.tmp_proj
64 | *.log
65 | *.vspscc
66 | *.vssscc
67 | .builds
68 | *.pidb
69 | *.svclog
70 | *.scc
71 |
72 | # Chutzpah Test files
73 | _Chutzpah*
74 |
75 | # Visual C++ cache files
76 | ipch/
77 | *.aps
78 | *.ncb
79 | *.opendb
80 | *.opensdf
81 | *.sdf
82 | *.cachefile
83 |
84 | # Visual Studio profiler
85 | *.psess
86 | *.vsp
87 | *.vspx
88 | *.sap
89 |
90 | # TFS 2012 Local Workspace
91 | $tf/
92 |
93 | # Guidance Automation Toolkit
94 | *.gpState
95 |
96 | # ReSharper is a .NET coding add-in
97 | _ReSharper*/
98 | *.[Rr]e[Ss]harper
99 | *.DotSettings.user
100 |
101 | # JustCode is a .NET coding add-in
102 | .JustCode
103 |
104 | # TeamCity is a build add-in
105 | _TeamCity*
106 |
107 | # DotCover is a Code Coverage Tool
108 | *.dotCover
109 |
110 | # NCrunch
111 | _NCrunch_*
112 | .*crunch*.local.xml
113 | nCrunchTemp_*
114 |
115 | # MightyMoose
116 | *.mm.*
117 | AutoTest.Net/
118 |
119 | # Web workbench (sass)
120 | .sass-cache/
121 |
122 | # Installshield output folder
123 | [Ee]xpress/
124 |
125 | # DocProject is a documentation generator add-in
126 | DocProject/buildhelp/
127 | DocProject/Help/*.HxT
128 | DocProject/Help/*.HxC
129 | DocProject/Help/*.hhc
130 | DocProject/Help/*.hhk
131 | DocProject/Help/*.hhp
132 | DocProject/Help/Html2
133 | DocProject/Help/html
134 |
135 | # Click-Once directory
136 | publish/
137 |
138 | # Publish Web Output
139 | *.[Pp]ublish.xml
140 | *.azurePubxml
141 | # TODO: Comment the next line if you want to checkin your web deploy settings
142 | # but database connection strings (with potential passwords) will be unencrypted
143 | *.pubxml
144 | *.publishproj
145 |
146 | # NuGet Packages
147 | *.nupkg
148 | # The packages folder can be ignored because of Package Restore
149 | **/packages/*
150 | # except build/, which is used as an MSBuild target.
151 | !**/packages/build/
152 | # Uncomment if necessary however generally it will be regenerated when needed
153 | #!**/packages/repositories.config
154 | # NuGet v3's project.json files produces more ignoreable files
155 | *.nuget.props
156 | *.nuget.targets
157 |
158 | # Microsoft Azure Build Output
159 | csx/
160 | *.build.csdef
161 |
162 | # Microsoft Azure Emulator
163 | ecf/
164 | rcf/
165 |
166 | # Microsoft Azure ApplicationInsights config file
167 | ApplicationInsights.config
168 |
169 | # Windows Store app package directory
170 | AppPackages/
171 | BundleArtifacts/
172 |
173 | # Visual Studio cache files
174 | # files ending in .cache can be ignored
175 | *.[Cc]ache
176 | # but keep track of directories ending in .cache
177 | !*.[Cc]ache/
178 |
179 | # Others
180 | ClientBin/
181 | ~$*
182 | *~
183 | *.dbmdl
184 | *.dbproj.schemaview
185 | *.pfx
186 | *.publishsettings
187 | node_modules/
188 | orleans.codegen.cs
189 |
190 | # RIA/Silverlight projects
191 | Generated_Code/
192 |
193 | # Backup & report files from converting an old project file
194 | # to a newer Visual Studio version. Backup files are not needed,
195 | # because we have git ;-)
196 | _UpgradeReport_Files/
197 | Backup*/
198 | UpgradeLog*.XML
199 | UpgradeLog*.htm
200 |
201 | # SQL Server files
202 | *.mdf
203 | *.ldf
204 |
205 | # Business Intelligence projects
206 | *.rdl.data
207 | *.bim.layout
208 | *.bim_*.settings
209 |
210 | # Microsoft Fakes
211 | FakesAssemblies/
212 |
213 | # GhostDoc plugin setting file
214 | *.GhostDoc.xml
215 |
216 | # Node.js Tools for Visual Studio
217 | .ntvs_analysis.dat
218 |
219 | # Visual Studio 6 build log
220 | *.plg
221 |
222 | # Visual Studio 6 workspace options file
223 | *.opt
224 |
225 | # Visual Studio LightSwitch build output
226 | **/*.HTMLClient/GeneratedArtifacts
227 | **/*.DesktopClient/GeneratedArtifacts
228 | **/*.DesktopClient/ModelManifest.xml
229 | **/*.Server/GeneratedArtifacts
230 | **/*.Server/ModelManifest.xml
231 | _Pvt_Extensions
232 |
233 | # Paket dependency manager
234 | .paket/paket.exe
235 |
236 | # FAKE - F# Make
237 | .fake/
238 |
--------------------------------------------------------------------------------
/NeuralAudio/LSTMDynamic.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 | #include "Activation.h"
6 | #include "LSTM.h"
7 |
8 | namespace NeuralAudio
9 | {
	// Runtime-sized LSTM layer used by the dynamic (non-template) internal
	// model. All gate weights are packed into a single matrix applied to the
	// concatenated [input | hidden] vector; gate spans follow the NAM
	// ordering i (input), f (forget), g (cell candidate), o (output).
	class LSTMLayer
	{
	private:
		size_t inputSize;
		size_t hiddenSize;
		size_t inputHiddenSize;		// inputSize + hiddenSize
		size_t gateSize;			// 4 * hiddenSize: one span per gate
		Eigen::MatrixXf inputHiddenWeights;	// (gateSize x inputHiddenSize)
		Eigen::VectorXf bias;		// packed per-gate biases (gateSize)
		Eigen::VectorXf state;		// concatenated [current input | hidden state]
		Eigen::VectorXf gates;		// pre-activation gate values (gateSize)
		Eigen::VectorXf cellState;	// LSTM cell state (hiddenSize)

		// Offsets of each gate's span within 'gates', and of the hidden
		// state within 'state' (hOffset == inputSize).
		size_t iOffset;
		size_t fOffset;
		size_t gOffset;
		size_t oOffset;
		size_t hOffset;

	public:
		LSTMLayer(size_t inputSize, size_t hiddenSize) :
			inputSize(inputSize),
			hiddenSize(hiddenSize),
			inputHiddenSize(inputSize + hiddenSize),
			gateSize(4 * hiddenSize),
			inputHiddenWeights(gateSize, inputHiddenSize),
			bias(gateSize),
			state(inputHiddenSize),
			gates(gateSize),
			cellState(hiddenSize),
			iOffset(0),
			fOffset(hiddenSize),
			gOffset(2 * hiddenSize),
			oOffset(3 * hiddenSize),
			hOffset(inputSize)
		{
		}

		// View of the hidden-state portion (last hiddenSize entries) of 'state'.
		auto GetHiddenState() const { return state(Eigen::placeholders::lastN(hiddenSize)); };

		// Consumes this layer's weights from a NAM flat weight stream, in
		// order: row-major input/hidden weights, biases, initial hidden
		// state, initial cell state. Advances the caller's iterator.
		void SetNAMWeights(std::vector::iterator& weights)
		{
			for (size_t i = 0; i < gateSize; i++)
				for (size_t j = 0; j < inputHiddenSize; j++)
					inputHiddenWeights(i, j) = *(weights++);

			for (size_t i = 0; i < gateSize; i++)
				bias[i] = *(weights++);

			for (size_t i = 0; i < hiddenSize; i++)
				state[i + inputSize] = *(weights++);

			for (size_t i = 0; i < hiddenSize; i++)
				cellState[i] = *(weights++);
		}

		// Loads weights from a parsed layer definition. Input and hidden
		// weights arrive column-major (note the swapped loop order vs.
		// SetNAMWeights); hidden weights fill the columns after inputSize.
		// Hidden and cell state start at zero.
		void SetWeights(LSTMLayerDef& def)
		{
			std::vector::iterator it = def.InputWeights.begin();

			for (size_t j = 0; j < inputSize; j++)
				for (size_t i = 0; i < gateSize; i++)
				{
					inputHiddenWeights(i, j) = *(it++);
				}

			assert(std::distance(def.InputWeights.begin(), it) == (long)def.InputWeights.size());

			it = def.HiddenWeights.begin();

			for (size_t j = 0; j < hiddenSize; j++)
				for (size_t i = 0; i < gateSize; i++)
				{
					inputHiddenWeights(i, j + inputSize) = *(it++);
				}

			assert(std::distance(def.HiddenWeights.begin(), it) == (long)def.HiddenWeights.size());

			for (size_t i = 0; i < gateSize; i++)
				bias[i] = def.BiasWeights[i];

			state.setZero();
			cellState.setZero();
		}

		// Advances the layer by one sample: copies the input into the head
		// of 'state', computes all gates in one matrix product, then applies
		// the standard LSTM cell/hidden updates in place.
		inline void Process(const float* input)
		{
			for (size_t i = 0; i < inputSize; i++)
				state(i) = input[i];

			gates = (inputHiddenWeights * state) + bias;

			// c' = sigmoid(f) * c + sigmoid(i) * tanh(g)
			for (size_t i = 0; i < hiddenSize; i++)
				cellState[i] = (LSTM_MATH::Sigmoid(gates[i + fOffset]) * cellState[i]) + (LSTM_MATH::Sigmoid(gates[i + iOffset]) *
					LSTM_MATH::Tanh(gates[i + gOffset]));

			// h' = sigmoid(o) * tanh(c'), written back into 'state'
			for (size_t i = 0; i < hiddenSize; i++)
				state[i + hOffset] = LSTM_MATH::Sigmoid(gates[i + oOffset]) * LSTM_MATH::Tanh(cellState[i]);
		}
	};
110 |
	// Runtime-sized stacked-LSTM model: a chain of LSTMLayer instances
	// (first layer takes a single scalar input) followed by a dense head
	// (dot product with the last layer's hidden state, plus a bias).
	class LSTMModel
	{
	private:
		size_t numLayers;
		size_t lastLayer;	// numLayers - 1, cached for the head lookup
		size_t hiddenSize;
		std::vector layers;		// element type presumably LSTMLayer — TODO confirm (declaration garbled in dump)
		Eigen::VectorXf headWeights;
		float headBias;

	public:
		LSTMModel(size_t numLayers, size_t hiddenSize) :
			numLayers(numLayers),
			lastLayer(numLayers - 1),
			hiddenSize(hiddenSize),
			headWeights(hiddenSize)
		{
			// First layer maps the scalar input; subsequent layers are
			// hidden-to-hidden.
			layers.push_back(LSTMLayer(1, hiddenSize));

			for (size_t i = 0; i < numLayers - 1; i++)
			{
				layers.push_back(LSTMLayer(hiddenSize, hiddenSize));
			}
		}

		// Consumes a NAM flat weight stream: per-layer weights in order,
		// then the head weights and head bias. Asserts that exactly all
		// weights were consumed.
		void SetNAMWeights(std::vector weights)
		{
			std::vector::iterator it = weights.begin();

			for (auto& layer : layers)
			{
				layer.SetNAMWeights(it);
			}

			for (int i = 0; i < hiddenSize; i++)
				headWeights[i] = *(it++);

			headBias = *(it++);

			assert(std::distance(weights.begin(), it) == (long)weights.size());
		}

		// Loads weights from a parsed model definition.
		void SetWeights(LSTMDef& def)
		{
			for (size_t i = 0; i < hiddenSize; i++)
				headWeights[i] = def.HeadWeights[i];

			headBias = def.HeadBias;

			for (size_t i = 0; i < numLayers; i++)
			{
				layers[i].SetWeights(def.Layers[i]);
			}
		}

		// Processes numSamples samples one at a time, feeding each layer's
		// hidden state into the next and applying the head to the last.
		void Process(const float* input, float* output, const size_t numSamples)
		{
			for (size_t i = 0; i < numSamples; i++)
			{
				layers[0].Process(input + i);

				for (size_t layer = 1; layer < numLayers; layer++)
				{
					layers[layer].Process(layers[layer - 1].GetHiddenState().data());
				}

				output[i] = headWeights.dot(layers[lastLayer].GetHiddenState()) + headBias;
			}
		}
	};
181 | }
--------------------------------------------------------------------------------
/deps/RTNeural-NAM/wavenet/arena.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | namespace wavenet
10 | {
/**
 * Returns the next pointer with a given byte alignment,
 * or the base pointer if it is already aligned.
 *
 * Note: the mask arithmetic below is only valid when
 * alignment_bytes is a power of two.
 */
template
Type* snap_pointer_to_alignment (Type* base_pointer,
                                 Integer_Type alignment_bytes) noexcept
{
    return (Type*) ((((size_t) base_pointer) + (alignment_bytes - 1)) & ~(alignment_bytes - 1));
}
21 |
22 | static constexpr int default_byte_alignment = 16;
23 |
/**
 * A simple memory arena. By default the arena will be
 * backed with a vector of bytes, but the underlying
 * memory resource can be changed via the template argument.
 *
 * Allocation is bump-pointer style: allocate_bytes() advances a
 * "stack pointer" (bytes_used) and clear()/Frame roll it back;
 * individual allocations are never freed.
 */
template >
class Memory_Arena
{
public:
    Memory_Arena() = default;

    /** Constructs the arena with an initial allocated size. */
    explicit Memory_Arena (size_t size_in_bytes) { resize (size_in_bytes); }

    // Non-copyable (owns a buffer that outstanding pointers refer to),
    // but movable.
    Memory_Arena (const Memory_Arena&) = delete;
    Memory_Arena& operator= (const Memory_Arena&) = delete;

    Memory_Arena (Memory_Arena&&) noexcept = default;
    Memory_Arena& operator= (Memory_Arena&&) noexcept = default;

    /** Re-allocates the internal buffer with a given number of bytes */
    void resize (size_t new_size_bytes)
    {
        clear();
        raw_data.resize (new_size_bytes, std::byte {});
    }

    /**
     * Moves the allocator "stack pointer" back to zero,
     * effectively "reclaiming" all allocated memory.
     */
    void clear() noexcept
    {
#if DEBUG
        // Poison reclaimed memory in debug builds so use-after-clear
        // bugs surface quickly.
        std::fill (raw_data.begin(), raw_data.begin() + bytes_used, std::byte { 0xDD });
#endif
        bytes_used = 0;
    }

    /** Returns the number of bytes currently being used */
    [[nodiscard]] size_t get_bytes_used() const noexcept { return bytes_used; }

    /**
     * Allocates a given number of bytes.
     * The returned memory will be un-initialized, so be sure to clear it manually
     * if needed.
     *
     * Returns nullptr (after asserting in debug builds) when the arena
     * is exhausted.
     */
    void* allocate_bytes (size_t num_bytes, size_t alignment = 1)
    {
        auto* pointer = snap_pointer_to_alignment (raw_data.data() + bytes_used, alignment);
        // Increment includes both alignment padding and the requested bytes.
        const auto bytes_increment = static_cast (std::distance (raw_data.data() + bytes_used, pointer + num_bytes));

        if (bytes_used + bytes_increment > raw_data.size())
        {
            assert (false);
            return nullptr;
        }

        bytes_used += bytes_increment;
        return pointer;
    }

    /**
     * Allocates space for some number of objects of type T
     * The returned memory will be un-initialized, so be sure to clear it manually
     * if needed.
     */
    template
    T* allocate (IntType num_Ts, size_t alignment = alignof (T))
    {
        return static_cast (allocate_bytes ((size_t) num_Ts * sizeof (T), alignment));
    }

    /**
     * Returns a span of type T, and size count.
     * The returned memory will be un-initialized, so be sure to clear it manually
     * if needed.
     */
    template
    auto make_span (IntType count, size_t alignment = default_byte_alignment)
    {
        return std::span { allocate (count, alignment),
                           static_cast (count) };
    }

    /** Returns a pointer to the internal buffer with a given offset in bytes */
    template
    T* data (IntType offset_bytes) noexcept
    {
        return reinterpret_cast (raw_data.data() + offset_bytes);
    }

    /**
     * Creates a "frame" for the allocator.
     * Once the frame goes out of scope, the allocator will be reset
     * to whatever it's state was at the beginning of the frame.
     *
     * NOTE(review): a default-constructed Frame leaves alloc == nullptr,
     * so its destructor dereferences null — only construct Frames via
     * create_frame()/the explicit constructor. Confirm upstream.
     */
    struct Frame
    {
        Frame() = default;
        explicit Frame (Memory_Arena& allocator)
            : alloc (&allocator), bytes_used_at_start (alloc->bytes_used) {}

        ~Frame() { alloc->bytes_used = bytes_used_at_start; }

        Memory_Arena* alloc = nullptr;
        size_t bytes_used_at_start = 0;
    };

    /** Creates a frame for this allocator */
    auto create_frame() { return Frame { *this }; }

    /** Rolls the allocator back to the state captured by the given frame. */
    void reset_to_frame (const Frame& frame)
    {
        assert (frame.alloc == this);
        bytes_used = frame.bytes_used_at_start;
    }

private:
    MemoryResourceType raw_data {};
    size_t bytes_used = 0;
};
146 | } // namespace wavenet
147 |
--------------------------------------------------------------------------------
/NeuralAudio/LSTM.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include "Activation.h"
5 | #include "TemplateHelper.h"
6 |
7 | namespace NeuralAudio
8 | {
	// Flat weight storage for a single LSTM layer, as parsed from a model
	// file before being loaded into a static or dynamic layer instance.
	struct LSTMLayerDef
	{
		std::vector InputWeights;	// input-to-gate weights
		std::vector HiddenWeights;	// hidden-to-gate (recurrent) weights
		std::vector BiasWeights;	// per-gate biases
	};
15 |
	// Parsed definition of a full stacked-LSTM model: per-layer weights
	// plus the dense head (dot-product weights and bias).
	struct LSTMDef
	{
		std::vector Layers;
		std::vector HeadWeights;
		float HeadBias;
	};
22 |
	// Compile-time-sized LSTM layer (template parameters: input size and
	// hidden size — stripped in this dump). Mirrors the dynamic LSTMLayer:
	// one packed weight matrix applied to [input | hidden], gate spans in
	// NAM order i, f, g, o.
	template
	class LSTMLayerT
	{
	private:
		Eigen::Matrix inputHiddenWeights;	// (4*HiddenSize x InputSize+HiddenSize)
		Eigen::Vector bias;
		Eigen::Vector state;		// concatenated [current input | hidden state]
		Eigen::Vector gates;		// pre-activation gate values
		Eigen::Vector cellState;

		// Offsets of each gate's span within 'gates', and of the hidden
		// state within 'state'.
		constexpr static long iOffset = 0;
		constexpr static long fOffset = HiddenSize;
		constexpr static long gOffset = 2 * HiddenSize;
		constexpr static long oOffset = 3 * HiddenSize;
		constexpr static long hOffset = InputSize;

	public:
		// View of the hidden-state portion (last HiddenSize entries) of 'state'.
		auto GetHiddenState() const { return state(Eigen::placeholders::lastN(HiddenSize)); };

		// Consumes this layer's weights from a NAM flat weight stream:
		// row-major weights, biases, initial hidden state, initial cell
		// state. Advances the caller's iterator.
		void SetNAMWeights(std::vector::iterator& weights)
		{
			for (int i = 0; i < inputHiddenWeights.rows(); i++)
				for (int j = 0; j < inputHiddenWeights.cols(); j++)
					inputHiddenWeights(i, j) = *(weights++);

			for (int i = 0; i < bias.size(); i++)
				bias[i] = *(weights++);

			for (int i = 0; i < HiddenSize; i++)
				state[i + InputSize] = *(weights++);

			for (int i = 0; i < HiddenSize; i++)
				cellState[i] = *(weights++);
		}

		// Loads weights from a parsed layer definition; input and hidden
		// weights arrive column-major. Hidden and cell state start at zero.
		void SetWeights(LSTMLayerDef& def)
		{
			std::vector::iterator it = def.InputWeights.begin();

			for (int j = 0; j < InputSize; j++)
				for (int i = 0; i < inputHiddenWeights.rows(); i++)
				{
					inputHiddenWeights(i, j) = *(it++);
				}

			assert(std::distance(def.InputWeights.begin(), it) == (long)def.InputWeights.size());

			it = def.HiddenWeights.begin();

			for (int j = 0; j < HiddenSize; j++)
				for (int i = 0; i < inputHiddenWeights.rows(); i++)
				{
					inputHiddenWeights(i, j + InputSize) = *(it++);
				}

			assert(std::distance(def.HiddenWeights.begin(), it) == (long)def.HiddenWeights.size());

			for (int i = 0; i < bias.rows(); i++)
				bias[i] = def.BiasWeights[i];

			state.setZero();
			cellState.setZero();
		}

		// Advances the layer by one sample: standard LSTM cell update
		// computed from a single matrix product over [input | hidden].
		inline void Process(const float* input)
		{
			for (int i = 0; i < InputSize; i++)
				state(i) = input[i];

			gates = (inputHiddenWeights * state) + bias;

			// c' = sigmoid(f) * c + sigmoid(i) * tanh(g)
			for (auto i = 0; i < HiddenSize; i++)
				cellState[i] = (LSTM_MATH::Sigmoid(gates[i + fOffset]) * cellState[i]) +
					(LSTM_MATH::Sigmoid(gates[i + iOffset]) * LSTM_MATH::Tanh(gates[i + gOffset]));

			// h' = sigmoid(o) * tanh(c'), written back into 'state'
			for (int i = 0; i < HiddenSize; i++)
				state[i + hOffset] = LSTM_MATH::Sigmoid(gates[i + oOffset]) * LSTM_MATH::Tanh(cellState[i]);
		}
	};
102 |
	// Compile-time-sized stacked-LSTM model (template parameters: layer
	// count and hidden size — stripped in this dump). The first layer takes
	// a scalar input; remaining hidden-to-hidden layers are unrolled at
	// compile time via ForEachIndex.
	template
	class LSTMModelT
	{
	private:
		LSTMLayerT<1, HiddenSize> firstLayer;
		std::vector> remainingLayers;
		Eigen::Vector headWeights;
		float headBias;

	public:
		LSTMModelT()
		{
			if constexpr (NumLayers > 1)
			{
				// NOTE(review): resize() already default-constructs the
				// NumLayers-1 layers used below; the subsequent push_back
				// appends extra layers past the indices that SetWeights/
				// Process ever touch. Likely reserve() was intended —
				// confirm before changing.
				remainingLayers.resize(NumLayers - 1);

				ForEachIndex([&](auto layerIndex)
				{
					(void)layerIndex;

					LSTMLayerT layer;

					remainingLayers.push_back(layer);
				});
			}
		}

		// Consumes a NAM flat weight stream: each layer in order, then the
		// head weights and bias. Asserts all weights were consumed exactly.
		void SetNAMWeights(std::vector weights)
		{
			std::vector::iterator it = weights.begin();

			firstLayer.SetNAMWeights(it);

			ForEachIndex([&](auto layerIndex)
			{
				remainingLayers[layerIndex].SetNAMWeights(it);
			});

			for (int i = 0; i < HiddenSize; i++)
				headWeights[i] = *(it++);

			headBias = *(it++);

			assert(std::distance(weights.begin(), it) == (long)weights.size());
		}

		// Loads weights from a parsed model definition; Layers[0] feeds the
		// first layer and Layers[i+1] the i-th remaining layer.
		void SetWeights(LSTMDef& def)
		{
			for (int i = 0; i < HiddenSize; i++)
				headWeights[i] = def.HeadWeights[i];

			headBias = def.HeadBias;

			firstLayer.SetWeights(def.Layers[0]);

			ForEachIndex([&](auto layerIndex)
			{
				remainingLayers[layerIndex].SetWeights(def.Layers[layerIndex + 1]);
			});
		}

		// Processes numSamples samples one at a time: chain each layer's
		// hidden state into the next, then apply the dense head to the
		// last layer's hidden state.
		void Process(const float* input, float* output, const size_t numSamples)
		{
			for (size_t i = 0; i < numSamples; i++)
			{
				firstLayer.Process(input + i);

				ForEachIndex([&](auto layerIndex)
				{
					if constexpr (layerIndex == 0)
					{
						remainingLayers[layerIndex].Process(firstLayer.GetHiddenState().data());
					}
					else
					{
						remainingLayers[layerIndex].Process(remainingLayers[layerIndex - 1].GetHiddenState().data());
					}
				});

				if constexpr (NumLayers == 1)
				{
					output[i] = headWeights.dot(firstLayer.GetHiddenState()) + headBias;
				}
				else
				{
					output[i] = headWeights.dot(remainingLayers[NumLayers - 2].GetHiddenState()) + headBias;
				}
			}
		}
	};
193 | }
--------------------------------------------------------------------------------
/deps/RTNeural-NAM/wavenet/wavenet_model.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "arena.hpp"
4 | #include "wavenet_layer_array.hpp"
5 |
6 | namespace wavenet
7 | {
// Full WaveNet model: a tuple of layer arrays whose head outputs are
// accumulated and finally scaled by head_scale. Template parameters
// (sample type T and the layer-array types) were stripped in this dump.
template
struct Wavenet_Model
{
    std::tuple layer_arrays;

    static constexpr auto head_layer_n_channels = std::tuple_element_t<0, std::tuple>::n_channels;

#if RTNEURAL_USE_EIGEN
    Eigen::Matrix head_input {};
#elif RTNEURAL_USE_XSIMD
    xsimd::batch head_input[RTNeural::ceil_div (head_layer_n_channels, (int) xsimd::batch::size)];
#endif
    T head_scale = (T) 0;

    // Scratch arena reused for all per-block temporaries (see forward()).
    Memory_Arena<> arena {};

    Wavenet_Model() = default;

    // Sizes the scratch arena for the block-based forward() path by summing
    // each layer array's requirement, plus slack for alignment padding.
    void prepare (int block_size)
    {
#if RTNEURAL_USE_EIGEN
        size_t arena_bytes_needed = sizeof (Eigen::Matrix) * block_size;
#elif RTNEURAL_USE_XSIMD
        size_t arena_bytes_needed = sizeof (xsimd::batch[RTNeural::ceil_div (head_layer_n_channels, (int) xsimd::batch::size)]) * block_size
                                    + sizeof (xsimd::batch) * block_size;
#endif
        RTNeural::modelt_detail::forEachInTuple (
            [&arena_bytes_needed, block_size] (auto& layer_array, auto)
            {
                arena_bytes_needed += layer_array.get_arena_bytes_needed (block_size);
            },
            layer_arrays);

        arena.resize (arena_bytes_needed + 256);
        // prewarm();
    }

    // Resets all layer state, then runs many silent samples through the
    // network to settle the convolution histories.
    void prewarm()
    {
        RTNeural::modelt_detail::forEachInTuple (
            [] (auto& layer, size_t)
            {
                layer.reset();
            },
            layer_arrays);
        for (int i = 0; i < 1 << 14; ++i)
            forward (0.0f);
    }

    // Loads the flat "weights" array from a NAM model JSON: each layer
    // array consumes its portion in order, followed by the head scale.
    void load_weights (const nlohmann::json& model_json)
    {
        std::vector model_weights = model_json.at ("weights");
        auto weights_iterator = model_weights.begin();
        RTNeural::modelt_detail::forEachInTuple (
            [&weights_iterator] (auto& layer, size_t)
            {
                layer.load_weights (weights_iterator);
            },
            layer_arrays);

        head_scale = *weights_iterator++;

        // Make sure we used all of the weights exactly
        assert (std::distance (model_weights.begin(), weights_iterator) == model_weights.size());
    }

    // Single-sample forward pass. The scalar input doubles as the global
    // condition fed to every layer array; each array after the first takes
    // the previous array's layer outputs and accumulates into its head
    // outputs. Returns the last array's head output scaled by head_scale.
    T forward (T input) noexcept
    {
#if RTNEURAL_USE_EIGEN
        const auto v_ins = Eigen::Matrix::Constant (input);
#elif RTNEURAL_USE_XSIMD
        xsimd::batch v_ins[1];
        v_ins[0] = RTNeural::set_value (v_ins[0], 0, input);
#endif

        RTNeural::modelt_detail::forEachInTuple (
            [this, v_ins] (auto& layer_array, auto index_t)
            {
                static constexpr size_t index = index_t;
                if constexpr (index == 0)
                {
#if RTNEURAL_USE_EIGEN
                    head_input.setZero();
                    std::get<0> (layer_arrays).forward (v_ins, v_ins, head_input);
#elif RTNEURAL_USE_XSIMD
                    std::fill (std::begin (head_input), std::end (head_input), xsimd::batch {});
                    std::get<0> (layer_arrays).forward (v_ins, v_ins, head_input);
#endif
                }
                else
                {
                    std::get (layer_arrays).forward (std::get (layer_arrays).layer_outputs, v_ins, std::get (layer_arrays).head_outputs);
                }
            },
            layer_arrays);

#if RTNEURAL_USE_EIGEN
        return std::get - 1> (layer_arrays).head_outputs[0] * head_scale;
#elif RTNEURAL_USE_XSIMD
        return std::get - 1> (layer_arrays).head_outputs[0].get (0) * head_scale;
#endif
    }

    // Block-based forward pass over N samples; per-block temporaries come
    // from the scratch arena sized in prepare(), and the arena is reclaimed
    // wholesale at the end.
    void forward (const T* input, T* output, int N) noexcept
    {
#if RTNEURAL_USE_EIGEN
        const auto* v_ins = reinterpret_cast*> (input);
#elif RTNEURAL_USE_XSIMD
        auto* v_ins = arena.allocate[1]> (N, RTNEURAL_DEFAULT_ALIGNMENT);
        for (int n = 0; n < N; ++n)
            v_ins[n][0] = RTNeural::set_value (v_ins[n][0], 0, input[n]);
#endif

        RTNeural::modelt_detail::forEachInTuple (
            [this, v_ins, N, output] (auto& layer_array, auto index_t)
            {
                static constexpr size_t index = index_t;
                if constexpr (index == 0)
                {
#if RTNEURAL_USE_EIGEN
                    auto* head_inputs = arena.allocate> (N, RTNEURAL_DEFAULT_ALIGNMENT);
                    for (int n = 0; n < N; ++n)
                        head_inputs[n].setZero();
                    std::get<0> (layer_arrays).forward (v_ins, v_ins, head_inputs, N, arena);
#elif RTNEURAL_USE_XSIMD
                    auto* head_inputs = arena.allocate[RTNeural::ceil_div (head_layer_n_channels, (int) xsimd::batch::size)]> (N, RTNEURAL_DEFAULT_ALIGNMENT);
                    for (int n = 0; n < N; ++n)
                        std::fill (std::begin (head_inputs[n]), std::end (head_inputs[n]), xsimd::batch {});
                    std::get<0> (layer_arrays).forward (v_ins, v_ins, head_inputs, N, arena);
#endif
                }
                else
                {
                    auto& prev_layer_array = std::get (layer_arrays);
                    std::get (layer_arrays).forward (prev_layer_array.layer_outputs_arr, v_ins, prev_layer_array.head_outputs_arr, N, arena);
                }
            },
            layer_arrays);

        auto& last_layer_array = std::get - 1> (layer_arrays);
        for (int n = 0; n < N; ++n)
        {
#if RTNEURAL_USE_EIGEN
            output[n] = last_layer_array.head_outputs_arr[n][0] * head_scale;
#elif RTNEURAL_USE_XSIMD
            output[n] = last_layer_array.head_outputs_arr[n][0].get (0) * head_scale;
#endif
        }

        arena.clear();
    }
};
162 | } // namespace wavenet
163 |
--------------------------------------------------------------------------------
/deps/RTNeural-NAM/wavenet/wavenet_layer.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 |
5 | namespace wavenet
6 | {
7 | template >
14 | // TODO: gated?
15 | struct Wavenet_Layer
16 | {
17 | RTNeural::Conv1DT conv;
18 | RTNeural::DenseT input_mixin;
19 | RTNeural::DenseT _1x1;
20 | Activation activation;
21 |
22 | #if RTNEURAL_USE_EIGEN
23 | Eigen::Matrix outs;
24 | #elif RTNEURAL_USE_XSIMD
25 | xsimd::batch outs[RTNeural::ceil_div (channels, (int) xsimd::batch::size)];
26 | #endif
27 |
    // Resets the layer's dilated-convolution history; the dense sub-layers
    // are stateless.
    void reset()
    {
        conv.reset();
    }
32 |
    // Consumes this layer's weights from a flat NAM weight stream, in
    // order: conv kernels (reversed to RTNeural's kernel layout), conv
    // bias, input-mixin dense weights, 1x1 dense weights, 1x1 bias.
    // Advances the caller's iterator.
    void load_weights (std::vector::iterator& weights)
    {
        conv.reset();

        std::vector>> conv_weights (channels, std::vector> (channels, std::vector (kernel_size)));
        for (int i = 0; i < channels; ++i)
            for (int j = 0; j < channels; ++j)
                for (int k = 0; k < kernel_size; k++)
                    conv_weights[i][j][k] = *(weights++);
        // NAM stores kernels in torch order; RTNeural expects them reversed.
        RTNeural::torch_helpers::detail::reverseKernels (conv_weights);
        conv.setWeights (conv_weights);

        std::vector conv_bias (channels);
        for (int i = 0; i < channels; ++i)
            conv_bias[i] = *(weights++);
        conv.setBias (conv_bias);

        std::vector> input_mixin_weights (channels, std::vector (condition_size));
        for (int i = 0; i < channels; i++)
            for (int j = 0; j < condition_size; j++)
                input_mixin_weights[i][j] = *(weights++);
        input_mixin.setWeights (input_mixin_weights);

        std::vector> _1x1_weights (channels, std::vector (channels));
        for (int i = 0; i < channels; i++)
            for (int j = 0; j < channels; j++)
                _1x1_weights[i][j] = *(weights++);
        _1x1.setWeights (_1x1_weights);

        std::vector _1x1_bias (channels);
        for (int i = 0; i < channels; i++)
            _1x1_bias[i] = *(weights++);
        _1x1.setBias (_1x1_bias.data());
    }
67 |
68 | #if RTNEURAL_USE_EIGEN
69 | void forward (const Eigen::Matrix& ins,
70 | const Eigen::Matrix& condition,
71 | Eigen::Matrix& head_io)
72 | #elif RTNEURAL_USE_XSIMD
73 | void forward (const xsimd::batch (&ins)[RTNeural::ceil_div (channels, (int) xsimd::batch::size)],
74 | const xsimd::batch (&condition)[RTNeural::ceil_div (condition_size, (int) xsimd::batch::size)],
75 | xsimd::batch (&head_io)[RTNeural::ceil_div (channels, (int) xsimd::batch::size)])
76 | #endif
77 | {
78 | conv.forward (ins);
79 | input_mixin.forward (condition);
80 |
81 | #if RTNEURAL_USE_EIGEN
82 | outs = conv.outs + input_mixin.outs;
83 | #elif RTNEURAL_USE_XSIMD
84 | for (int i = 0; i < std::size (outs); ++i)
85 | outs[i] = conv.outs[i] + input_mixin.outs[i];
86 | #endif
87 |
88 | activation.forward (outs);
89 |
90 | #if RTNEURAL_USE_EIGEN
91 | head_io.noalias() += activation.outs;
92 | #elif RTNEURAL_USE_XSIMD
93 | for (int i = 0; i < std::size (head_io); ++i)
94 | head_io[i] += activation.outs[i];
95 | #endif
96 |
97 | _1x1.forward (activation.outs);
98 |
99 | #if RTNEURAL_USE_EIGEN
100 | outs = ins + _1x1.outs;
101 | #elif RTNEURAL_USE_XSIMD
102 | for (int i = 0; i < std::size (outs); ++i)
103 | outs[i] = ins[i] + _1x1.outs[i];
104 | #endif
105 | }
106 |
107 | #if RTNEURAL_USE_EIGEN
108 | void forward (const Eigen::Matrix* ins,
109 | const Eigen::Matrix* condition,
110 | Eigen::Matrix* head_io,
111 | Eigen::Matrix* layer_outputs,
112 | int N,
113 | Memory_Arena<>& arena)
114 | #elif RTNEURAL_USE_XSIMD
115 | void forward (const xsimd::batch (*ins)[RTNeural::ceil_div (channels, (int) xsimd::batch::size)],
116 | const xsimd::batch (*condition)[RTNeural::ceil_div (condition_size, (int) xsimd::batch::size)],
117 | xsimd::batch (*head_io)[RTNeural::ceil_div (channels, (int) xsimd::batch::size)],
118 | xsimd::batch (*layer_outputs)[RTNeural::ceil_div (channels, (int) xsimd::batch::size)],
119 | int N,
120 | Memory_Arena<>& arena)
121 | #endif
122 | {
123 | const auto _ = arena.create_frame();
124 | #if RTNEURAL_USE_EIGEN
125 | auto* temp_outs = arena.allocate> (N, RTNEURAL_DEFAULT_ALIGNMENT);
126 | #elif RTNEURAL_USE_XSIMD
127 | auto* temp_outs = arena.allocate[RTNeural::ceil_div (channels, (int) xsimd::batch::size)]> (N, RTNEURAL_DEFAULT_ALIGNMENT);
128 | #endif
129 |
130 | for (int n = 0; n < N; ++n)
131 | {
132 | conv.forward (ins[n]);
133 | #if RTNEURAL_USE_EIGEN
134 | temp_outs[n].noalias() = conv.outs;
135 | #elif RTNEURAL_USE_XSIMD
136 | for (int i = 0; i < std::size (conv.outs); ++i)
137 | temp_outs[n][i] = conv.outs[i];
138 | #endif
139 | }
140 |
141 | for (int n = 0; n < N; ++n)
142 | {
143 | input_mixin.forward (condition[n]);
144 | #if RTNEURAL_USE_EIGEN
145 | temp_outs[n].noalias() += input_mixin.outs;
146 | #elif RTNEURAL_USE_XSIMD
147 | for (int i = 0; i < std::size (input_mixin.outs); ++i)
148 | temp_outs[n][i] += input_mixin.outs[i];
149 | #endif
150 | }
151 |
152 | for (int n = 0; n < N; ++n)
153 | {
154 | activation.forward (temp_outs[n]);
155 | #if RTNEURAL_USE_EIGEN
156 | temp_outs[n].noalias() = activation.outs;
157 | head_io[n].noalias() += activation.outs;
158 | #elif RTNEURAL_USE_XSIMD
159 | for (int i = 0; i < std::size (activation.outs); ++i)
160 | temp_outs[n][i] = activation.outs[i];
161 | for (int i = 0; i < std::size (activation.outs); ++i)
162 | head_io[n][i] += activation.outs[i];
163 | #endif
164 | }
165 |
166 | for (int n = 0; n < N; ++n)
167 | {
168 | _1x1.forward (temp_outs[n]);
169 | #if RTNEURAL_USE_EIGEN
170 | layer_outputs[n].noalias() = ins[n] + _1x1.outs;
171 | #elif RTNEURAL_USE_XSIMD
172 | for (int i = 0; i < std::size (_1x1.outs); ++i)
173 | layer_outputs[n][i] = ins[n][i] + _1x1.outs[i];
174 | #endif
175 | }
176 | }
177 | };
178 | } // namespace wavenet
179 |
--------------------------------------------------------------------------------
/deps/RTNeural-NAM/wavenet/wavenet_layer_array.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "wavenet_layer.hpp"
4 |
5 | namespace wavenet
6 | {
// Compile-time list of per-layer dilation rates.
// NOTE(review): template parameter lists and most <...> template arguments
// in this file were lost during extraction (angle-bracketed text was
// stripped); restore them from upstream RTNeural-NAM before compiling.
template
using Dilations = std::integer_sequence;

// A stack of Wavenet_Layers with an input "rechannel" projection in front
// and a "head rechannel" projection on the accumulated head signal.
template >
struct Layer_Array
{
    // Expands a Dilations pack into a std::tuple of Wavenet_Layer types,
    // one per dilation rate.
    template
    struct Layers_Helper
    {
    };

    template
    struct Layers_Helper>
    {
        using type = std::tuple...>;
    };

    using Layers = typename Layers_Helper::type;

    static constexpr auto n_channels = channels;

    RTNeural::DenseT rechannel; // no bias!
    Layers layers;
    static constexpr auto num_layers = std::tuple_size_v;
    RTNeural::DenseT head_rechannel; // projects the head accumulator down to head_size

    // Convenience reference to the residual output of the final layer.
    using Last_Layer_Type = std::remove_reference_t - 1> (layers))>;
    decltype (Last_Layer_Type::outs)& layer_outputs { std::get - 1> (layers).outs };

#if RTNEURAL_USE_EIGEN
    Eigen::Matrix head_outputs {};
    // Per-sample buffers for the block-based forward(); they point into the
    // caller's arena (allocated each call) — not owned by this object.
    Eigen::Matrix* layer_outputs_arr;
    Eigen::Matrix* head_outputs_arr;
#elif RTNEURAL_USE_XSIMD
    decltype (RTNeural::DenseT::outs)& head_outputs { head_rechannel.outs };
    using Layer_Out = xsimd::batch[RTNeural::ceil_div (channels, (int) xsimd::batch::size)];
    Layer_Out* layer_outputs_arr;
    using Head_Out = xsimd::batch[RTNeural::ceil_div (head_size, (int) xsimd::batch::size)];
    Head_Out* head_outputs_arr;
#endif

    // Resets every layer's convolution state.
    void reset()
    {
        RTNeural::modelt_detail::forEachInTuple ([] (auto& layer, size_t)
                                                 { layer.reset(); },
                                                 layers);
    }

    // Arena bytes the block-based forward() needs for N samples: two
    // layer-output buffers plus one head-output buffer (see the allocations
    // in forward() below).
    static size_t get_arena_bytes_needed (int N)
    {
#if RTNEURAL_USE_EIGEN
        return 2 * sizeof (Eigen::Matrix) * N + sizeof (Eigen::Matrix) * N;
#elif RTNEURAL_USE_XSIMD
        return 2 * sizeof (Layer_Out) * N + sizeof (Head_Out) * N;
#endif
    }

    // Consumes weights from the flat NAM stream in order: rechannel matrix,
    // each layer's weights, head-rechannel matrix, then (if compiled with
    // has_head_bias) the head bias.
    void load_weights (std::vector::iterator& weights)
    {
        std::vector> rechannel_weights (channels, std::vector (in_size));
        for (int i = 0; i < channels; i++)
            for (int j = 0; j < in_size; j++)
                rechannel_weights[i][j] = *(weights++);
        rechannel.setWeights (rechannel_weights);

        RTNeural::modelt_detail::forEachInTuple ([&weights] (auto& layer, size_t)
                                                 { layer.load_weights (weights); },
                                                 layers);

        std::vector> head_rechannel_weights (head_size, std::vector (channels));
        for (int i = 0; i < head_size; i++)
            for (int j = 0; j < channels; j++)
                head_rechannel_weights[i][j] = *(weights++);
        head_rechannel.setWeights (head_rechannel_weights);

        if constexpr (has_head_bias)
        {
            std::vector head_rechannel_bias (head_size);
            for (int i = 0; i < head_size; i++)
                head_rechannel_bias[i] = *(weights++);
            head_rechannel.setBias (head_rechannel_bias.data());
        }
    }

    // Single-sample forward: rechannel the input, run each layer in turn
    // (each subsequent layer reads the previous layer's residual output),
    // then project the accumulated head signal.
#if RTNEURAL_USE_EIGEN
    void forward (const Eigen::Matrix& ins,
                  const Eigen::Matrix& condition,
                  Eigen::Matrix& head_io)
#elif RTNEURAL_USE_XSIMD
    void forward (const xsimd::batch (&ins)[RTNeural::ceil_div (in_size, (int) xsimd::batch::size)],
                  const xsimd::batch (&condition)[RTNeural::ceil_div (condition_size, (int) xsimd::batch::size)],
                  xsimd::batch (&head_io)[RTNeural::ceil_div (channels, (int) xsimd::batch::size)])
#endif
    {
        rechannel.forward (ins);

        RTNeural::modelt_detail::forEachInTuple (
            [&] (auto& layer, auto index_t)
            {
                static constexpr size_t index = index_t;
                if constexpr (index == 0)
                    layer.forward (rechannel.outs, condition, head_io);
                else
                    // previous layer's outs (std::get index lost in extraction — presumably index - 1)
                    layer.forward (std::get (layers).outs, condition, head_io);
            },
            layers);

        head_rechannel.forward (head_io);
#if RTNEURAL_USE_EIGEN
        head_outputs = head_rechannel.outs;
#endif
    }

    // Block-based forward over N samples. layer_outputs_arr doubles as both
    // input and output of each layer stage (updated in place per layer).
#if RTNEURAL_USE_EIGEN
    void forward (const Eigen::Matrix* ins,
                  const Eigen::Matrix* condition,
                  Eigen::Matrix* head_io,
                  int N,
                  Memory_Arena<>& arena)
#elif RTNEURAL_USE_XSIMD
    void forward (const xsimd::batch (*ins)[RTNeural::ceil_div (in_size, (int) xsimd::batch::size)],
                  const xsimd::batch (*condition)[RTNeural::ceil_div (condition_size, (int) xsimd::batch::size)],
                  xsimd::batch (*head_io)[RTNeural::ceil_div (channels, (int) xsimd::batch::size)],
                  int N,
                  Memory_Arena<>& arena)
#endif
    {
        // Scratch for the whole block; sized per get_arena_bytes_needed().
#if RTNEURAL_USE_EIGEN
        layer_outputs_arr = arena.allocate> (N, RTNEURAL_DEFAULT_ALIGNMENT);
        head_outputs_arr = arena.allocate> (N, RTNEURAL_DEFAULT_ALIGNMENT);
#elif RTNEURAL_USE_XSIMD
        layer_outputs_arr = arena.allocate (N, RTNEURAL_DEFAULT_ALIGNMENT);
        head_outputs_arr = arena.allocate (N, RTNEURAL_DEFAULT_ALIGNMENT);
#endif

        // Rechannel every sample into the working buffer.
        for (int n = 0; n < N; ++n)
        {
            rechannel.forward (ins[n]);
#if RTNEURAL_USE_EIGEN
            layer_outputs_arr[n] = rechannel.outs;
#elif RTNEURAL_USE_XSIMD
            std::copy (std::begin (rechannel.outs), std::end (rechannel.outs), std::begin (layer_outputs_arr[n]));
#endif
        }

        // Each layer reads and rewrites layer_outputs_arr in place while
        // accumulating into head_io.
        RTNeural::modelt_detail::forEachInTuple (
            [&] (auto& layer, auto)
            {
                layer.forward (layer_outputs_arr, condition, head_io, layer_outputs_arr, N, arena);
            },
            layers);

        // Project the accumulated head signal for every sample.
        for (int n = 0; n < N; ++n)
        {
            head_rechannel.forward (head_io[n]);
#if RTNEURAL_USE_EIGEN
            head_outputs_arr[n] = head_rechannel.outs;
#elif RTNEURAL_USE_XSIMD
            std::copy (std::begin (head_rechannel.outs), std::end (head_rechannel.outs), std::begin (head_outputs_arr[n]));
#endif
        }
    }
};
179 | } // namespace wavenet
180 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # NeuralAudio
2 |
3 | NeuralAudio is a C++ library designed to make it easy to use neural network machine learning models (ie: guitar amplifier captures/profiles) in real-time audio applications.
4 |
5 | # Supported Models
6 |
7 | NeuralAudio currently supports the following model types:
8 |
9 | - [Neural Amp Modeler](https://github.com/sdatkinson/neural-amp-modeler) (NAM) WaveNet and LSTM models
10 | - [RTNeural](https://github.com/jatinchowdhury18/RTNeural) keras models (LSTM, GRU)
11 |
12 | # Underlying Libraries and Performance
13 |
14 | By default, NeuralAudio uses its own implementation of WaveNet and LSTM network models. This implementation has been designed to produce **exactly** the same output as the [NAM Core library](https://github.com/sdatkinson/NeuralAmpModelerCore), but with increased performance and reduced memory usage.
15 |
16 | For completeness, and to facilitate accuracy and performance benchmarking, it can also load models using the [NAM Core implementation](https://github.com/sdatkinson/NeuralAmpModelerCore) and [RTNeural](https://github.com/jatinchowdhury18/RTNeural).
17 |
18 | The internal NeuralAudio implementation currently outperforms the other implementations on all tested platforms (Windows x64, Linux x64/Arm64). It also uses significantly less memory than the NAM Core WaveNet implementation (which, for example, uses about 10x as much memory for a "standard" WaveNet model).
19 |
20 | For WaveNet, the internal implementation supports optimized static models of the official NAM network architectures: "Standard", "Lite", "Feather", "Nano".
21 |
22 | For LSTM, the internal implementation supports optimized static model architectures for 1x8, 1x12, 1x16, 1x24, 2x8, 2x12, and 2x16 models.
23 |
24 | All NAM files with WaveNet and LSTM architectures not supported internally will fall back on a less performant dynamic implementation (although still faster than NAM Core).
25 |
26 | All keras models not supported internally will fall back to the RTNeural implementation.
27 |
28 | # API overview
29 |
30 | To load a model:
31 | ```
32 | NeuralModel* model = NeuralAudio::NeuralModel::CreateFromFile("");
33 | ```
34 |
35 | To process a model:
36 |
37 | ```
38 | model->Process(pointerToFloatInputData, pointerToFloatOutputData, int numSamples);
39 | ```
40 |
41 | ## Setting maximum buffer size
42 |
43 | Some models need to allocate memory based on the size of the audio buffers being used. You need to make sure that processing does not exceed the specified maximum buffer size.
44 |
45 | The default maximum size is 128 samples. To change it, do:
46 |
47 | ```
48 | NeuralAudio::NeuralModel::SetDefaultMaxAudioBufferSize(maxSize);
49 | ```
50 |
51 | if you want to change the maximum buffer size of an already created model, do:
52 |
53 | ```
54 | model->SetMaxAudioBufferSize(int maxSize);
55 | ```
56 |
57 | ***Note: this is not real-time safe, and should not be done on a real-time audio thread.***
58 |
59 | ## Input/Output calibration
60 |
61 | Use ```model->GetRecommendedInputDBAdjustment()``` and ```model->GetRecommendedOutputDBAdjustment()``` to obtain the ideal input and output volume level adjustments in dB.
62 |
63 | To set a known audio input level (ie: from an audio interface), use ```model->SetAudioInputLevelDBu(float audioDBu)```. This is set at 12DBu by default.
64 |
65 | ## Model load behavior
66 |
67 | By default, models are loaded using the internal NeuralAudio implementation. If you would like to force the use of the NAM Core or RTNeural implementations, you can use:
68 |
69 | ```
70 | NeuralAudio::NeuralModel::SetWaveNetLoadMode(loadMode)
71 | ```
72 |
73 | and
74 |
75 | ```
76 | NeuralAudio::NeuralModel::SetLSTMLoadMode(loadMode)
77 | ```
78 |
79 | where "loadMode" is one of:
80 |
81 | ```
82 | NeuralAudio::EModelLoadMode::Internal
83 | NeuralAudio::EModelLoadMode::NAMCore
84 | NeuralAudio::EModelLoadMode::RTNeural
85 | ```
86 |
87 | You can check which implementation was actually used to load the model with ```model->GetLoadMode()```.
88 |
89 | **NOTE:** Because of compile time and executable size considerations, only the internal and dynamic RTNeural implementations are built by default. If you want to use RTNeural, it is recommended that you add ```-DBUILD_STATIC_RTNEURAL=ON``` to your cmake commandline. This will create static model implementations for the same sets of WaveNet and LSTM models as the internal implementation, and results in increased performance. If you want to use NAM Core, add ```-DBUILD_NAMCORE=ON``` to your cmake commandline.
90 |
91 | ## Getting the model receptive field size
92 |
93 | WaveNet models have a fixed receptive field size (ie: size of the input that the output depends on).
94 |
95 | To get this value, do:
96 |
97 | ```
98 | int receptiveFieldSamples = model->GetReceptiveFieldSize();
99 | ```
100 |
101 | Note that this can return -1, which means that the receptive field size is unknown, or not fixed (ie: LSTM models technically have an infinite tail because of their internal feedback loop).
102 |
103 | This method is only supported for "internal" models. If you are using NAM Core or RTNeural it will always return -1.
104 |
105 | # Building
106 |
107 | First clone the repository:
108 | ```bash
109 | git clone --recurse-submodules https://github.com/mikeoliphant/NeuralAudio
110 | cd NeuralAudio/build
111 | ```
112 |
113 | Then compile the plugin using:
114 |
115 | **Linux/MacOS**
116 | ```bash
117 | cmake .. -DCMAKE_BUILD_TYPE="Release"
118 | make -j4
119 | ```
120 |
121 | **Windows**
122 | ```bash
123 | cmake.exe -G "Visual Studio 17 2022" -A x64 ..
124 | cmake --build . --config=release -j4
125 | ```
126 |
127 | Note - you'll have to change the Visual Studio version if you are using a different one.
128 |
129 | ## CMake Options
130 |
131 | ```-DBUILD_NAMCORE=ON|OFF```: Support loading models using the NAM Core implementations.
132 |
133 | ```-DBUILD_STATIC_RTNEURAL=ON|OFF```: Build static RTNeural model architectures (slower compile, larger size - only use if you plan on forcing RTNeural model loading).
134 |
135 | ```-DBUILD_INTERNAL_STATIC_WAVENET=ON|OFF```: Build internal static WaveNet model architectures (faster internal WaveNet, but slower compile, larger size).
136 |
137 | ```-DBUILD_INTERNAL_STATIC_LSTM=ON|OFF```: Build internal static LSTM model architectures (faster internal LSTM, but slower compile, larger size).
138 |
139 | ```-DWAVENET_FRAMES=XXX```: Sample buffer size for the internal WaveNet implementation. Defaults to **64**. If you know you are going to be using a fixed sample buffer smaller or larger than this, use that instead. Note that the model will still be able to process any buffer size - it is just optimized for this size.
140 |
141 | ```-DWAVENET_MATH=XXX```
142 | ```-DLSTM_MATH=XXX```: Which math approximations (tanh and sigmoid) to use for WaveNet and LSTM models. Options are:
143 |
144 | - ```FastMath``` (the default): Use the same approximations as NAM Core.
145 | - ```EigenMath```: Use Eigen's builtin tanh approximation. Somewhat slower, but more accurate.
146 | - ```StdMath```: Use standard math functions. No approximation used - much slower.
147 |
148 | ```-DBUILD_UTILS=ON|OFF```: Build performance/accuracy testing tools (located in the "Utils" folder).
149 |
150 | # Software/Hardware Using NeuralAudio
151 |
152 | The following applications and devices are using the NeuralAudio library for model processing:
153 |
154 | - [neural-amp-modeler-lv2](https://github.com/mikeoliphant/neural-amp-modeler-lv2): LV2 plugin for using neural network machine learning amp models.
155 | - [stompbox](https://github.com/mikeoliphant/stompbox): Guitar amplification and effects pedalboard simulation.
156 | - [NeuralRack](https://github.com/brummer10/NeuralRack): Neural Model and Impulse Response File loader for Linux/Windows.
157 | - [Darkglass Anagram](https://www.darkglass.com/creation/anagram): Bass guitar effects unit.
158 |
--------------------------------------------------------------------------------
/Utils/ModelTest/ModelTest.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | using namespace NeuralAudio;
6 |
7 | static std::string LoadModes[] = { "Internal", "RTNeural", "NAMCore" };
8 |
9 | NeuralModel* LoadModel(std::filesystem::path modelPath, EModelLoadMode loadMode)
10 | {
11 | NeuralModel::SetWaveNetLoadMode(loadMode);
12 | NeuralModel::SetLSTMLoadMode(loadMode);
13 |
14 | try
15 | {
16 | auto model = NeuralAudio::NeuralModel::CreateFromFile(modelPath);
17 |
18 | if (model == nullptr)
19 | {
20 | std::cout << "Unable to load model from: " << modelPath << std::endl;
21 |
22 | return nullptr;
23 | }
24 |
25 | if (model->GetLoadMode() != loadMode)
26 | {
27 | delete model;
28 |
29 | return nullptr;
30 | }
31 |
32 | if (model->GetLoadMode() != NeuralAudio::EModelLoadMode::NAMCore)
33 | {
34 | if (!model->IsStatic())
35 | {
36 | std::cout << "**Warning: " << LoadModes[model->GetLoadMode()] << " model is not using a static architecture" << std::endl;
37 | }
38 | }
39 |
40 | return model;
41 | }
42 | catch (...)
43 | {
44 | std::cout << "Error loading model" << std::endl;
45 | }
46 |
47 | return nullptr;
48 | }
49 |
50 | static std::tuple BenchModel(NeuralModel* model, int blockSize, int numBlocks)
51 | {
52 | std::vector inData;
53 | inData.resize(blockSize);
54 |
55 | std::vector outData;
56 | outData.resize(blockSize);
57 |
58 | auto start = std::chrono::high_resolution_clock::now();
59 |
60 | double maxBlock = 0;
61 |
62 | for (int block = 0; block < numBlocks; block++)
63 | {
64 | auto blockStart = std::chrono::high_resolution_clock::now();
65 |
66 | model->Process(inData.data(), outData.data(), blockSize);
67 |
68 | auto blockEnd = std::chrono::high_resolution_clock::now();
69 |
70 | maxBlock = std::max(maxBlock, std::chrono::duration_cast> (blockEnd - blockStart).count());
71 | }
72 |
73 | auto end = std::chrono::high_resolution_clock::now();
74 |
75 | double tot = std::chrono::duration_cast> (end - start).count();
76 |
77 | return std::tie(tot, maxBlock);
78 | }
79 |
80 | static double ComputeError(NeuralModel* model1, NeuralModel* model2, int blockSize, int numBlocks)
81 | {
82 | std::vector inData;
83 | inData.resize(blockSize);
84 |
85 | std::vector outData;
86 | outData.resize(blockSize);
87 |
88 | std::vector outData2;
89 | outData2.resize(blockSize);
90 |
91 | model1->Prewarm();
92 | model2->Prewarm();
93 |
94 | double totErr = 0;
95 |
96 | long pos = 0;
97 |
98 | for (int block = 0; block < numBlocks; block++)
99 | {
100 | for (int i = 0; i < blockSize; i++)
101 | {
102 | inData[i] = (float)sin(pos++ * 0.01);
103 | }
104 |
105 | model1->Process(inData.data(), outData.data(), blockSize);
106 | model2->Process(inData.data(), outData2.data(), blockSize);
107 |
108 | for (int i = 0; i < blockSize; i++)
109 | {
110 | double diff = outData[i] - outData2[i];
111 |
112 | totErr += (diff * diff);
113 | }
114 | }
115 |
116 | return sqrt(totErr / (double)(blockSize * numBlocks));
117 | }
118 |
119 | void RunNAMTests(std::filesystem::path modelPath, int blockSize)
120 | {
121 | std::cout << "Model: " << modelPath << std::endl;
122 | std::cout << std::endl;
123 |
124 | int dataSize = 4096 * 64;
125 |
126 | int numBlocks = dataSize / blockSize;
127 |
128 | NeuralModel::SetDefaultMaxAudioBufferSize(blockSize);
129 |
130 | NeuralModel* rtNeuralModel = LoadModel(modelPath, EModelLoadMode::RTNeural);
131 | NeuralModel* namCoreModel = LoadModel(modelPath, EModelLoadMode::NAMCore);
132 | NeuralModel* internalModel = LoadModel(modelPath, EModelLoadMode::Internal);
133 |
134 | double rms;
135 |
136 | if (namCoreModel != nullptr)
137 | {
138 | }
139 |
140 | std::tuple internal;
141 | std::tuple rtNeural;
142 | std::tuple namCore;
143 |
144 | internal = BenchModel(internalModel, blockSize, numBlocks);
145 |
146 | std::cout << "Internal: " << std::get<0>(internal) << " (" << std::get<1>(internal) << ")" << std::endl;
147 |
148 | if (namCoreModel != nullptr)
149 | {
150 | std::cout << std::endl;
151 |
152 | namCore = BenchModel(namCoreModel, blockSize, numBlocks);
153 |
154 | rms = ComputeError(namCoreModel, internalModel, blockSize, numBlocks);
155 |
156 | std::cout << "NAM Core: " << std::get<0>(namCore) << " (" << std::get<1>(namCore) << ")" << std::endl;
157 | std::cout << "NAM vs Internal RMS err: " << rms << std::endl;
158 | std::cout << "Internal is: " << (std::get<0>(namCore) / std::get<0>(internal)) << "x NAM (" << (std::get<1>(namCore) / std::get<1>(internal)) << "x worst case)" << std::endl;
159 | }
160 |
161 | if (rtNeuralModel != nullptr)
162 | {
163 | std::cout << std::endl;
164 |
165 | rtNeural = BenchModel(rtNeuralModel, blockSize, numBlocks);
166 |
167 | std::cout << "RTNeural: " << std::get<0>(rtNeural) << " (" << std::get<1>(rtNeural) << ")" << std::endl;
168 | rms = ComputeError(namCoreModel, rtNeuralModel, blockSize, numBlocks);
169 | std::cout << "NAM vs RTNeural RMS err: " << rms << std::endl;
170 |
171 | if (namCoreModel != nullptr)
172 | {
173 | std::cout << "RTNeural is: " << (std::get<0>(namCore) / std::get<0>(rtNeural)) << "x NAM (" << (std::get<1>(namCore) / std::get<1>(rtNeural)) << "x worst case)" << std::endl;
174 | }
175 | }
176 |
177 | std::cout << std::endl;
178 | }
179 |
180 | void RunKerasTests(std::filesystem::path modelPath, int blockSize)
181 | {
182 | std::cout << "Model: " << modelPath << std::endl;
183 |
184 | int dataSize = 4096 * 64;
185 |
186 | int numBlocks = dataSize / blockSize;
187 |
188 | NeuralAudio::NeuralModel::SetDefaultMaxAudioBufferSize(blockSize);
189 |
190 | auto internalModel = LoadModel(modelPath, EModelLoadMode::Internal);
191 | auto rtNeuralModel = LoadModel(modelPath, EModelLoadMode::RTNeural);
192 |
193 | double rms = ComputeError(rtNeuralModel, internalModel, blockSize, numBlocks);
194 | std::cout << "Internal vs RTNeural RMS err: " << rms << std::endl;
195 | std::cout << std::endl;
196 |
197 | auto internal = BenchModel(internalModel, blockSize, numBlocks);
198 | auto rt = BenchModel(rtNeuralModel, blockSize, numBlocks);
199 |
200 | std::cout << "RTNeural: " << std::get<0>(rt) << " (" << std::get<1>(rt) << ")" << std::endl;
201 | std::cout << "Internal: " << std::get<0>(internal) << " (" << std::get<1>(internal) << ")" << std::endl;
202 | std::cout << "Internal is: " << (std::get<0>(rt) / std::get<0>(internal)) << "x RTNeural" << std::endl;
203 |
204 | std::cout << std::endl;
205 | }
206 |
207 | int RunDefaultTests(int blockSize)
208 | {
209 | std::filesystem::path modelPath = std::filesystem::current_path();
210 |
211 | while (modelPath.filename() != "Utils")
212 | {
213 | modelPath = modelPath.parent_path();
214 |
215 | if (modelPath == modelPath.root_path())
216 | {
217 | std::cout << "Unable to find Models: " << std::filesystem::current_path() << std::endl;
218 | std::cout << "ModelTest must be run from within the Utils subdirectory" << std::endl;
219 |
220 | return -1;
221 | }
222 | }
223 |
224 | modelPath = modelPath / "Models";
225 |
226 | std::cout << "Loading models from: " << modelPath << std::endl << std::endl;
227 |
228 | std::cout << "WaveNet (Standard) Test" << std::endl;
229 | RunNAMTests(modelPath / "BossWN-standard.nam", blockSize);
230 |
231 | std::cout << std::endl;
232 |
233 | std::cout << "LSTM (1x16) Test" << std::endl;
234 | RunNAMTests(modelPath / "BossLSTM-1x16.nam", blockSize);
235 |
236 | return 0;
237 | }
238 |
239 | int main(int argc, char* argv[])
240 | {
241 | std::cout << std::endl;
242 |
243 | int blockSize = 64;
244 |
245 | std::filesystem::path modelPath;
246 |
247 | for (int arg = 1; arg < argc; arg++)
248 | {
249 | char* end;
250 |
251 | long val = strtol(argv[arg], &end, 10);
252 |
253 | if (val != 0)
254 | {
255 | blockSize = (int)val;
256 | }
257 | else
258 | {
259 | modelPath.assign(argv[arg]);
260 | }
261 | }
262 |
263 | std::cout << "Block size: " << blockSize << std::endl;
264 |
265 | if (!modelPath.empty())
266 | {
267 | if (modelPath.extension() == ".nam")
268 | {
269 | RunNAMTests(modelPath, blockSize);
270 | }
271 | else
272 | {
273 | RunKerasTests(modelPath, blockSize);
274 | }
275 | }
276 | else
277 | {
278 | if (RunDefaultTests(blockSize) < 0)
279 | return -1;
280 | }
281 |
282 | return 0;
283 | }
284 |
--------------------------------------------------------------------------------
/NeuralAudio/NeuralModel.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include "NeuralModel.h"
3 | #ifdef BUILD_NAMCORE
4 | #include "NAMModel.h"
5 | #endif
6 | #ifdef BUILD_STATIC_RTNEURAL
7 | #include "RTNeuralLoader.h"
8 | #endif
9 | #include "RTNeuralModel.h"
10 | #include "InternalModel.h"
11 |
12 | namespace NeuralAudio
13 | {
	// Set once by EnsureModelDefsAreLoaded(); guards one-time registration.
	// NOTE(review): not synchronized — confirm models are only created from
	// a single thread before relying on first-load behavior.
	static bool modelDefsAreLoaded;

	// Registered static model definitions, searched by FindInternal*Definition().
	// NOTE(review): the element type was lost in extraction — presumably
	// InternalWaveNetDefinitionBase* / InternalLSTMDefinitionBase* given the
	// lookup functions below; confirm against upstream.
	static std::list internalWavenetModelDefs;
	static std::list internalLSTMModelDefs;
18 |
	// Registers the statically-compiled model definitions exactly once.
	// Which definitions exist is a compile-time choice (CMake options
	// BUILD_INTERNAL_STATIC_WAVENET / BUILD_INTERNAL_STATIC_LSTM /
	// BUILD_STATIC_RTNEURAL). Definitions are heap-allocated and never
	// freed — a process-lifetime registry.
	static void EnsureModelDefsAreLoaded()
	{
		if (!modelDefsAreLoaded)
		{
#ifdef BUILD_INTERNAL_STATIC_WAVENET
			// <channels, head size> pairs for the official NAM WaveNet presets.
			internalWavenetModelDefs.push_back(new InternalWaveNetDefinitionT<16, 8>); // Standard
			internalWavenetModelDefs.push_back(new InternalWaveNetDefinitionT<12, 6>); // Lite
			internalWavenetModelDefs.push_back(new InternalWaveNetDefinitionT<8, 4>); // Feather
			internalWavenetModelDefs.push_back(new InternalWaveNetDefinitionT<4, 2>); // Nano
#endif

#ifdef BUILD_INTERNAL_STATIC_LSTM
			// <num layers, hidden size> variants with static implementations.
			internalLSTMModelDefs.push_back(new InternalLSTMDefinitionT<1, 8>);
			internalLSTMModelDefs.push_back(new InternalLSTMDefinitionT<1, 12>);
			internalLSTMModelDefs.push_back(new InternalLSTMDefinitionT<1, 16>);
			internalLSTMModelDefs.push_back(new InternalLSTMDefinitionT<1, 24>);
			internalLSTMModelDefs.push_back(new InternalLSTMDefinitionT<2, 8>);
			internalLSTMModelDefs.push_back(new InternalLSTMDefinitionT<2, 12>);
			internalLSTMModelDefs.push_back(new InternalLSTMDefinitionT<2, 16>);
#endif

#ifdef BUILD_STATIC_RTNEURAL
			EnsureRTNeuralModelDefsAreLoaded();
#endif

			modelDefsAreLoaded = true;
		}
	}
47 |
48 | static InternalWaveNetDefinitionBase* FindInternalWaveNetDefinition(size_t numChannels, size_t headSize)
49 | {
50 | for (auto const& model : internalWavenetModelDefs)
51 | {
52 | if ((numChannels == model->GetNumChannels()) && (headSize == model->GetHeadSize()))
53 | return model;
54 | }
55 |
56 | return nullptr;
57 | }
58 |
59 | static InternalLSTMDefinitionBase* FindInternalLSTMDefinition(size_t numLayers, size_t hiddenSize)
60 | {
61 | for (auto const& model : internalLSTMModelDefs)
62 | {
63 | if ((numLayers == model->GetNumLayers()) && (hiddenSize == model->GetHiddenSize()))
64 | return model;
65 | }
66 |
67 | return nullptr;
68 | }
69 |
	// Dilation patterns of the official NAM WaveNet architectures, used to
	// decide whether a model qualifies for a static implementation.
	// NOTE(review): the vector element type (<int>) was lost in extraction —
	// confirm against upstream.
	static std::vector stdDilations = { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512 };
	// The "lite"-family architectures split their dilations across two layer arrays:
	static std::vector liteDilations = { 1, 2, 4, 8, 16, 32, 64 };
	static std::vector liteDilations2 = { 128, 256, 512, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512 };
73 |
74 | static bool CheckDilations(const nlohmann::json dilationJson, std::vector& checkDilations)
75 | {
76 | if (dilationJson.size() != checkDilations.size())
77 | return false;
78 |
79 | for (size_t i = 0; i < dilationJson.size(); i++)
80 | {
81 | if (dilationJson[i] != checkDilations[i])
82 | return false;
83 | }
84 |
85 | return true;
86 | }
87 |
88 | bool NeuralModel::SupportsWaveNetLoadMode(EModelLoadMode mode)
89 | {
90 | if (mode == EModelLoadMode::NAMCore)
91 | #ifdef BUILD_NAMCORE
92 | return true;
93 | #else
94 | return false;
95 | #endif
96 |
97 | if (mode == EModelLoadMode::RTNeural)
98 | #ifdef BUILD_STATIC_RTNEURAL
99 | return true;
100 | #else
101 | return false;
102 | #endif
103 |
104 | return true;
105 | }
106 |
107 | bool NeuralModel::SupportsLSTMLoadMode(EModelLoadMode mode)
108 | {
109 | if (mode == EModelLoadMode::NAMCore)
110 | #ifdef BUILD_NAMCORE
111 | return true;
112 | #else
113 | return false;
114 | #endif
115 |
116 | return true;
117 | }
118 |
119 | NeuralModel* NeuralModel::CreateFromFile(std::filesystem::path modelPath)
120 | {
121 | if (!std::filesystem::exists(modelPath))
122 | return nullptr;
123 |
124 | std::ifstream jsonStream(modelPath, std::ifstream::binary);
125 |
126 | return CreateFromStream(jsonStream, modelPath.extension());
127 | }
128 |
129 | NeuralModel* NeuralModel::CreateFromStream(std::basic_istream& jsonStream, std::filesystem::path extension)
130 | {
131 | EnsureModelDefsAreLoaded();
132 |
133 | nlohmann::json modelJson;
134 | jsonStream >> modelJson;
135 |
136 | NeuralModel* newModel = nullptr;
137 |
138 | if (extension == ".nam")
139 | {
140 | std::string arch = modelJson.at("architecture");
141 |
142 | #ifdef BUILD_NAMCORE
143 | if (wavenetLoadMode == EModelLoadMode::NAMCore)
144 | {
145 | NAMModel* model = new NAMModel;
146 |
147 | model->LoadFromJson(modelJson);
148 |
149 | newModel = model;
150 | }
151 | #endif
152 |
153 | if (newModel == nullptr)
154 | {
155 | if (arch == "WaveNet")
156 | {
157 | nlohmann::json config = modelJson.at("config");
158 |
159 | if (config.at("layers").size() == 2)
160 | {
161 | nlohmann::json firstLayerConfig = config.at("layers").at(0);
162 | nlohmann::json secondLayerConfig = config.at("layers").at(1);
163 |
164 | if (!firstLayerConfig.at("gated") && !secondLayerConfig.at("gated") && !firstLayerConfig.at("head_bias") && secondLayerConfig.at("head_bias"))
165 | {
166 | bool isOfficialArchitecture = false;
167 |
168 | if (firstLayerConfig.at("channels") == 16)
169 | {
170 | if (CheckDilations(firstLayerConfig.at("dilations"), stdDilations) && CheckDilations(secondLayerConfig.at("dilations"), stdDilations))
171 | {
172 | isOfficialArchitecture = true;
173 | }
174 | }
175 | else
176 | {
177 | if (CheckDilations(firstLayerConfig.at("dilations"), liteDilations) && CheckDilations(secondLayerConfig.at("dilations"), liteDilations2))
178 | {
179 | isOfficialArchitecture = true;
180 | }
181 | }
182 |
183 | if (isOfficialArchitecture)
184 | {
185 | if (wavenetLoadMode == EModelLoadMode::RTNeural)
186 | {
187 | #ifdef BUILD_STATIC_RTNEURAL
188 | newModel = RTNeuralLoadNAMWaveNet(modelJson);
189 | #endif
190 | }
191 |
192 | if (newModel == nullptr)
193 | {
194 | auto modelDef = FindInternalWaveNetDefinition(firstLayerConfig.at("channels"), firstLayerConfig.at("head_size"));
195 |
196 | if (modelDef != nullptr)
197 | {
198 | auto model = modelDef->CreateModel();
199 |
200 | model->LoadFromNAMJson(modelJson);
201 |
202 | newModel = model;
203 | }
204 | }
205 | }
206 | }
207 | }
208 |
209 | if (newModel == nullptr)
210 | {
211 | // Use a dynamic model if we had no static definition
212 | InternalWaveNetModelDyn* model = new InternalWaveNetModelDyn;
213 |
214 | if (model->LoadFromNAMJson(modelJson))
215 | {
216 | newModel = model;
217 | }
218 | }
219 | }
220 | else if (arch == "LSTM")
221 | {
222 | nlohmann::json config = modelJson.at("config");
223 |
224 | #ifdef BUILD_STATIC_RTNEURAL
225 | if (lstmLoadMode == EModelLoadMode::RTNeural)
226 | {
227 | newModel = RTNeuralLoadNAMLSTM(modelJson);
228 | }
229 | #endif
230 |
231 | if (newModel == nullptr)
232 | {
233 | auto modelDef = FindInternalLSTMDefinition(config.at("num_layers"), config.at("hidden_size"));
234 |
235 | if (modelDef != nullptr)
236 | {
237 | auto model = modelDef->CreateModel();
238 | model->LoadFromNAMJson(modelJson);
239 |
240 | newModel = model;
241 | }
242 |
243 | // Use a dynamic model if we had no static definition
244 | if (newModel == nullptr)
245 | {
246 | InternalLSTMModelDyn* model = new InternalLSTMModelDyn;
247 |
248 | if (model->LoadFromNAMJson(modelJson))
249 | {
250 | newModel = model;
251 | }
252 | }
253 | }
254 | }
255 | }
256 | }
257 | else if ((extension == ".json") || (extension == ".aidax"))
258 | {
259 | const auto layers = modelJson.at("layers");
260 | const size_t numLayers = layers.size() - 1;
261 | const std::string modelType = layers.at(0).at("type");
262 | const int hiddenSize = layers.at(0).at("shape").back();
263 |
264 | if (modelType == "lstm")
265 | {
266 | #ifdef BUILD_STATIC_RTNEURAL
267 | if (lstmLoadMode == EModelLoadMode::RTNeural)
268 | {
269 | newModel = RTNeuralLoadKeras(modelJson);
270 | }
271 | #endif
272 |
273 | if (newModel == nullptr && lstmLoadMode == EModelLoadMode::Internal)
274 | {
275 | if (numLayers == 1)
276 | {
277 | auto modelDef = FindInternalLSTMDefinition(numLayers, hiddenSize);
278 |
279 | if (modelDef != nullptr)
280 | {
281 | auto model = modelDef->CreateModel();
282 |
283 | if (model->LoadFromKerasJson(modelJson))
284 | {
285 | newModel = model;
286 | }
287 | }
288 | }
289 |
290 | if (newModel == nullptr)
291 | {
292 | // Use a dynamic model if we had no static definition
293 | InternalLSTMModelDyn* model = new InternalLSTMModelDyn;
294 |
295 | if (model->LoadFromKerasJson(modelJson))
296 | {
297 | newModel = model;
298 | }
299 | }
300 | }
301 | }
302 |
303 | if (newModel == nullptr)
304 | {
305 | // Use a dynamic model for other model types
306 | RTNeuralModelDyn* model = new RTNeuralModelDyn;
307 | model->LoadFromKerasJson(modelJson);
308 |
309 | newModel = model;
310 | }
311 | }
312 |
313 | if (newModel != nullptr)
314 | {
315 | newModel->Prewarm();
316 | }
317 |
318 | return newModel;
319 | }
320 |
321 | void NeuralModel::ReadNAMConfig(const nlohmann::json& modelJson)
322 | {
323 | if (modelJson.contains("sample_rate") && modelJson.at("sample_rate").is_number())
324 | {
325 | sampleRate = modelJson.at("sample_rate");
326 | }
327 |
328 | if (modelJson.contains("metadata"))
329 | {
330 | nlohmann::json metaData = modelJson.at("metadata");
331 |
332 | if (metaData.contains("loudness") && metaData.at("loudness").is_number_float())
333 | {
334 | modelLoudnessDB = (float)metaData.at("loudness");
335 | }
336 |
337 | if (metaData.contains("input_level_dbu") && metaData.at("input_level_dbu").is_number_float())
338 | {
339 | modelInputLevelDBu = metaData.at("input_level_dbu");
340 | }
341 |
342 | if (metaData.contains("output_level_dbu") && metaData.at("output_level_dbu").is_number_float())
343 | {
344 | modelOutputLevelDBu = metaData.at("output_level_dbu");
345 | }
346 | }
347 | }
348 |
349 | void NeuralModel::ReadKerasConfig(const nlohmann::json& modelJson)
350 | {
351 | if (modelJson.contains("samplerate") && modelJson.at("samplerate").is_number())
352 | {
353 | sampleRate = modelJson.at("samplerate");
354 | }
355 |
356 | if (modelJson.contains("in_gain") && modelJson.at("in_gain").is_number_float())
357 | {
358 | modelInputLevelDBu = modelJson.at("in_gain");
359 | }
360 |
361 | if (modelJson.contains("out_gain") && modelJson.at("out_gain").is_number_float())
362 | {
363 | modelLoudnessDB = -18 - (float)modelJson.at("out_gain");
364 | }
365 | }
366 | }
--------------------------------------------------------------------------------
/NeuralAudio/InternalModel.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "NeuralModel.h"
4 | #include "WaveNet.h"
5 | #include "WaveNetDynamic.h"
6 | #include "LSTM.h"
7 | #include "LSTMDynamic.h"
8 |
9 | namespace NeuralAudio
10 | {
11 | using IStdDilations = NeuralAudio::Dilations<1, 2, 4, 8, 16, 32, 64, 128, 256, 512>;
12 | using ILiteDilations1 = NeuralAudio::Dilations<1, 2, 4, 8, 16, 32, 64>;
13 | using ILiteDilations2 = NeuralAudio::Dilations<128, 256, 512, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512>;
14 |
15 | class InternalModel : public NeuralModel
16 | {
17 | public:
18 | bool LoadFromKerasJson(const nlohmann::json& modelJson)
19 | {
20 | ReadKerasConfig(modelJson);
21 |
22 | return CreateModelFromKerasJson(modelJson);
23 |
24 | return true;
25 | }
26 |
27 | virtual bool CreateModelFromKerasJson(const nlohmann::json& modelJson)
28 | {
29 | (void)modelJson;
30 |
31 | return false;
32 | }
33 |
34 | virtual bool LoadFromNAMJson(const nlohmann::json& modelJson)
35 | {
36 | ReadNAMConfig(modelJson);
37 |
38 | return CreateModelFromNAMJson(modelJson);
39 | }
40 |
41 | virtual bool CreateModelFromNAMJson(const nlohmann::json& modelJson)
42 | {
43 | (void)modelJson;
44 |
45 | return false;
46 | }
47 | };
48 |
49 |
	// NOTE(review): the template parameter lists in this header appear to have
	// been stripped by the tool that extracted this source (e.g. "template"
	// with no parameter list, truncated "std::conditional," below) — restore
	// them from the original file before compiling.
	template
	// Statically-compiled WaveNet model: channel count, head size and dilation
	// pattern are fixed at compile time. std::conditional selects between the
	// standard dilation set and the two-array "lite" dilation sets.
	class InternalWaveNetModelT : public InternalModel
	{
		// Concrete two-layer-array WaveNet type for this instantiation.
		using ModelType = typename std::conditional,
			NeuralAudio::WaveNetLayerArrayT>,
			NeuralAudio::WaveNetModelT<
			NeuralAudio::WaveNetLayerArrayT<1, 1, headSize, numChannels, 3, ILiteDilations1, false>,
			NeuralAudio::WaveNetLayerArrayT>
			>::type;

	public:
		InternalWaveNetModelT()
			: model(nullptr)
		{
		}

		~InternalWaveNetModelT()
		{
			if (model != nullptr)
			{
				delete model;
				model = nullptr;
			}
		}

		// This variant's topology is fixed at compile time.
		bool IsStatic()
		{
			return true;
		}

		// Builds the network and loads flat NAM "weights" from the model JSON.
		// Any previously-built network is released first, so repeated calls
		// are safe.
		bool CreateModelFromNAMJson(const nlohmann::json& modelJson)
		{
			if (model != nullptr)
			{
				delete model;
				model = nullptr;
			}

			model = new ModelType;

			nlohmann::json config = modelJson.at("config");

			model->SetWeights(modelJson.at("weights"));

			SetMaxAudioBufferSize(defaultMaxAudioBufferSize);

			return true;
		}

		// Static models size their internal buffers at compile time, so the
		// host buffer size is ignored.
		void SetMaxAudioBufferSize(int maxSize)
		{
			(void)maxSize;
		}

		int GetReceptiveFieldSize()
		{
			return model->ReceptiveFieldSize;
		}

		// Processes numSamples of audio, chunking the work to the model's
		// maximum frame count per inner call.
		void Process(float* input, float* output, size_t numSamples)
		{
			size_t offset = 0;

			while (numSamples > 0)
			{
				size_t toProcess = std::min(numSamples, model->GetMaxFrames());

				model->Process(input + offset, output + offset, toProcess);

				offset += toProcess;
				numSamples -= toProcess;
			}
		}

		void Prewarm()
		{
			model->Prewarm();
		}

	private:
		ModelType* model = nullptr;	// owned; created in CreateModelFromNAMJson
	};
134 |
135 |
136 | class InternalWaveNetDefinitionBase
137 | {
138 | public:
139 | virtual InternalModel* CreateModel()
140 | {
141 | return nullptr;
142 | }
143 |
144 | virtual size_t GetNumChannels()
145 | {
146 | return 0;
147 | }
148 |
149 | virtual size_t GetHeadSize()
150 | {
151 | return 0;
152 | }
153 | };
154 |
	// NOTE(review): template parameter list stripped by extraction — expected
	// parameters are numChannels and headSize (returned below); restore before
	// compiling.
	template
	// Definition/factory for one static WaveNet instantiation.
	class InternalWaveNetDefinitionT : public InternalWaveNetDefinitionBase
	{
	public:
		// Creates the matching static model; the caller owns the pointer.
		InternalModel* CreateModel()
		{
			return new InternalWaveNetModelT;
		}

		virtual size_t GetNumChannels()
		{
			return numChannels;
		}

		virtual size_t GetHeadSize()
		{
			return headSize;
		}
	};
174 |
175 | class InternalWaveNetModelDyn : public InternalModel
176 | {
177 | public:
178 | InternalWaveNetModelDyn()
179 | {
180 | }
181 |
182 | ~InternalWaveNetModelDyn()
183 | {
184 | if (model != nullptr)
185 | {
186 | delete model;
187 | model = nullptr;
188 | }
189 | }
190 |
191 | EModelLoadMode GetLoadMode()
192 | {
193 | return EModelLoadMode::Internal;
194 | }
195 |
196 | bool CreateModelFromNAMJson(const nlohmann::json& modelJson)
197 | {
198 | nlohmann::json config = modelJson.at("config");
199 |
200 | std::vector layerArrays;
201 |
202 | for (size_t i = 0; i < config.at("layers").size(); i++)
203 | {
204 | nlohmann::json layerConfig = config.at("layers").at(i);
205 |
206 | layerArrays.push_back(WaveNetLayerArray(layerConfig.at("input_size"), layerConfig.at("condition_size"), layerConfig.at("head_size"),
207 | layerConfig.at("channels"), layerConfig.at("kernel_size"), layerConfig.at("head_bias"), layerConfig.at("dilations")));
208 | }
209 |
210 | model = new WaveNetModel(layerArrays);
211 |
212 | model->SetWeights(modelJson.at("weights"));
213 |
214 | SetMaxAudioBufferSize(defaultMaxAudioBufferSize);
215 |
216 | return true;
217 | }
218 |
219 | void SetMaxAudioBufferSize(int maxSize)
220 | {
221 | model->SetMaxFrames(maxSize);
222 | }
223 |
224 | void Process(float* input, float* output, size_t numSamples)
225 | {
226 | size_t offset = 0;
227 |
228 | while (numSamples > 0)
229 | {
230 | size_t toProcess = std::min(numSamples, model->GetMaxFrames());
231 |
232 | model->Process(input + offset, output + offset, toProcess);
233 |
234 | offset += toProcess;
235 | numSamples -= toProcess;
236 | }
237 | }
238 |
239 | void Prewarm()
240 | {
241 | model->Prewarm();
242 | }
243 |
244 | private:
245 | WaveNetModel* model = nullptr;
246 | };
247 |
248 |
	// NOTE(review): template parameter list stripped by extraction (and the
	// "LSTMModelT" uses below have lost their arguments); the class is
	// parameterized on layer count and hidden size — restore before compiling.
	template
	// Statically-compiled LSTM model (fixed layer count and hidden size).
	class InternalLSTMModelT : public InternalModel
	{
	public:
		InternalLSTMModelT()
			: model(nullptr)
		{
		}

		~InternalLSTMModelT()
		{
			if (model != nullptr)
			{
				delete model;
				model = nullptr;
			}
		}

		// This variant's topology is fixed at compile time.
		bool IsStatic()
		{
			return true;
		}

		// Builds the LSTM and loads NAM-format "weights". Any previously-built
		// network is released first, so repeated calls are safe.
		bool CreateModelFromNAMJson(const nlohmann::json& modelJson)
		{
			if (model != nullptr)
			{
				delete model;
				model = nullptr;
			}

			model = new LSTMModelT;

			nlohmann::json config = modelJson.at("config");

			model->SetNAMWeights(modelJson.at("weights"));

			SetMaxAudioBufferSize(defaultMaxAudioBufferSize);

			return true;
		}

		// Recursively flattens a (possibly nested) JSON array of numbers into
		// a single weight vector, depth-first.
		std::vector FlattenWeights(const nlohmann::json& weights)
		{
			std::vector vec;

			for (size_t i = 0; i < weights.size(); i++)
			{
				if (weights[i].is_array())
				{
					auto subVec = FlattenWeights(weights[i]);
					vec.insert(vec.end(), subVec.begin(), subVec.end());
				}
				else
				{
					vec.push_back(weights[i]);
				}
			}

			return vec;
		}

		// Builds the LSTM from keras-style layer JSON: one or more "lstm"
		// layers followed by a single "dense" head layer.
		// Returns false if the layer layout does not match.
		bool CreateModelFromKerasJson(const nlohmann::json& modelJson)
		{
			if (model != nullptr)
			{
				delete model;
				model = nullptr;
			}

			model = new LSTMModelT;

			const auto layers = modelJson.at("layers");
			const size_t numLayers = layers.size();

			// Need at least one LSTM layer plus the dense head.
			if (numLayers < 2)
				return false;

			auto lastLayer = layers[numLayers - 1];

			if (lastLayer.at("type") != "dense")
				return false;

			LSTMDef lstmDef;

			lstmDef.HeadWeights = FlattenWeights(lastLayer.at("weights").at(0));
			lstmDef.HeadBias = lastLayer.at("weights").at(1).at(0);

			// All layers before the dense head must be LSTM layers.
			for (size_t i = 0; i < (numLayers - 1); i++)
			{
				auto layer = layers[i];

				if (layer.at("type") != "lstm")
					return false;

				LSTMLayerDef layerDef;

				layerDef.InputWeights = FlattenWeights(layer.at("weights").at(0));
				layerDef.HiddenWeights = FlattenWeights(layer.at("weights").at(1));
				layerDef.BiasWeights = FlattenWeights(layer.at("weights").at(2));

				lstmDef.Layers.push_back(layerDef);
			}

			model->SetWeights(lstmDef);

			return true;
		}

		// LSTM processing is per-sample; nothing depends on the buffer size.
		void SetMaxAudioBufferSize(int maxSize)
		{
			(void)maxSize;
		}

		void Process(float* input, float* output, size_t numSamples)
		{
			model->Process(input, output, numSamples);
		}

		// Runs 2048 samples (in 64-sample chunks) through the model to settle
		// the recurrent state before real audio arrives.
		void Prewarm()
		{
			NeuralModel::Prewarm(2048, 64);
		}

	private:
		LSTMModelT* model = nullptr;	// owned; created in CreateModelFrom*Json
	};
376 |
377 |
378 | class InternalLSTMDefinitionBase
379 | {
380 | public:
381 | virtual InternalModel* CreateModel()
382 | {
383 | return nullptr;
384 | }
385 |
386 | virtual size_t GetNumLayers()
387 | {
388 | return 0;
389 | }
390 |
391 | virtual size_t GetHiddenSize()
392 | {
393 | return 0;
394 | }
395 | };
396 |
	// NOTE(review): template parameter list stripped by extraction — expected
	// parameters are NumLayers and HiddenSize (returned below); restore before
	// compiling.
	template
	// Definition/factory for one static LSTM instantiation.
	class InternalLSTMDefinitionT : public InternalLSTMDefinitionBase
	{
	public:
		// Creates the matching static model; the caller owns the pointer.
		InternalModel* CreateModel()
		{
			return new InternalLSTMModelT;
		}

		virtual size_t GetNumLayers()
		{
			return NumLayers;
		}

		virtual size_t GetHiddenSize()
		{
			return HiddenSize;
		}
	};
416 |
	// LSTM model whose layer count and hidden size are determined at runtime.
	// Used as a fallback when no static LSTM instantiation matches the file.
	// NOTE(review): the "std::vector" return types below have lost their
	// element type to extraction (presumably float — confirm against the
	// original source).
	class InternalLSTMModelDyn : public InternalModel
	{
	public:
		InternalLSTMModelDyn()
			: model(nullptr)
		{
		}

		~InternalLSTMModelDyn()
		{
			if (model != nullptr)
			{
				delete model;
				model = nullptr;
			}
		}

		// Builds the LSTM from NAM "config" (num_layers/hidden_size) and flat
		// "weights". Any previously-built network is released first.
		bool CreateModelFromNAMJson(const nlohmann::json& modelJson)
		{
			if (model != nullptr)
			{
				delete model;
				model = nullptr;
			}

			nlohmann::json config = modelJson.at("config");

			model = new LSTMModel(config.at("num_layers"), config.at("hidden_size"));

			model->SetNAMWeights(modelJson.at("weights"));

			SetMaxAudioBufferSize(defaultMaxAudioBufferSize);

			return true;
		}

		// Recursively flattens a (possibly nested) JSON array of numbers into
		// a single weight vector, depth-first.
		// (Duplicated in InternalLSTMModelT — candidate for a shared helper.)
		std::vector FlattenWeights(const nlohmann::json& weights)
		{
			std::vector vec;

			for (size_t i = 0; i < weights.size(); i++)
			{
				if (weights[i].is_array())
				{
					auto subVec = FlattenWeights(weights[i]);
					vec.insert(vec.end(), subVec.begin(), subVec.end());
				}
				else
				{
					vec.push_back(weights[i]);
				}
			}

			return vec;
		}

		// Builds the LSTM from keras-style layer JSON: N "lstm" layers
		// followed by a single "dense" head. Returns false on a layout
		// mismatch.
		// NOTE(review): "model" is allocated before the per-layer validation
		// loop, so an early false return leaves an empty network allocated; it
		// is freed by the destructor or the next Create call, so it does not
		// leak, but the object is left in a not-loaded state.
		bool CreateModelFromKerasJson(const nlohmann::json& modelJson)
		{
			if (model != nullptr)
			{
				delete model;
				model = nullptr;
			}

			const auto layers = modelJson.at("layers");
			const size_t numLayers = layers.size();
			const size_t hiddenSize = layers.at(0).at("shape").back();

			// Need at least one LSTM layer plus the dense head.
			if (numLayers < 2)
				return false;

			auto lastLayer = layers[numLayers - 1];

			if (lastLayer.at("type") != "dense")
				return false;

			model = new LSTMModel(numLayers - 1, hiddenSize);

			LSTMDef lstmDef;

			lstmDef.HeadWeights = FlattenWeights(lastLayer.at("weights").at(0));
			lstmDef.HeadBias = lastLayer.at("weights").at(1).at(0);

			// All layers before the dense head must be LSTM layers.
			for (size_t i = 0; i < (numLayers - 1); i++)
			{
				auto layer = layers[i];

				if (layer.at("type") != "lstm")
					return false;

				LSTMLayerDef layerDef;

				layerDef.InputWeights = FlattenWeights(layer.at("weights").at(0));
				layerDef.HiddenWeights = FlattenWeights(layer.at("weights").at(1));
				layerDef.BiasWeights = FlattenWeights(layer.at("weights").at(2));

				lstmDef.Layers.push_back(layerDef);
			}

			model->SetWeights(lstmDef);

			return true;
		}

		// LSTM processing is per-sample; nothing depends on the buffer size.
		void SetMaxAudioBufferSize(int maxSize)
		{
			(void)maxSize;
		}

		void Process(float* input, float* output, size_t numSamples)
		{
			model->Process(input, output, numSamples);
		}

		// Runs 2048 samples (in 64-sample chunks) through the model to settle
		// the recurrent state before real audio arrives.
		void Prewarm()
		{
			NeuralModel::Prewarm(2048, 64);
		}

	private:
		LSTMModel* model = nullptr;	// owned; created in CreateModelFrom*Json
	};
539 | }
540 |
541 |
542 |
--------------------------------------------------------------------------------
/NeuralAudio/WaveNetDynamic.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | // Based on WaveNet model structure from https://github.com/sdatkinson/NeuralAmpModelerCore
4 | // with some template ideas from https://github.com/jatinchowdhury18/RTNeural-NAM
5 |
6 | #include
7 | #include
8 | #include "NeuralModel.h"
9 | #include "Activation.h"
10 |
11 | #ifndef WAVENET_MAX_NUM_FRAMES
12 | #define WAVENET_MAX_NUM_FRAMES 64
13 | #endif
14 |
15 | namespace NeuralAudio
16 | {
	// 1-D dilated convolution over a channels x frames Eigen matrix.
	// Weights are stored as one (outChannels x inChannels) matrix per kernel
	// tap, so each tap is applied as a single matrix product over a block of
	// frames.
	// NOTE(review): several template argument lists in this file (std::vector,
	// Eigen::Ref, the #include targets above) appear to have been stripped by
	// the tool that extracted this source — restore from the original before
	// compiling.
	class Conv1D
	{
	private:
		size_t inChannels;
		size_t outChannels;
		size_t kernelSize;
		bool doBias;			// whether a per-output-channel bias is applied
		size_t dilation;		// spacing between kernel taps, in frames
		std::vector weights;	// one weight matrix per kernel tap
		Eigen::VectorXf bias;

	public:
		Conv1D(size_t inChannels, size_t outChannels, size_t kernelSize, bool doBias, size_t dilation) :
			inChannels(inChannels),
			outChannels(outChannels),
			kernelSize(kernelSize),
			doBias(doBias),
			dilation(dilation)
		{
			for (size_t k = 0; k < kernelSize; k++)
			{
				auto kernelWeights = Eigen::MatrixXf(outChannels, inChannels);
				weights.push_back(kernelWeights);
			}

			if (doBias)
			{
				bias.resize(outChannels);
			}
		}

		// Loads weights from a flat iterator, advancing it past the values
		// consumed. Layout: [out][in][tap] for the kernel, then [out] for the
		// bias (when present).
		void SetWeights(std::vector::iterator& inWeights)
		{
			// Redundant with the constructor (weights already holds
			// kernelSize matrices), but harmless.
			weights.resize(kernelSize);

			for (size_t i = 0; i < outChannels; i++)
				for (size_t j = 0; j < inChannels; j++)
					for (size_t k = 0; k < kernelSize; k++)
						weights[k](i, j) = *(inWeights++);

			if (doBias)
			{
				for (size_t i = 0; i < outChannels; i++)
					bias(i) = *(inWeights++);
			}
		}

		// Computes nCols output frames starting at input column iStart.
		// The caller must provide at least dilation*(kernelSize-1) columns of
		// history before iStart.
		inline void Process(const Eigen::Ref& input, Eigen::Ref output, const size_t iStart, const size_t nCols) const
		{
			for (size_t k = 0; k < kernelSize; k++)
			{
				// (k + 1 - kernelSize) is negative for all but the last tap;
				// with size_t arithmetic it wraps, and iStart + offset wraps
				// back to iStart - dilation*(kernelSize-1-k). This relies on
				// unsigned modular arithmetic and is only valid while the
				// logical column index stays non-negative.
				size_t offset = dilation * (k + 1 - kernelSize);

				auto& inBlock = input.middleCols(iStart + offset, nCols);

				if (k == 0)
					output.noalias() = weights[k] * inBlock;
				else
					output.noalias() += weights[k] * inBlock;
			}

			if (doBias)
				output.colwise() += bias;
		}
	};
82 |
	// Fully-connected layer (outSize x inSize) applied column-wise to a
	// channels x frames matrix, with optional bias.
	// NOTE(review): the Eigen::Ref and std::vector template arguments below
	// were stripped by extraction — restore from the original before
	// compiling.
	class DenseLayer
	{
	private:
		size_t inSize;
		size_t outSize;
		bool doBias;				// whether a per-output bias is applied
		Eigen::MatrixXf weights;	// (outSize x inSize)
		Eigen::VectorXf bias;

	public:
		DenseLayer(size_t inSize, size_t outSize, bool doBias) :
			inSize(inSize),
			outSize(outSize),
			doBias(doBias),
			weights(outSize, inSize)
		{
			if (doBias)
			{
				bias.resize(outSize);
			}
		}

		// Loads weights row-major ([out][in]) then bias ([out]) from a flat
		// iterator, advancing it past the values consumed.
		void SetWeights(std::vector::iterator& inWeights)
		{
			for (auto i = 0; i < outSize; i++)
				for (auto j = 0; j < inSize; j++)
					weights(i, j) = *(inWeights++);

			if (doBias)
			{
				for (auto i = 0; i < outSize; i++)
					bias(i) = *(inWeights++);
			}
		}

		// output = weights * input (+ bias), overwriting output.
		void Process(const Eigen::Ref& input, Eigen::Ref output) const
		{
			if (doBias)
			{
				output.noalias() = (weights * input).colwise() + bias;
			}
			else
			{
				output.noalias() = weights * input;
			}
		}

		// output += weights * input (+ bias), accumulating into output.
		void ProcessAcc(const Eigen::Ref& input, Eigen::Ref output) const
		{
			if (doBias)
			{
				output.noalias() += (weights * input).colwise() + bias;
			}
			else
			{
				output.noalias() += weights * input;
			}
		}
	};
142 |
143 | class WaveNetLayer
144 | {
145 | private:
146 | size_t conditionSize;
147 | size_t channels;
148 | size_t kernelSize;
149 | size_t dilation;
150 | Conv1D conv1D;
151 | DenseLayer inputMixin;
152 | DenseLayer oneByOne;
153 | Eigen::MatrixXf state;
154 | Eigen::MatrixXf layerBuffer;
155 |
156 | public:
157 | size_t ReceptiveFieldSize;
158 | size_t BufferSize;
159 | size_t bufferStart;
160 |
161 | WaveNetLayer(size_t conditionSize, size_t channels, size_t kernelSize, size_t dilation) :
162 | conditionSize(conditionSize),
163 | channels(channels),
164 | kernelSize(kernelSize),
165 | dilation(dilation),
166 | conv1D(channels, channels, kernelSize, true, dilation),
167 | inputMixin(conditionSize, channels, false),
168 | oneByOne(channels, channels, true),
169 | state(channels, WAVENET_MAX_NUM_FRAMES),
170 | ReceptiveFieldSize((kernelSize - 1) * dilation),
171 | BufferSize((ReceptiveFieldSize * 2) + WAVENET_MAX_NUM_FRAMES)
172 | {
173 | state.setZero();
174 | }
175 |
176 | Eigen::MatrixXf& GetLayerBuffer()
177 | {
178 | return layerBuffer;
179 | }
180 |
181 | void AllocBuffer(size_t allocNum)
182 | {
183 | size_t size = BufferSize;
184 |
185 | layerBuffer.resize(channels, size);
186 | layerBuffer.setZero();
187 |
188 | bufferStart = ReceptiveFieldSize;
189 | }
190 |
191 | void SetWeights(std::vector