├── .travis.yml ├── CMakeLists.txt ├── FSM_OOFunctional.h ├── FSM_OOImplicitState.h ├── FSM_OOState.h ├── FSM_OOStateDynamic.h ├── FSM_Simple.h ├── FSM_TableState.h ├── FSM_TableStatePointers.h ├── FSM_VaryingTableState.h ├── KeyLookup.cpp ├── LICENSE ├── README.md ├── basictheory.cpp ├── branch_prediction.cpp ├── cachegrind.sh ├── cachesizeeffect.cpp ├── common.h ├── false_sharing.cpp ├── fsm.cpp ├── fsm_oo.cpp ├── fsm_simple.cpp ├── fsm_table.cpp ├── fsm_tableptrs.cpp ├── geom.cpp ├── geom.h ├── hotcold.cpp ├── linkedlists.cpp ├── makefile ├── matrixtranspose.cpp ├── modifying_memory.cpp ├── modifying_memory.cpp.dat ├── pcg-random.h ├── readingtests.cpp ├── simd_test.cpp └── speculativewaste.cpp /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | 3 | matrix: 4 | include: 5 | - os: osx 6 | osx_image: xcode8 7 | env: 8 | - CMAKE_BUILD_TOOL="Unix Makefiles" 9 | - BUILD_COMMAND=make 10 | - os: osx 11 | osx_image: xcode9 12 | env: 13 | - MATRIX_EVAL="wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-mac.zip && unzip ninja-mac.zip && export PATH=$PWD:$PATH" 14 | - CMAKE_BUILD_TOOL="Unix Makefiles" 15 | - BUILD_COMMAND=make 16 | - os: linux 17 | env: 18 | - CMAKE_BUILD_TOOL="Unix Makefiles" 19 | - BUILD_COMMAND=make 20 | addons: 21 | apt: 22 | packages: 23 | - ninja-build 24 | - os: linux 25 | addons: 26 | apt: 27 | sources: 28 | - ubuntu-toolchain-r-test 29 | packages: 30 | - g++-4.9 31 | env: 32 | - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9" 33 | - CMAKE_BUILD_TOOL="Unix Makefiles" 34 | - BUILD_COMMAND=make 35 | - os: linux 36 | addons: 37 | apt: 38 | sources: 39 | - ubuntu-toolchain-r-test 40 | packages: 41 | - g++-5 42 | env: 43 | - MATRIX_EVAL="CC=gcc-5 && CXX=g++-5" 44 | - CMAKE_BUILD_TOOL="Unix Makefiles" 45 | - BUILD_COMMAND=make 46 | - os: linux 47 | addons: 48 | apt: 49 | sources: 50 | - ubuntu-toolchain-r-test 51 | packages: 52 | - g++-6 53 | env: 54 | - MATRIX_EVAL="CC=gcc-6 
&& CXX=g++-6" 55 | - CMAKE_BUILD_TOOL="Unix Makefiles" 56 | - BUILD_COMMAND=make 57 | - os: linux 58 | addons: 59 | apt: 60 | sources: 61 | - ubuntu-toolchain-r-test 62 | packages: 63 | - g++-7 ninja-build 64 | env: 65 | - MATRIX_EVAL="CC=gcc-7 && CXX=g++-7" 66 | - CMAKE_BUILD_TOOL="Unix Makefiles" 67 | - BUILD_COMMAND=make 68 | 69 | before_install: 70 | - eval "${MATRIX_EVAL}" 71 | 72 | before_script: 73 | - mkdir build 74 | - cd build 75 | - cmake .. -G "${CMAKE_BUILD_TOOL}" -DCMAKE_BUILD_TYPE=Release 76 | 77 | script: 78 | - ${BUILD_COMMAND} 79 | - ${BUILD_COMMAND} run_all 80 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | 3 | project(dodbook C CXX) 4 | 5 | set(CMAKE_CXX_STANDARD 11) 6 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 7 | set(CMAKE_CXX_EXTENSIONS OFF) 8 | 9 | find_package(OpenMP) 10 | if(OPENMP_FOUND) 11 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 12 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") 13 | endif() 14 | 15 | function(add_benchmark name) 16 | add_executable(${name} ${name}.cpp) 17 | add_custom_target(run_${name} 18 | COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${name} 19 | DEPENDS ${name}) 20 | set(run_all_benchmarks ${run_all_benchmarks} run_${name} PARENT_SCOPE) 21 | endfunction() 22 | 23 | add_benchmark(KeyLookup) 24 | add_benchmark(basictheory) 25 | add_benchmark(branch_prediction) 26 | add_benchmark(cachesizeeffect) 27 | add_benchmark(fsm) 28 | add_benchmark(hotcold) 29 | add_benchmark(linkedlists) 30 | add_benchmark(matrixtranspose) 31 | add_benchmark(modifying_memory) 32 | add_benchmark(readingtests) 33 | add_benchmark(simd_test) 34 | add_benchmark(speculativewaste) 35 | if(OPENMP_FOUND) 36 | add_benchmark(false_sharing) 37 | endif() 38 | 39 | add_custom_target(run_all DEPENDS ${run_all_benchmarks}) 40 | 
-------------------------------------------------------------------------------- /FSM_OOFunctional.h: -------------------------------------------------------------------------------- 1 | namespace FSMOOFunctional { 2 | struct State; 3 | struct Machine { 4 | State *state = nullptr; 5 | float sleepiness; 6 | float hunger; 7 | float huntTimer; 8 | float eatTimer; 9 | inline void UpdateState( State *newState ); 10 | inline ~Machine(); 11 | }; 12 | struct State { 13 | virtual State * Update( Machine &M, float deltaTime ) = 0; 14 | virtual const char * GetName() { return "Base"; } 15 | }; 16 | struct Sleeping final : public State { 17 | State * Update( Machine &M, float deltaTime ) override; 18 | const char * GetName() override { return "Sleeping"; } 19 | }; 20 | struct Hunting final : public State { 21 | State * Update( Machine &M, float deltaTime ) override; 22 | const char * GetName() override { return "Hunting"; } 23 | }; 24 | struct Eating final : public State { 25 | State * Update( Machine &M, float deltaTime ) override; 26 | virtual const char * GetName() override { return "Eating"; } 27 | }; 28 | struct Exploring final : public State { 29 | State * Update( Machine &M, float deltaTime ) override; 30 | const char * GetName() override { return "Exploring"; } 31 | }; 32 | 33 | Sleeping m_commonSleeping; 34 | Hunting m_commonHunting; 35 | Eating m_commonEating; 36 | Exploring m_commonExploring; 37 | 38 | struct Data { 39 | Machine machine[NUM_MACHINES]; 40 | 41 | Data() { 42 | pcg32_random_t rng; 43 | pcg32_srandom_r(&rng, 1234, 5678); 44 | for( int m = 0; m < NUM_MACHINES; ++m ) { 45 | Machine &M = machine[m]; 46 | M.state = &m_commonSleeping; 47 | M.sleepiness = pcg32_random_r_rangef(&rng, 0.0f, 0.2f ); 48 | M.hunger = pcg32_random_r_rangef(&rng, 0.5f, 0.9f ); 49 | M.huntTimer = HUNTING_TIME; 50 | M.eatTimer = 0.0f; 51 | } 52 | } 53 | void Update( float deltaTime ) { 54 | for( int m = 0; m < NUM_MACHINES; ++m ) { 55 | Machine &M = machine[m]; 56 | State *newState = 
M.state->Update( M, deltaTime ); 57 | M.UpdateState(newState); 58 | } 59 | } 60 | int StateObjectToStateIndex( State *s ) { 61 | if( strcmp( s->GetName(), m_commonSleeping.GetName() ) == 0 ) 62 | return 0; 63 | if( strcmp( s->GetName(), m_commonHunting.GetName() ) == 0 ) 64 | return 1; 65 | if( strcmp( s->GetName(), m_commonEating.GetName() ) == 0 ) 66 | return 2; 67 | if( strcmp( s->GetName(), m_commonExploring.GetName() ) == 0 ) 68 | return 3; 69 | return -1; 70 | } 71 | #if CHECK_FOR_CONSISTENCY 72 | void Report( size_t *counters ) { 73 | for( int i = 0; i < 4; ++ i ) counters[i] = 0; 74 | 75 | for( int m = 0; m < NUM_MACHINES; ++m ) { 76 | Machine &M = machine[m]; 77 | int state = StateObjectToStateIndex( M.state ); 78 | counters[state] += 1; 79 | } 80 | } 81 | void PrintState() { 82 | for( int m = 0; m < NUM_MACHINES; ++m ) { 83 | Machine &M = machine[m]; 84 | int state = StateObjectToStateIndex( M.state ); 85 | printf( PRINT_FORMAT_STRING, "OOSTATE", STATENAME[state], M.sleepiness, M.hunger, M.huntTimer, M.eatTimer ); 86 | } 87 | } 88 | #endif 89 | }; 90 | 91 | // inlines 92 | inline void Machine::UpdateState( State *newState ) { 93 | if( newState ) { 94 | //if( state ) 95 | //delete state; 96 | state = newState; 97 | } 98 | } 99 | inline Machine::~Machine() { 100 | //if( state ) 101 | //delete state; 102 | state = nullptr; 103 | } 104 | 105 | State * Sleeping::Update( Machine &M, float deltaTime ) { 106 | M.hunger += deltaTime * SLEEP_HUNGER; 107 | M.sleepiness += deltaTime * SLEEP_SLEEP; 108 | if( M.sleepiness <= 0.0f ) { 109 | M.sleepiness = 0.0f; 110 | if( M.eatTimer > 0.0f ) { 111 | return &m_commonEating; 112 | } else { 113 | if( M.hunger > HUNGER_TRIGGER ) { 114 | M.huntTimer = HUNTING_TIME; 115 | return &m_commonHunting; 116 | } else { 117 | return &m_commonExploring; 118 | } 119 | } 120 | } 121 | return nullptr; 122 | } 123 | State * Hunting::Update( Machine &M, float deltaTime ) { 124 | M.hunger += deltaTime * HUNT_HUNGER; 125 | M.sleepiness += 
deltaTime * HUNT_SLEEP; 126 | M.huntTimer -= deltaTime; 127 | if( M.huntTimer <= 0.0f ) { 128 | M.eatTimer = EATING_TIME; 129 | if( M.sleepiness > SLEEP_TRIGGER ) { 130 | return &m_commonSleeping; 131 | } else { 132 | return &m_commonEating; 133 | } 134 | } 135 | return nullptr; 136 | } 137 | State * Eating::Update( Machine &M, float deltaTime ) { 138 | M.hunger += deltaTime * EAT_HUNGER; 139 | M.sleepiness += deltaTime * EAT_SLEEP; 140 | M.eatTimer -= deltaTime; 141 | if( M.sleepiness > SLEEP_TRIGGER ) { 142 | return &m_commonSleeping; 143 | } else { 144 | if( M.eatTimer <= 0.0f ) { 145 | if( M.hunger > HUNGER_TRIGGER ) { 146 | M.huntTimer = HUNTING_TIME; 147 | return &m_commonHunting; 148 | } else { 149 | return &m_commonExploring; 150 | } 151 | } 152 | } 153 | return nullptr; 154 | } 155 | State * Exploring::Update( Machine &M, float deltaTime ) { 156 | M.hunger += deltaTime * EXPLORE_HUNGER; 157 | M.sleepiness += deltaTime * EXPLORE_SLEEP; 158 | if( M.hunger > HUNGER_TRIGGER ) { 159 | M.huntTimer = HUNTING_TIME; 160 | return &m_commonHunting; 161 | } else { 162 | if( M.sleepiness > SLEEP_TRIGGER ) { 163 | return &m_commonSleeping; 164 | } 165 | } 166 | return nullptr; 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /FSM_OOImplicitState.h: -------------------------------------------------------------------------------- 1 | namespace FSMOOImplicitState { 2 | struct State; 3 | struct Machine { 4 | State *state = nullptr; 5 | float sleepiness; 6 | float hunger; 7 | float eatTimer; 8 | inline void UpdateState( State *newState ); 9 | inline ~Machine(); 10 | }; 11 | struct State { 12 | virtual State * Update( Machine &M, float deltaTime ) = 0; 13 | virtual const char * GetName() { return "Base"; } 14 | virtual void Cleanup() {} 15 | }; 16 | struct Sleeping final : public State { 17 | State * Update( Machine &M, float deltaTime ) override; 18 | const char * GetName() override { return "Sleeping"; } 19 | }; 20 | 
struct Hunting final : public State { 21 | State * Update( Machine &M, float deltaTime ) override; 22 | const char * GetName() override { return "Hunting"; } 23 | float huntTimer = HUNTING_TIME; 24 | virtual void Cleanup() override { delete this; } 25 | }; 26 | struct Eating final : public State { 27 | State * Update( Machine &M, float deltaTime ) override; 28 | virtual const char * GetName() override { return "Eating"; } 29 | virtual void Cleanup() override {} 30 | }; 31 | struct Exploring final : public State { 32 | State * Update( Machine &M, float deltaTime ) override; 33 | const char * GetName() override { return "Exploring"; } 34 | virtual void Cleanup() override {} 35 | }; 36 | 37 | Sleeping m_commonSleeping; 38 | Hunting m_commonHunting; 39 | Eating m_commonEating; 40 | Exploring m_commonExploring; 41 | 42 | State * GetSleeping() { return &m_commonSleeping; } 43 | State * GetHunting() { return new Hunting; } 44 | State * GetEating() { return &m_commonEating; } 45 | State * GetExploring() { return &m_commonExploring; } 46 | 47 | struct Data { 48 | Machine machine[NUM_MACHINES]; 49 | 50 | Data() { 51 | pcg32_random_t rng; 52 | pcg32_srandom_r(&rng, 1234, 5678); 53 | for( int m = 0; m < NUM_MACHINES; ++m ) { 54 | Machine &M = machine[m]; 55 | M.state = &m_commonSleeping; 56 | M.sleepiness = pcg32_random_r_rangef(&rng, 0.0f, 0.2f ); 57 | M.hunger = pcg32_random_r_rangef(&rng, 0.5f, 0.9f ); 58 | M.eatTimer = 0.0f; 59 | } 60 | } 61 | void Update( float deltaTime ) { 62 | for( int m = 0; m < NUM_MACHINES; ++m ) { 63 | Machine &M = machine[m]; 64 | State *newState = M.state->Update( M, deltaTime ); 65 | M.UpdateState(newState); 66 | } 67 | } 68 | int StateObjectToStateIndex( State *s ) { 69 | if( strcmp( s->GetName(), m_commonSleeping.GetName() ) == 0 ) 70 | return 0; 71 | if( strcmp( s->GetName(), m_commonHunting.GetName() ) == 0 ) 72 | return 1; 73 | if( strcmp( s->GetName(), m_commonEating.GetName() ) == 0 ) 74 | return 2; 75 | if( strcmp( s->GetName(), 
m_commonExploring.GetName() ) == 0 ) 76 | return 3; 77 | return -1; 78 | } 79 | #if CHECK_FOR_CONSISTENCY 80 | void Report( size_t *counters ) { 81 | for( int i = 0; i < 4; ++ i ) counters[i] = 0; 82 | 83 | for( int m = 0; m < NUM_MACHINES; ++m ) { 84 | Machine &M = machine[m]; 85 | int state = StateObjectToStateIndex( M.state ); 86 | counters[state] += 1; 87 | } 88 | } 89 | void PrintState() { 90 | for( int m = 0; m < NUM_MACHINES; ++m ) { 91 | Machine &M = machine[m]; 92 | int state = StateObjectToStateIndex( M.state ); 93 | float huntTimer = 0.0f; 94 | if( Hunting *h = dynamic_cast( M.state ) ) { 95 | huntTimer = h->huntTimer; 96 | } 97 | printf( PRINT_FORMAT_STRING, "IMPLICIT", STATENAME[state], M.sleepiness, M.hunger, huntTimer, M.eatTimer ); 98 | } 99 | } 100 | #endif 101 | }; 102 | 103 | // inlines 104 | inline void Machine::UpdateState( State *newState ) { 105 | if( newState ) { 106 | //if( state ) 107 | //state->Cleanup(); 108 | state = newState; 109 | } 110 | } 111 | inline Machine::~Machine() { 112 | //if( state ) 113 | //state->Cleanup(); 114 | state = nullptr; 115 | } 116 | 117 | State * Sleeping::Update( Machine &M, float deltaTime ) { 118 | M.hunger += deltaTime * SLEEP_HUNGER; 119 | M.sleepiness += deltaTime * SLEEP_SLEEP; 120 | if( M.sleepiness <= 0.0f ) { 121 | M.sleepiness = 0.0f; 122 | if( M.eatTimer > 0.0f ) { 123 | return GetEating(); 124 | } else { 125 | if( M.hunger > HUNGER_TRIGGER ) { 126 | return GetHunting(); 127 | } else { 128 | return GetExploring(); 129 | } 130 | } 131 | } 132 | return nullptr; 133 | } 134 | State * Hunting::Update( Machine &M, float deltaTime ) { 135 | M.hunger += deltaTime * HUNT_HUNGER; 136 | M.sleepiness += deltaTime * HUNT_SLEEP; 137 | huntTimer -= deltaTime; 138 | if( huntTimer <= 0.0f ) { 139 | M.eatTimer = EATING_TIME; 140 | if( M.sleepiness > SLEEP_TRIGGER ) { 141 | return GetSleeping(); 142 | } else { 143 | return GetEating(); 144 | } 145 | } 146 | return nullptr; 147 | } 148 | State * Eating::Update( 
Machine &M, float deltaTime ) { 149 | M.hunger += deltaTime * EAT_HUNGER; 150 | M.sleepiness += deltaTime * EAT_SLEEP; 151 | M.eatTimer -= deltaTime; 152 | if( M.sleepiness > SLEEP_TRIGGER ) { 153 | return &m_commonSleeping; 154 | } else { 155 | if( M.eatTimer <= 0.0f ) { 156 | if( M.hunger > HUNGER_TRIGGER ) { 157 | return GetHunting(); 158 | } else { 159 | return GetExploring(); 160 | } 161 | } 162 | } 163 | return nullptr; 164 | } 165 | State * Exploring::Update( Machine &M, float deltaTime ) { 166 | M.hunger += deltaTime * EXPLORE_HUNGER; 167 | M.sleepiness += deltaTime * EXPLORE_SLEEP; 168 | if( M.hunger > HUNGER_TRIGGER ) { 169 | return GetHunting(); 170 | } else { 171 | if( M.sleepiness > SLEEP_TRIGGER ) { 172 | return GetSleeping(); 173 | } 174 | } 175 | return nullptr; 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /FSM_OOState.h: -------------------------------------------------------------------------------- 1 | namespace FSMOOState { 2 | struct State; 3 | struct Machine { 4 | State *state = nullptr; 5 | float sleepiness; 6 | float hunger; 7 | float huntTimer; 8 | float eatTimer; 9 | inline void UpdateState( State *newState ); 10 | inline ~Machine(); 11 | }; 12 | struct State { 13 | virtual State * Update( Machine &M, float deltaTime ) = 0; 14 | virtual const char * GetName() { return "Base"; } 15 | }; 16 | struct Sleeping final : public State { 17 | State * Update( Machine &M, float deltaTime ) override; 18 | const char * GetName() override { return "Sleeping"; } 19 | }; 20 | struct Hunting final : public State { 21 | State * Update( Machine &M, float deltaTime ) override; 22 | const char * GetName() override { return "Hunting"; } 23 | }; 24 | struct Eating final : public State { 25 | State * Update( Machine &M, float deltaTime ) override; 26 | virtual const char * GetName() override { return "Eating"; } 27 | }; 28 | struct Exploring final : public State { 29 | State * Update( Machine &M, float 
deltaTime ) override; 30 | const char * GetName() override { return "Exploring"; } 31 | }; 32 | 33 | Sleeping m_commonSleeping; 34 | Hunting m_commonHunting; 35 | Eating m_commonEating; 36 | Exploring m_commonExploring; 37 | 38 | struct Data { 39 | Machine machine[NUM_MACHINES]; 40 | 41 | Data() { 42 | pcg32_random_t rng; 43 | pcg32_srandom_r(&rng, 1234, 5678); 44 | for( int m = 0; m < NUM_MACHINES; ++m ) { 45 | Machine &M = machine[m]; 46 | M.state = &m_commonSleeping; 47 | M.sleepiness = pcg32_random_r_rangef(&rng, 0.0f, 0.2f ); 48 | M.hunger = pcg32_random_r_rangef(&rng, 0.5f, 0.9f ); 49 | M.huntTimer = HUNTING_TIME; 50 | M.eatTimer = 0.0f; 51 | } 52 | } 53 | void Update( float deltaTime ) { 54 | for( int m = 0; m < NUM_MACHINES; ++m ) { 55 | Machine &M = machine[m]; 56 | State *newState = M.state->Update( M, deltaTime ); 57 | M.UpdateState(newState); 58 | } 59 | } 60 | int StateObjectToStateIndex( State *s ) { 61 | if( strcmp( s->GetName(), m_commonSleeping.GetName() ) == 0 ) 62 | return 0; 63 | if( strcmp( s->GetName(), m_commonHunting.GetName() ) == 0 ) 64 | return 1; 65 | if( strcmp( s->GetName(), m_commonEating.GetName() ) == 0 ) 66 | return 2; 67 | if( strcmp( s->GetName(), m_commonExploring.GetName() ) == 0 ) 68 | return 3; 69 | return -1; 70 | } 71 | #if CHECK_FOR_CONSISTENCY 72 | void Report( size_t *counters ) { 73 | for( int i = 0; i < 4; ++ i ) counters[i] = 0; 74 | 75 | for( int m = 0; m < NUM_MACHINES; ++m ) { 76 | Machine &M = machine[m]; 77 | int state = StateObjectToStateIndex( M.state ); 78 | counters[state] += 1; 79 | } 80 | } 81 | void PrintState() { 82 | for( int m = 0; m < NUM_MACHINES; ++m ) { 83 | Machine &M = machine[m]; 84 | int state = StateObjectToStateIndex( M.state ); 85 | printf( PRINT_FORMAT_STRING, "OOSTATE", STATENAME[state], M.sleepiness, M.hunger, M.huntTimer, M.eatTimer ); 86 | } 87 | } 88 | #endif 89 | }; 90 | 91 | // inlines 92 | inline void Machine::UpdateState( State *newState ) { 93 | if( newState ) { 94 | //if( state ) 
95 | //delete state; 96 | state = newState; 97 | } 98 | } 99 | inline Machine::~Machine() { 100 | //if( state ) 101 | //delete state; 102 | state = nullptr; 103 | } 104 | 105 | State * Sleeping::Update( Machine &M, float deltaTime ) { 106 | M.hunger += deltaTime * SLEEP_HUNGER; 107 | M.sleepiness += deltaTime * SLEEP_SLEEP; 108 | if( M.sleepiness <= 0.0f ) { 109 | M.sleepiness = 0.0f; 110 | if( M.eatTimer > 0.0f ) { 111 | return &m_commonEating; 112 | } else { 113 | if( M.hunger > HUNGER_TRIGGER ) { 114 | M.huntTimer = HUNTING_TIME; 115 | return &m_commonHunting; 116 | } else { 117 | return &m_commonExploring; 118 | } 119 | } 120 | } 121 | return nullptr; 122 | } 123 | State * Hunting::Update( Machine &M, float deltaTime ) { 124 | M.hunger += deltaTime * HUNT_HUNGER; 125 | M.sleepiness += deltaTime * HUNT_SLEEP; 126 | M.huntTimer -= deltaTime; 127 | if( M.huntTimer <= 0.0f ) { 128 | M.eatTimer = EATING_TIME; 129 | if( M.sleepiness > SLEEP_TRIGGER ) { 130 | return &m_commonSleeping; 131 | } else { 132 | return &m_commonEating; 133 | } 134 | } 135 | return nullptr; 136 | } 137 | State * Eating::Update( Machine &M, float deltaTime ) { 138 | M.hunger += deltaTime * EAT_HUNGER; 139 | M.sleepiness += deltaTime * EAT_SLEEP; 140 | M.eatTimer -= deltaTime; 141 | if( M.sleepiness > SLEEP_TRIGGER ) { 142 | return &m_commonSleeping; 143 | } else { 144 | if( M.eatTimer <= 0.0f ) { 145 | if( M.hunger > HUNGER_TRIGGER ) { 146 | M.huntTimer = HUNTING_TIME; 147 | return &m_commonHunting; 148 | } else { 149 | return &m_commonExploring; 150 | } 151 | } 152 | } 153 | return nullptr; 154 | } 155 | State * Exploring::Update( Machine &M, float deltaTime ) { 156 | M.hunger += deltaTime * EXPLORE_HUNGER; 157 | M.sleepiness += deltaTime * EXPLORE_SLEEP; 158 | if( M.hunger > HUNGER_TRIGGER ) { 159 | M.huntTimer = HUNTING_TIME; 160 | return &m_commonHunting; 161 | } else { 162 | if( M.sleepiness > SLEEP_TRIGGER ) { 163 | return &m_commonSleeping; 164 | } 165 | } 166 | return nullptr; 167 | } 
168 | } 169 | -------------------------------------------------------------------------------- /FSM_OOStateDynamic.h: -------------------------------------------------------------------------------- 1 | 2 | namespace FSMOOState { 3 | struct State; 4 | struct Machine { 5 | State *state = nullptr; 6 | float sleepiness; 7 | float hunger; 8 | float huntTimer; 9 | float eatTimer; 10 | inline void UpdateState( State *newState ); 11 | inline ~Machine(); 12 | }; 13 | struct State { 14 | virtual State * Update( Machine &M, float deltaTime ) = 0; 15 | }; 16 | struct Sleeping final : public State { 17 | virtual State * Update( Machine &M, float deltaTime ) override; 18 | }; 19 | struct Hunting final : public State { 20 | virtual State * Update( Machine &M, float deltaTime ) override; 21 | }; 22 | struct Eating final : public State { 23 | virtual State * Update( Machine &M, float deltaTime ) override; 24 | }; 25 | struct Exploring final : public State { 26 | virtual State * Update( Machine &M, float deltaTime ) override; 27 | }; 28 | 29 | Sleeping m_commonSleeping; 30 | Hunting m_commonHunting; 31 | Eating m_commonEating; 32 | Exploring m_commonExploring; 33 | 34 | struct Data { 35 | Machine machine[NUM_MACHINES]; 36 | 37 | Data() { 38 | pcg32_random_t rng; 39 | pcg32_srandom_r(&rng, 1234, 5678); 40 | for( int m = 0; m < NUM_MACHINES; ++m ) { 41 | Machine &M = machine[m]; 42 | M.state = new Sleeping; 43 | M.sleepiness = pcg32_random_r_rangef(&rng, 0.0f, 0.2f ); 44 | M.hunger = pcg32_random_r_rangef(&rng, 0.5f, 0.9f ); 45 | M.huntTimer = HUNTING_TIME; 46 | M.eatTimer = EATING_TIME; 47 | } 48 | } 49 | void Update( float deltaTime ) { 50 | for( int m = 0; m < NUM_MACHINES; ++m ) { 51 | Machine &M = machine[m]; 52 | M.UpdateState( M.state->Update( M, deltaTime ) ); 53 | } 54 | } 55 | }; 56 | 57 | // inlines 58 | inline void Machine::UpdateState( State *newState ) { 59 | if( newState ) { 60 | //if( state ) 61 | //delete state; 62 | state = newState; 63 | } 64 | } 65 | inline 
Machine::~Machine() { 66 | //if( state ) 67 | //delete state; 68 | state = nullptr; 69 | } 70 | 71 | State * Sleeping::Update( Machine &M, float deltaTime ) { 72 | M.hunger += deltaTime * SLEEP_HUNGER; 73 | M.sleepiness += deltaTime * SLEEP_SLEEP; 74 | if( M.sleepiness <= 0.0f ) { 75 | M.sleepiness = 0.0f; 76 | if( M.hunger > HUNGER_TRIGGER ) { 77 | return new Hunting; 78 | } else { 79 | return new Exploring; 80 | } 81 | } 82 | } 83 | State * Hunting::Update( Machine &M, float deltaTime ) { 84 | M.hunger += deltaTime * HUNT_HUNGER; 85 | M.sleepiness += deltaTime * HUNT_SLEEP; 86 | if( M.sleepiness > SLEEP_TRIGGER ) { 87 | return new Sleeping; 88 | } 89 | if( M.huntTimer <= 0.0f ) { 90 | M.huntTimer = HUNTING_TIME; 91 | M.eatTimer = EATING_TIME; 92 | return new Eating; 93 | } 94 | } 95 | State * Eating::Update( Machine &M, float deltaTime ) { 96 | M.hunger += deltaTime * EAT_HUNGER; 97 | M.sleepiness += deltaTime * EAT_SLEEP; 98 | M.eatTimer -= deltaTime; 99 | if( M.eatTimer <= 0.0f ) { 100 | if( M.hunger > HUNGER_TRIGGER ) { 101 | return new Hunting; 102 | } else { 103 | return new Exploring; 104 | } 105 | } 106 | } 107 | State * Exploring::Update( Machine &M, float deltaTime ) { 108 | M.hunger += deltaTime * EXPLORE_HUNGER; 109 | M.sleepiness += deltaTime * EXPLORE_SLEEP; 110 | if( M.hunger > HUNGER_TRIGGER ) { 111 | return new Hunting; 112 | } 113 | if( M.sleepiness > SLEEP_TRIGGER ) { 114 | return new Sleeping; 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /FSM_Simple.h: -------------------------------------------------------------------------------- 1 | 2 | namespace FSMSimple { 3 | enum State { 4 | S_sleeping, 5 | S_hunting, 6 | S_eating, 7 | S_exploring, 8 | }; 9 | 10 | struct Machine { 11 | State state; 12 | float sleepiness; 13 | float hunger; 14 | float huntTimer; 15 | float eatTimer; 16 | }; 17 | 18 | struct Data { 19 | Machine machine[NUM_MACHINES]; 20 | 21 | Data() { 22 | pcg32_random_t 
rng; 23 | pcg32_srandom_r(&rng, 1234, 5678); 24 | for( int m = 0; m < NUM_MACHINES; ++m ) { 25 | Machine &M = machine[m]; 26 | M.state = S_sleeping; 27 | M.sleepiness = pcg32_random_r_rangef(&rng, 0.0f, 0.2f ); 28 | M.hunger = pcg32_random_r_rangef(&rng, 0.5f, 0.9f ); 29 | M.huntTimer = HUNTING_TIME; 30 | M.eatTimer = 0.0f; 31 | } 32 | } 33 | void Update( float deltaTime ) { 34 | for( int m = 0; m < NUM_MACHINES; ++m ) { 35 | Machine &M = machine[m]; 36 | switch( M.state ) { 37 | case S_sleeping: 38 | { 39 | M.hunger += deltaTime * SLEEP_HUNGER; 40 | M.sleepiness += deltaTime * SLEEP_SLEEP; 41 | if( M.sleepiness <= 0.0f ) { 42 | M.sleepiness = 0.0f; 43 | if( M.eatTimer > 0.0f ) { 44 | M.state = S_eating; 45 | } else { 46 | if( M.hunger > HUNGER_TRIGGER ) { 47 | M.state = S_hunting; 48 | M.huntTimer = HUNTING_TIME; 49 | } else { 50 | M.state = S_exploring; 51 | } 52 | } 53 | } 54 | } break; 55 | case S_hunting: 56 | { 57 | M.hunger += deltaTime * HUNT_HUNGER; 58 | M.sleepiness += deltaTime * HUNT_SLEEP; 59 | M.huntTimer -= deltaTime; 60 | if( M.huntTimer <= 0.0f ) { 61 | M.eatTimer = EATING_TIME; 62 | if( M.sleepiness > SLEEP_TRIGGER ) { 63 | M.state = S_sleeping; 64 | } else { 65 | M.state = S_eating; 66 | } 67 | } else { 68 | } 69 | } break; 70 | case S_eating: 71 | { 72 | M.hunger += deltaTime * EAT_HUNGER; 73 | M.sleepiness += deltaTime * EAT_SLEEP; 74 | M.eatTimer -= deltaTime; 75 | if( M.sleepiness > SLEEP_TRIGGER ) { 76 | M.state = S_sleeping; 77 | } else { 78 | if( M.eatTimer <= 0.0f ) { 79 | if( M.hunger > HUNGER_TRIGGER ) { 80 | M.state = S_hunting; 81 | M.huntTimer = HUNTING_TIME; 82 | } else { 83 | M.state = S_exploring; 84 | } 85 | } 86 | } 87 | } break; 88 | case S_exploring: 89 | { 90 | M.hunger += deltaTime * EXPLORE_HUNGER; 91 | M.sleepiness += deltaTime * EXPLORE_SLEEP; 92 | if( M.hunger > HUNGER_TRIGGER ) { 93 | M.state = S_hunting; 94 | M.huntTimer = HUNTING_TIME; 95 | } 96 | else { 97 | if( M.sleepiness > SLEEP_TRIGGER ) { 98 | M.state = 
S_sleeping; 99 | } 100 | } 101 | } break; 102 | } 103 | } 104 | } 105 | #if CHECK_FOR_CONSISTENCY 106 | void Report( size_t *counters ) { 107 | for( int i = 0; i < 4; ++ i ) counters[i] = 0; 108 | 109 | for( int m = 0; m < NUM_MACHINES; ++m ) { 110 | Machine &M = machine[m]; 111 | counters[M.state] += 1; 112 | } 113 | } 114 | void PrintState() { 115 | for( int m = 0; m < NUM_MACHINES; ++m ) { 116 | Machine &M = machine[m]; 117 | printf( PRINT_FORMAT_STRING, "SIMPLE", STATENAME[M.state], M.sleepiness, M.hunger, M.huntTimer, M.eatTimer ); 118 | } 119 | } 120 | #endif 121 | }; 122 | } 123 | -------------------------------------------------------------------------------- /FSM_TableState.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | namespace FSMTableState { 5 | struct Machine { 6 | float sleepiness; 7 | float hunger; 8 | float huntTimer; 9 | float eatTimer; 10 | }; 11 | typedef std::vector MachineVector; 12 | 13 | struct Data { 14 | MachineVector sleeps; 15 | MachineVector hunts; 16 | MachineVector eats; 17 | MachineVector explores; 18 | 19 | Data() { 20 | pcg32_random_t rng; 21 | pcg32_srandom_r(&rng, 1234, 5678); 22 | for( int m = 0; m < NUM_MACHINES; ++m ) { 23 | Machine M; 24 | M.sleepiness = pcg32_random_r_rangef(&rng, 0.0f, 0.2f ); 25 | M.hunger = pcg32_random_r_rangef(&rng, 0.5f, 0.9f ); 26 | M.huntTimer = HUNTING_TIME; 27 | M.eatTimer = 0.0f; 28 | sleeps.push_back( M ); 29 | } 30 | } 31 | void Update( float deltaTime ) { 32 | MachineVector pendingSleep; 33 | MachineVector pendingHunt; 34 | MachineVector pendingEat; 35 | MachineVector pendingExplore; 36 | 37 | { 38 | for( MachineVector::iterator iter = sleeps.begin(); iter != sleeps.end(); ) { 39 | Machine &M = *iter; 40 | M.hunger += deltaTime * SLEEP_HUNGER; 41 | M.sleepiness += deltaTime * SLEEP_SLEEP; 42 | if( M.sleepiness <= 0.0f ) { 43 | M.sleepiness = 0.0f; 44 | if( M.eatTimer > 0.0f ) { 45 | pendingEat.push_back(M); 46 | } else { 47 | if( 
M.hunger > HUNGER_TRIGGER ) { 48 | M.huntTimer = HUNTING_TIME; 49 | pendingHunt.push_back(M); 50 | } else { 51 | pendingExplore.push_back(M); 52 | } 53 | } 54 | *iter = sleeps.back(); sleeps.pop_back(); 55 | } else { 56 | ++iter; 57 | } 58 | } 59 | for( MachineVector::iterator iter = hunts.begin(); iter != hunts.end(); ) { 60 | Machine &M = *iter; 61 | M.hunger += deltaTime * HUNT_HUNGER; 62 | M.sleepiness += deltaTime * HUNT_SLEEP; 63 | M.huntTimer -= deltaTime; 64 | if( M.huntTimer <= 0.0f ) { 65 | M.eatTimer = EATING_TIME; 66 | if( M.sleepiness > SLEEP_TRIGGER ) { 67 | pendingSleep.push_back(M); 68 | } else { 69 | pendingEat.push_back(M); 70 | } 71 | *iter = hunts.back(); hunts.pop_back(); 72 | } else { 73 | ++iter; 74 | } 75 | } 76 | for( MachineVector::iterator iter = eats.begin(); iter != eats.end(); ) { 77 | Machine &M = *iter; 78 | M.hunger += deltaTime * EAT_HUNGER; 79 | M.sleepiness += deltaTime * EAT_SLEEP; 80 | M.eatTimer -= deltaTime; 81 | if( M.sleepiness > SLEEP_TRIGGER ) { 82 | pendingSleep.push_back(M); 83 | *iter = eats.back(); eats.pop_back(); 84 | } else { 85 | if( M.eatTimer <= 0.0f ) { 86 | if( M.hunger > HUNGER_TRIGGER ) { 87 | M.huntTimer = HUNTING_TIME; 88 | pendingHunt.push_back(M); 89 | } else { 90 | pendingExplore.push_back(M); 91 | } 92 | *iter = eats.back(); eats.pop_back(); 93 | } else { 94 | ++iter; 95 | } 96 | } 97 | } 98 | for( MachineVector::iterator iter = explores.begin(); iter != explores.end(); ) { 99 | Machine &M = *iter; 100 | M.hunger += deltaTime * EXPLORE_HUNGER; 101 | M.sleepiness += deltaTime * EXPLORE_SLEEP; 102 | if( M.hunger > HUNGER_TRIGGER ) { 103 | M.huntTimer = HUNTING_TIME; 104 | pendingHunt.push_back(M); 105 | *iter = explores.back(); explores.pop_back(); 106 | } else { 107 | if( M.sleepiness > SLEEP_TRIGGER ) { 108 | pendingSleep.push_back(M); 109 | *iter = explores.back(); explores.pop_back(); 110 | } else { 111 | ++iter; 112 | } 113 | } 114 | } 115 | } 116 | 117 | { 118 | sleeps.insert( sleeps.end(), 
pendingSleep.begin(), pendingSleep.end() ); 119 | hunts.insert( hunts.end(), pendingHunt.begin(), pendingHunt.end() ); 120 | eats.insert( eats.end(), pendingEat.begin(), pendingEat.end() ); 121 | explores.insert( explores.end(), pendingExplore.begin(), pendingExplore.end() ); 122 | assert( sleeps.size() + hunts.size() + eats.size() + explores.size() == NUM_MACHINES ); 123 | } 124 | } 125 | #if CHECK_FOR_CONSISTENCY 126 | void Report( size_t *counters ) { 127 | counters[0] = sleeps.size(); 128 | counters[1] = hunts.size(); 129 | counters[2] = eats.size(); 130 | counters[3] = explores.size(); 131 | } 132 | void PrintState() { 133 | const char *name = "TABLE"; 134 | for( MachineVector::iterator iter = sleeps.begin(); iter != sleeps.end(); ++iter ) { 135 | Machine &M = *iter; 136 | printf( PRINT_FORMAT_STRING, name, STATENAME[0], M.sleepiness, M.hunger, M.huntTimer, M.eatTimer ); 137 | } 138 | for( MachineVector::iterator iter = hunts.begin(); iter != hunts.end(); ++iter ) { 139 | Machine &M = *iter; 140 | printf( PRINT_FORMAT_STRING, name, STATENAME[1], M.sleepiness, M.hunger, M.huntTimer, M.eatTimer ); 141 | } 142 | for( MachineVector::iterator iter = eats.begin(); iter != eats.end(); ++iter ) { 143 | Machine &M = *iter; 144 | printf( PRINT_FORMAT_STRING, name, STATENAME[2], M.sleepiness, M.hunger, M.huntTimer, M.eatTimer ); 145 | } 146 | for( MachineVector::iterator iter = explores.begin(); iter != explores.end(); ++iter ) { 147 | Machine &M = *iter; 148 | printf( PRINT_FORMAT_STRING, name, STATENAME[3], M.sleepiness, M.hunger, M.huntTimer, M.eatTimer ); 149 | } 150 | } 151 | #endif 152 | }; 153 | } 154 | -------------------------------------------------------------------------------- /FSM_TableStatePointers.h: -------------------------------------------------------------------------------- 1 | namespace FSMTableStatePointers { 2 | struct Machine { 3 | float sleepiness; 4 | float hunger; 5 | float huntTimer; 6 | float eatTimer; 7 | }; 8 | typedef Machine 
*MachinePtr; 9 | typedef std::vector MachineVector; 10 | 11 | struct Data { 12 | MachineVector sleeps; 13 | MachineVector hunts; 14 | MachineVector eats; 15 | MachineVector explores; 16 | 17 | Data() { 18 | pcg32_random_t rng; 19 | pcg32_srandom_r(&rng, 1234, 5678); 20 | for( int m = 0; m < NUM_MACHINES; ++m ) { 21 | Machine M; 22 | M.sleepiness = pcg32_random_r_rangef(&rng, 0.0f, 0.2f ); 23 | M.hunger = pcg32_random_r_rangef(&rng, 0.5f, 0.9f ); 24 | M.huntTimer = HUNTING_TIME; 25 | M.eatTimer = 0.0f; 26 | MachinePtr pM = new Machine; 27 | *pM = M; 28 | sleeps.push_back( pM ); 29 | } 30 | } 31 | void Update( float deltaTime ) { 32 | MachineVector pendingSleep; 33 | MachineVector pendingHunt; 34 | MachineVector pendingEat; 35 | MachineVector pendingExplore; 36 | 37 | { 38 | for( MachineVector::iterator iter = sleeps.begin(); iter != sleeps.end(); ) { 39 | MachinePtr &M = *iter; 40 | M->hunger += deltaTime * SLEEP_HUNGER; 41 | M->sleepiness += deltaTime * SLEEP_SLEEP; 42 | if( M->sleepiness <= 0.0f ) { 43 | M->sleepiness = 0.0f; 44 | if( M->eatTimer > 0.0f ) { 45 | pendingEat.push_back(M); 46 | } else { 47 | if( M->hunger > HUNGER_TRIGGER ) { 48 | M->huntTimer = HUNTING_TIME; 49 | pendingHunt.push_back(M); 50 | } else { 51 | pendingExplore.push_back(M); 52 | } 53 | } 54 | *iter = sleeps.back(); sleeps.pop_back(); 55 | } else { 56 | ++iter; 57 | } 58 | } 59 | for( MachineVector::iterator iter = hunts.begin(); iter != hunts.end(); ) { 60 | MachinePtr &M = *iter; 61 | M->hunger += deltaTime * HUNT_HUNGER; 62 | M->sleepiness += deltaTime * HUNT_SLEEP; 63 | M->huntTimer -= deltaTime; 64 | if( M->huntTimer <= 0.0f ) { 65 | M->eatTimer = EATING_TIME; 66 | if( M->sleepiness > SLEEP_TRIGGER ) { 67 | pendingSleep.push_back(M); 68 | } else { 69 | pendingEat.push_back(M); 70 | } 71 | *iter = hunts.back(); hunts.pop_back(); 72 | } else { 73 | ++iter; 74 | } 75 | } 76 | for( MachineVector::iterator iter = eats.begin(); iter != eats.end(); ) { 77 | MachinePtr &M = *iter; 78 | 
M->hunger += deltaTime * EAT_HUNGER; 79 | M->sleepiness += deltaTime * EAT_SLEEP; 80 | M->eatTimer -= deltaTime; 81 | if( M->sleepiness > SLEEP_TRIGGER ) { 82 | pendingSleep.push_back(M); 83 | *iter = eats.back(); eats.pop_back(); 84 | } else { 85 | if( M->eatTimer <= 0.0f ) { 86 | if( M->hunger > HUNGER_TRIGGER ) { 87 | M->huntTimer = HUNTING_TIME; 88 | pendingHunt.push_back(M); 89 | } else { 90 | pendingExplore.push_back(M); 91 | } 92 | *iter = eats.back(); eats.pop_back(); 93 | } else { 94 | ++iter; 95 | } 96 | } 97 | } 98 | for( MachineVector::iterator iter = explores.begin(); iter != explores.end(); ) { 99 | MachinePtr &M = *iter; 100 | M->hunger += deltaTime * EXPLORE_HUNGER; 101 | M->sleepiness += deltaTime * EXPLORE_SLEEP; 102 | if( M->hunger > HUNGER_TRIGGER ) { 103 | M->huntTimer = HUNTING_TIME; 104 | pendingHunt.push_back(M); 105 | *iter = explores.back(); explores.pop_back(); 106 | } else { 107 | if( M->sleepiness > SLEEP_TRIGGER ) { 108 | pendingSleep.push_back(M); 109 | *iter = explores.back(); explores.pop_back(); 110 | } else { 111 | ++iter; 112 | } 113 | } 114 | } 115 | } 116 | 117 | { 118 | sleeps.insert( sleeps.end(), pendingSleep.begin(), pendingSleep.end() ); 119 | hunts.insert( hunts.end(), pendingHunt.begin(), pendingHunt.end() ); 120 | eats.insert( eats.end(), pendingEat.begin(), pendingEat.end() ); 121 | explores.insert( explores.end(), pendingExplore.begin(), pendingExplore.end() ); 122 | assert( sleeps.size() + hunts.size() + eats.size() + explores.size() == NUM_MACHINES ); 123 | } 124 | } 125 | #if CHECK_FOR_CONSISTENCY 126 | void Report( size_t *counters ) { 127 | counters[0] = sleeps.size(); 128 | counters[1] = hunts.size(); 129 | counters[2] = eats.size(); 130 | counters[3] = explores.size(); 131 | } 132 | void PrintState() { 133 | const char *name = "TABLE"; 134 | for( MachineVector::iterator iter = sleeps.begin(); iter != sleeps.end(); ++iter ) { 135 | MachinePtr &M = *iter; 136 | printf( PRINT_FORMAT_STRING, name, STATENAME[0], 
M->sleepiness, M->hunger, M->huntTimer, M->eatTimer ); 137 | } 138 | for( MachineVector::iterator iter = hunts.begin(); iter != hunts.end(); ++iter ) { 139 | MachinePtr &M = *iter; 140 | printf( PRINT_FORMAT_STRING, name, STATENAME[1], M->sleepiness, M->hunger, M->huntTimer, M->eatTimer ); 141 | } 142 | for( MachineVector::iterator iter = eats.begin(); iter != eats.end(); ++iter ) { 143 | MachinePtr &M = *iter; 144 | printf( PRINT_FORMAT_STRING, name, STATENAME[2], M->sleepiness, M->hunger, M->huntTimer, M->eatTimer ); 145 | } 146 | for( MachineVector::iterator iter = explores.begin(); iter != explores.end(); ++iter ) { 147 | MachinePtr &M = *iter; 148 | printf( PRINT_FORMAT_STRING, name, STATENAME[3], M->sleepiness, M->hunger, M->huntTimer, M->eatTimer ); 149 | } 150 | } 151 | #endif 152 | }; 153 | } 154 | -------------------------------------------------------------------------------- /FSM_VaryingTableState.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | namespace FSMVaryingTableState { 5 | struct HuntingMachine; 6 | struct Machine { 7 | float sleepiness; 8 | float hunger; 9 | float eatTimer; 10 | operator HuntingMachine (); 11 | }; 12 | typedef std::vector MachineVector; 13 | struct HuntingMachine : public Machine { 14 | float huntTimer = HUNTING_TIME; 15 | }; 16 | Machine::operator HuntingMachine() { 17 | HuntingMachine m; 18 | m.sleepiness = sleepiness; 19 | m.hunger = hunger; 20 | m.eatTimer = eatTimer; 21 | return m; 22 | } 23 | typedef std::vector HuntingMachineVector; 24 | 25 | struct Data { 26 | MachineVector sleeps; 27 | HuntingMachineVector hunts; 28 | MachineVector eats; 29 | MachineVector explores; 30 | 31 | Data() { 32 | pcg32_random_t rng; 33 | pcg32_srandom_r(&rng, 1234, 5678); 34 | for( int m = 0; m < NUM_MACHINES; ++m ) { 35 | Machine M; 36 | M.sleepiness = pcg32_random_r_rangef(&rng, 0.0f, 0.2f ); 37 | M.hunger = pcg32_random_r_rangef(&rng, 0.5f, 0.9f ); 38 | M.eatTimer = 0.0f; 39 | 
sleeps.push_back( M ); 40 | } 41 | } 42 | void Update( float deltaTime ) { 43 | MachineVector pendingSleep; 44 | HuntingMachineVector pendingHunt; 45 | MachineVector pendingEat; 46 | MachineVector pendingExplore; 47 | 48 | { 49 | for( MachineVector::iterator iter = sleeps.begin(); iter != sleeps.end(); ) { 50 | Machine &M = *iter; 51 | M.hunger += deltaTime * SLEEP_HUNGER; 52 | M.sleepiness += deltaTime * SLEEP_SLEEP; 53 | if( M.sleepiness <= 0.0f ) { 54 | M.sleepiness = 0.0f; 55 | if( M.eatTimer > 0.0f ) { 56 | pendingEat.push_back(M); 57 | } else { 58 | if( M.hunger > HUNGER_TRIGGER ) { 59 | pendingHunt.push_back(M); 60 | } else { 61 | pendingExplore.push_back(M); 62 | } 63 | } 64 | //iter = sleeps.erase( iter ); 65 | *iter = sleeps.back(); sleeps.pop_back(); 66 | } else { 67 | ++iter; 68 | } 69 | } 70 | for( HuntingMachineVector::iterator iter = hunts.begin(); iter != hunts.end(); ) { 71 | HuntingMachine &M = *iter; 72 | M.hunger += deltaTime * HUNT_HUNGER; 73 | M.sleepiness += deltaTime * HUNT_SLEEP; 74 | M.huntTimer -= deltaTime; 75 | if( M.huntTimer <= 0.0f ) { 76 | M.eatTimer = EATING_TIME; 77 | if( M.sleepiness > SLEEP_TRIGGER ) { 78 | pendingSleep.push_back(M); 79 | } else { 80 | pendingEat.push_back(M); 81 | } 82 | *iter = hunts.back(); hunts.pop_back(); 83 | } else { 84 | ++iter; 85 | } 86 | } 87 | for( MachineVector::iterator iter = eats.begin(); iter != eats.end(); ) { 88 | Machine &M = *iter; 89 | M.hunger += deltaTime * EAT_HUNGER; 90 | M.sleepiness += deltaTime * EAT_SLEEP; 91 | M.eatTimer -= deltaTime; 92 | if( M.sleepiness > SLEEP_TRIGGER ) { 93 | pendingSleep.push_back(M); 94 | *iter = eats.back(); eats.pop_back(); 95 | } else { 96 | if( M.eatTimer <= 0.0f ) { 97 | if( M.hunger > HUNGER_TRIGGER ) { 98 | pendingHunt.push_back(M); 99 | } else { 100 | pendingExplore.push_back(M); 101 | } 102 | //iter = eats.erase( iter ); 103 | *iter = eats.back(); eats.pop_back(); 104 | } else { 105 | ++iter; 106 | } 107 | } 108 | } 109 | for( 
MachineVector::iterator iter = explores.begin(); iter != explores.end(); ) { 110 | Machine &M = *iter; 111 | M.hunger += deltaTime * EXPLORE_HUNGER; 112 | M.sleepiness += deltaTime * EXPLORE_SLEEP; 113 | if( M.hunger > HUNGER_TRIGGER ) { 114 | pendingHunt.push_back(M); 115 | //iter = explores.erase( iter ); 116 | *iter = explores.back(); explores.pop_back(); 117 | } else { 118 | if( M.sleepiness > SLEEP_TRIGGER ) { 119 | pendingSleep.push_back(M); 120 | //iter = explores.erase( iter ); 121 | *iter = explores.back(); explores.pop_back(); 122 | } else { 123 | ++iter; 124 | } 125 | } 126 | } 127 | } 128 | 129 | { 130 | sleeps.insert( sleeps.end(), pendingSleep.begin(), pendingSleep.end() ); 131 | hunts.insert( hunts.end(), pendingHunt.begin(), pendingHunt.end() ); 132 | eats.insert( eats.end(), pendingEat.begin(), pendingEat.end() ); 133 | explores.insert( explores.end(), pendingExplore.begin(), pendingExplore.end() ); 134 | assert( sleeps.size() + hunts.size() + eats.size() + explores.size() == NUM_MACHINES ); 135 | } 136 | } 137 | #if CHECK_FOR_CONSISTENCY 138 | void Report( size_t *counters ) { 139 | counters[0] = sleeps.size(); 140 | counters[1] = hunts.size(); 141 | counters[2] = eats.size(); 142 | counters[3] = explores.size(); 143 | } 144 | void PrintState() { 145 | const char *name = "VARYING"; 146 | for( MachineVector::iterator iter = sleeps.begin(); iter != sleeps.end(); ++iter ) { 147 | Machine &M = *iter; 148 | printf( PRINT_FORMAT_STRING, name, STATENAME[0], M.sleepiness, M.hunger, 0.0f, M.eatTimer ); 149 | } 150 | for( HuntingMachineVector::iterator iter = hunts.begin(); iter != hunts.end(); ++iter ) { 151 | HuntingMachine &M = *iter; 152 | printf( PRINT_FORMAT_STRING, name, STATENAME[1], M.sleepiness, M.hunger, M.huntTimer, M.eatTimer ); 153 | } 154 | for( MachineVector::iterator iter = eats.begin(); iter != eats.end(); ++iter ) { 155 | Machine &M = *iter; 156 | printf( PRINT_FORMAT_STRING, name, STATENAME[2], M.sleepiness, M.hunger, 0.0f, M.eatTimer ); 
157 | } 158 | for( MachineVector::iterator iter = explores.begin(); iter != explores.end(); ++iter ) { 159 | Machine &M = *iter; 160 | printf( PRINT_FORMAT_STRING, name, STATENAME[3], M.sleepiness, M.hunger, 0.0f, M.eatTimer ); 161 | } 162 | } 163 | #endif 164 | }; 165 | } 166 | -------------------------------------------------------------------------------- /KeyLookup.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | int SECONDSOFANIMATION = 10; 4 | static const int NUM_QUERIES = 1000; 5 | static const int NUM_NODES = 145; 6 | static int minFrameRate = 10; 7 | static int maxFrameRate = 15; 8 | static const float RATIO_OF_NON_SCALING = 0.85f; 9 | 10 | //BEGIN 11 | // basic animation key lookup 12 | struct FullAnimKey { 13 | float time; 14 | Vec3 translation; 15 | Vec3 scale; 16 | Vec4 rotation; // sijk quaternion 17 | }; 18 | struct FullAnim { 19 | int numKeys; 20 | FullAnimKey *keys; 21 | FullAnimKey GetKeyAtTimeBinary( float t ) { 22 | int l = 0, h = numKeys-1; 23 | int m = (l+h) / 2; 24 | while( l < h ) { 25 | if( t < keys[m].time ) { 26 | h = m-1; 27 | } else { 28 | l = m; 29 | } 30 | m = (l+h+1) / 2; 31 | } 32 | return keys[m]; 33 | } 34 | FullAnimKey GetKeyAtTimeLinear( float t ) { 35 | int i = 0; 36 | while( i < numKeys ) { 37 | if( keys[i].time > t ) { 38 | --i; 39 | break; 40 | } 41 | ++i; 42 | } 43 | if( i < 0 ) 44 | return keys[0]; 45 | return keys[i]; 46 | } 47 | }; 48 | 49 | // looking up keys by time 50 | struct DataOnlyAnimKey { 51 | Vec3 translation; 52 | Vec3 scale; 53 | Vec4 rotation; // sijk quaternion 54 | }; 55 | struct DataOnlyAnim { 56 | int numKeys; 57 | float *keyTime; 58 | DataOnlyAnimKey *keys; 59 | DataOnlyAnimKey GetKeyAtTimeBinary( float t ) { 60 | int l = 0, h = numKeys-1; 61 | int m = (l+h) / 2; 62 | while( l < h ) { 63 | if( t < keyTime[m] ) { 64 | h = m-1; 65 | } else { 66 | l = m; 67 | } 68 | m = (l+h+1) / 2; 69 | } 70 | return keys[m]; 71 | } 72 | DataOnlyAnimKey 
GetKeyAtTimeLinear( float t ) { 73 | int i = 0; 74 | while( i < numKeys ) { 75 | if( keyTime[i] > t ) { 76 | --i; 77 | break; 78 | } 79 | ++i; 80 | } 81 | if( i < 0 ) 82 | return keys[0]; 83 | return keys[i]; 84 | } 85 | }; 86 | struct ClumpedAnim { 87 | int numKeys; 88 | float *keyTime; 89 | DataOnlyAnimKey *keys; 90 | static const int numPrefetchedKeyTimes = (64-sizeof(int)-sizeof(float*)-sizeof(DataOnlyAnimKey*))/sizeof(float); 91 | static const int keysPerLump = 64/sizeof(float); 92 | float firstStage[numPrefetchedKeyTimes]; 93 | DataOnlyAnimKey GetKeyAtTimeBinary( float t ) { 94 | for( int start = 0; start < numPrefetchedKeyTimes; ++start ) { 95 | if( firstStage[start] > t ) { 96 | int l = start*keysPerLump; 97 | int h = l + keysPerLump; 98 | h = h > numKeys ? numKeys : h; 99 | return GetKeyAtTimeBinary( t, l, h+1 ); 100 | } 101 | } 102 | return GetKeyAtTimeBinary( t, numPrefetchedKeyTimes*keysPerLump, numKeys ); 103 | } 104 | DataOnlyAnimKey GetKeyAtTimeBinary( float t, int l, int h ) { 105 | int m = (l+h) / 2; 106 | while( l < h ) { 107 | if( t < keyTime[m] ) { 108 | h = m-1; 109 | } else { 110 | l = m; 111 | } 112 | m = (l+h+1) / 2; 113 | } 114 | return keys[m]; 115 | } 116 | DataOnlyAnimKey GetKeyAtTimeLinear( float t ) { 117 | for( int start = 0; start < numPrefetchedKeyTimes; ++start ) { 118 | if( firstStage[start] > t ) { 119 | int l = start*keysPerLump; 120 | return GetKeyAtTimeLinear( t, l ); 121 | } 122 | } 123 | return GetKeyAtTimeLinear( t, numPrefetchedKeyTimes*keysPerLump ); 124 | } 125 | DataOnlyAnimKey GetKeyAtTimeLinear( float t, int startIndex ) { 126 | int i = startIndex; 127 | while( i < numKeys ) { 128 | if( keyTime[i] > t ) { 129 | --i; 130 | break; 131 | } 132 | ++i; 133 | } 134 | if( i < 0 ) 135 | return keys[0]; 136 | return keys[i]; 137 | } 138 | }; 139 | 140 | //END 141 | 142 | struct TranslationKey { float t; Vec3 pos; }; 143 | struct RotationKey { float t; Vec4 quat; }; 144 | struct ScaleKey { float t; Vec3 scale; }; 145 | 146 | 
typedef std::vector TKVec; 147 | typedef std::vector RKVec; 148 | typedef std::vector SKVec; 149 | struct AnimData { 150 | TKVec tKeys; 151 | RKVec rKeys; 152 | SKVec sKeys; 153 | }; 154 | 155 | 156 | AnimData PrepareData( uint32_t seed = 1234, bool bTranslationAnimation = false, bool bScaleAnimation = false ) { 157 | TKVec tKeys; 158 | RKVec rKeys; 159 | SKVec sKeys; 160 | 161 | pcg32_random_t rng; 162 | pcg32_srandom_r(&rng, seed, 5678); 163 | 164 | // Make a ten second long animation, with keys at various times 165 | int secondsOfAnimation = SECONDSOFANIMATION; 166 | 167 | // How many keys? At 60fps, motion capture can get very high framerates, but 168 | // maybe about 10-15 keyframes per second is about right. 169 | int keyCount = pcg32_random_r_range(&rng, 170 | minFrameRate*secondsOfAnimation, 171 | maxFrameRate*secondsOfAnimation); 172 | 173 | //printf( "Keys[%i] %s translation, %s scale\n", 174 | //keyCount, 175 | //bTranslationAnimation?"with":"without", 176 | //bScaleAnimation?"with":"without" 177 | //); 178 | #ifndef NDEBUG 179 | int sharedKeys = 1; 180 | if( bTranslationAnimation || bScaleAnimation ) { 181 | // if there is animation on the translation or scale components, then 182 | // somewhere between 2 and all of the keyframes will have the same timing. 
183 | // (always share first and last keyframes times) 184 | sharedKeys = pcg32_random_r_range(&rng, 2, keyCount+1); 185 | assert( sharedKeys <= keyCount ); // just ensuring my random gen is actually behaving 186 | } 187 | #endif 188 | 189 | // generate some keyframe data 190 | for( int i = 0; i < keyCount; ++i ) { 191 | Vec4 r; // simulate a quaternion 192 | r.x = pcg32_random_r_rangef(&rng, -1, 1); 193 | r.y = pcg32_random_r_rangef(&rng, -1, 1); 194 | r.z = pcg32_random_r_rangef(&rng, -1, 1); 195 | r.w = 1.0f - sqrt( dot(r,r)); 196 | rKeys.push_back(RotationKey{-1,r}); 197 | if( i == 0 || bTranslationAnimation ) { 198 | Vec3 t; 199 | t.x = pcg32_random_r_rangef(&rng, -10, 10); 200 | t.y = pcg32_random_r_rangef(&rng, -10, 10); 201 | t.z = pcg32_random_r_rangef(&rng, -10, 10); 202 | tKeys.push_back(TranslationKey{-1,t}); 203 | } 204 | if( i == 0 || bScaleAnimation ) { 205 | Vec3 s; 206 | s.x = pcg32_random_r_rangef(&rng, 0.1f, 2.0f); 207 | s.y = pcg32_random_r_rangef(&rng, 0.1f, 2.0f); 208 | s.z = pcg32_random_r_rangef(&rng, 0.1f, 2.0f); 209 | sKeys.push_back(ScaleKey{-1,s}); 210 | } 211 | } 212 | 213 | if(!bTranslationAnimation) { 214 | // set an initial keyframe at time 0 215 | tKeys[0].t = 0; 216 | } 217 | if(!bScaleAnimation) { 218 | // set an initial keyframe at time 0 219 | sKeys[0].t = 0; 220 | } 221 | 222 | std::vector keyTimes; 223 | keyTimes.push_back(0.0f); 224 | keyTimes.push_back(secondsOfAnimation); 225 | // now generate some basic key times 226 | for( int i = 2; i < keyCount; ++i ) { 227 | keyTimes.push_back( 228 | pcg32_random_r_rangef(&rng, 0.1f, secondsOfAnimation -0.1f) 229 | ); // arbitrary reduction of range 230 | } 231 | std::sort(keyTimes.begin(), keyTimes.end()); 232 | // apply these times to the rotation keys 233 | for( int i = 0; i < keyCount; ++i ) { 234 | rKeys[i].t = keyTimes[i]; 235 | if( bTranslationAnimation ) { 236 | tKeys[i].t = keyTimes[i]; 237 | } 238 | if( bScaleAnimation ) { 239 | sKeys[i].t = keyTimes[i]; 240 | } 241 | } 242 | 
243 | #if 0 // unsharing the key times 244 | int unsharedKeys = keyCount - sharedKeys; 245 | for( int i = 0; i < unsharedKeys; ++i ) { 246 | int keyToJitter = pcg32_random_r_range(&rng, 1, keyCount-1 ); // any key between the start and end 247 | 248 | // we're only after making sure they aren't the exact same time value, so just jitter them. 249 | if( bTranslationAnimation ) { 250 | tKeys[i].t = pcg32_random_r_rangef(&rng, tKeys[i-1].t, tKeys[i+1].t ); 251 | } 252 | if( bScaleAnimation ) { 253 | sKeys[i].t = pcg32_random_r_rangef(&rng, sKeys[i-1].t, sKeys[i+1].t ); 254 | } 255 | } 256 | #endif 257 | 258 | #ifndef NDEBUG 259 | // check we did all good 260 | for( auto r : rKeys ) { 261 | assert( r.t >= 0.0f ); 262 | assert( r.t <= secondsOfAnimation ); 263 | } 264 | for( auto t : tKeys ) { 265 | assert( t.t >= 0.0f ); 266 | assert( t.t <= secondsOfAnimation ); 267 | } 268 | for( auto s : sKeys ) { 269 | assert( s.t >= 0.0f ); 270 | assert( s.t <= secondsOfAnimation ); 271 | } 272 | #endif 273 | return AnimData { tKeys, rKeys, sKeys, }; 274 | } 275 | 276 | void FromData( FullAnim &out, const AnimData &in ) { 277 | std::vector outkeys; 278 | for( size_t i = 0; i < in.rKeys.size(); ++i ) { 279 | auto r = in.rKeys[i]; 280 | auto t = in.tKeys.size()==1?in.tKeys[0]:in.tKeys[i]; 281 | auto s = in.sKeys.size()==1?in.sKeys[0]:in.sKeys[i]; 282 | FullAnimKey ak; 283 | ak.time = r.t; 284 | ak.rotation = r.quat; 285 | ak.translation = t.pos; 286 | ak.scale = s.scale; 287 | outkeys.push_back( ak ); 288 | } 289 | out.keys = new FullAnimKey[outkeys.size()]; 290 | std::copy( outkeys.begin(), outkeys.end(), out.keys ); 291 | out.numKeys = outkeys.size(); 292 | } 293 | 294 | void FromData( DataOnlyAnim &out, const AnimData &in ) { 295 | std::vector outkeys; 296 | std::vector times; 297 | for( size_t i = 0; i < in.rKeys.size(); ++i ) { 298 | auto r = in.rKeys[i]; 299 | auto t = in.tKeys.size()==1?in.tKeys[0]:in.tKeys[i]; 300 | auto s = in.sKeys.size()==1?in.sKeys[0]:in.sKeys[i]; 301 | 
times.push_back( r.t ); 302 | DataOnlyAnimKey ak; 303 | ak.rotation = r.quat; 304 | ak.translation = t.pos; 305 | ak.scale = s.scale; 306 | outkeys.push_back( ak ); 307 | } 308 | out.keys = new DataOnlyAnimKey[outkeys.size()]; 309 | std::copy( outkeys.begin(), outkeys.end(), out.keys ); 310 | out.keyTime = new float[times.size()]; 311 | std::copy( times.begin(), times.end(), out.keyTime ); 312 | out.numKeys = outkeys.size(); 313 | } 314 | void FromData( ClumpedAnim &out, const AnimData &in ) { 315 | std::vector outkeys; 316 | std::vector times; 317 | for( size_t i = 0; i < in.rKeys.size(); ++i ) { 318 | auto r = in.rKeys[i]; 319 | auto t = in.tKeys.size()==1?in.tKeys[0]:in.tKeys[i]; 320 | auto s = in.sKeys.size()==1?in.sKeys[0]:in.sKeys[i]; 321 | times.push_back( r.t ); 322 | DataOnlyAnimKey ak; 323 | ak.rotation = r.quat; 324 | ak.translation = t.pos; 325 | ak.scale = s.scale; 326 | outkeys.push_back( ak ); 327 | } 328 | out.keys = new DataOnlyAnimKey[outkeys.size()]; 329 | std::copy( outkeys.begin(), outkeys.end(), out.keys ); 330 | out.keyTime = new float[times.size()]; 331 | std::copy( times.begin(), times.end(), out.keyTime ); 332 | out.numKeys = outkeys.size(); 333 | // fill out the precached times 334 | for( size_t i = 0; i < ClumpedAnim::numPrefetchedKeyTimes; ++i ) { 335 | size_t targetIndex = (i+1)*ClumpedAnim::keysPerLump; 336 | if( targetIndex < times.size() ) { 337 | out.firstStage[i] = times[targetIndex]; 338 | } else { 339 | out.firstStage[i] = HUGE_VALF; 340 | } 341 | } 342 | } 343 | 344 | struct HierarchyOutputData { 345 | struct NodeData { 346 | Vec3 translation; 347 | Vec3 scale; 348 | Vec4 rotation; // sijk quaternion 349 | }; 350 | NodeData nodeData[NUM_NODES]; 351 | }; 352 | template 353 | struct TestHierarchy { 354 | AnimType animForNode[NUM_NODES]; 355 | void SetupNode( int node, const AnimData &ad ) { 356 | FromData( animForNode[node], ad ); 357 | } 358 | HierarchyOutputData GetAtTBinary( float t ) { 359 | HierarchyOutputData hod; 360 | 
for( int i = 0; i < NUM_NODES; ++i ) { 361 | auto keyData = animForNode[i].GetKeyAtTimeBinary( t ); 362 | hod.nodeData[i].translation = keyData.translation; 363 | hod.nodeData[i].rotation = keyData.rotation; 364 | hod.nodeData[i].scale = keyData.scale; 365 | } 366 | return hod; 367 | } 368 | HierarchyOutputData GetAtTLinear( float t ) { 369 | HierarchyOutputData hod; 370 | for( int i = 0; i < NUM_NODES; ++i ) { 371 | auto keyData = animForNode[i].GetKeyAtTimeLinear( t ); 372 | hod.nodeData[i].translation = keyData.translation; 373 | hod.nodeData[i].rotation = keyData.rotation; 374 | hod.nodeData[i].scale = keyData.scale; 375 | } 376 | return hod; 377 | } 378 | }; 379 | 380 | volatile HierarchyOutputData output_data; 381 | struct Data { 382 | TestHierarchy fullAnimHierarchy; 383 | TestHierarchy dataOnlyHierarchy; 384 | TestHierarchy clumpedHierarchy; 385 | std::vector queries; 386 | 387 | Data() { 388 | AnimData ad = PrepareData( 1000, true, false ); 389 | fullAnimHierarchy.SetupNode( 0, ad ); 390 | dataOnlyHierarchy.SetupNode( 0, ad ); 391 | clumpedHierarchy.SetupNode( 0, ad ); 392 | for( int i = 1; i < NUM_NODES; ++i ) { 393 | // let's have some scaling on the last few nodes 394 | bool bScaled = i>(RATIO_OF_NON_SCALING*NUM_NODES); 395 | ad = PrepareData( 1000+i, false, bScaled ); 396 | fullAnimHierarchy.SetupNode( i, ad ); 397 | dataOnlyHierarchy.SetupNode( i, ad ); 398 | clumpedHierarchy.SetupNode( i, ad ); 399 | } 400 | { 401 | pcg32_random_t rng; 402 | pcg32_srandom_r(&rng, 1234312, 55544); 403 | for( int i = 0; i < NUM_QUERIES; ++i ) { 404 | float t = pcg32_random_r_rangef(&rng, -0.5f, SECONDSOFANIMATION + 0.5f ); 405 | queries.push_back( t ); 406 | } 407 | } 408 | } 409 | }; 410 | Data *gData; 411 | void TestFullAnimBinary() { 412 | for( auto t : gData->queries ) { 413 | HierarchyOutputData hod = gData->fullAnimHierarchy.GetAtTBinary(t); 414 | memcpy( (void*)&output_data, &hod, sizeof( output_data ) ); 415 | } 416 | } 417 | void TestFullAnimLinear() { 418 | 
for( auto t : gData->queries ) { 419 | HierarchyOutputData hod = gData->fullAnimHierarchy.GetAtTLinear(t); 420 | memcpy( (void*)&output_data, &hod, sizeof( output_data ) ); 421 | } 422 | } 423 | void TestDataOnlyBinary() { 424 | for( auto t : gData->queries ) { 425 | HierarchyOutputData hod = gData->dataOnlyHierarchy.GetAtTBinary(t); 426 | memcpy( (void*)&output_data, &hod, sizeof( output_data ) ); 427 | } 428 | } 429 | void TestDataOnlyLinear() { 430 | for( auto t : gData->queries ) { 431 | HierarchyOutputData hod = gData->dataOnlyHierarchy.GetAtTLinear(t); 432 | memcpy( (void*)&output_data, &hod, sizeof( output_data ) ); 433 | } 434 | } 435 | void TestClumpedBinary() { 436 | for( auto t : gData->queries ) { 437 | HierarchyOutputData hod = gData->clumpedHierarchy.GetAtTBinary(t); 438 | memcpy( (void*)&output_data, &hod, sizeof( output_data ) ); 439 | } 440 | } 441 | void TestClumpedLinear() { 442 | for( auto t : gData->queries ) { 443 | HierarchyOutputData hod = gData->clumpedHierarchy.GetAtTLinear(t); 444 | memcpy( (void*)&output_data, &hod, sizeof( output_data ) ); 445 | } 446 | } 447 | 448 | 449 | int main() { 450 | Data data; gData = &data; 451 | 452 | Test tests[] = { 453 | (Test){ TestFullAnimBinary, "Full anim key - binary search" }, 454 | (Test){ TestDataOnlyBinary, "Data only key - binary search" }, 455 | (Test){ TestClumpedBinary, "Pre-indexed - binary search" }, 456 | (Test){ TestFullAnimLinear, "Full anim key - linear search" }, 457 | (Test){ TestDataOnlyLinear, "Data only key - linear search" }, 458 | (Test){ TestClumpedLinear, "Pre-indexed - linear search" }, 459 | }; 460 | 461 | printf( "Animation key lookup tests\n" ); 462 | printf( "Clumped precached keys = %i clumps of %i keys (max key = %i)\n\n", ClumpedAnim::numPrefetchedKeyTimes, ClumpedAnim::keysPerLump, ClumpedAnim::numPrefetchedKeyTimes * ClumpedAnim::keysPerLump ); 463 | printf( "Each hierarchy has %i nodes.\n", NUM_NODES ); 464 | printf( "Each hierarchy is queried %i times\n", NUM_QUERIES 
); 465 | 466 | RunTests( tests ); 467 | 468 | return 0; 469 | } 470 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Richard Fabian 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Data-oriented design book source code 2 | ------- 3 | 4 | This source code was used to perform measurements on multiple devices to find numbers and validation for the data-oriented design book. 5 | 6 | The source was normally compiled on Ubuntu Linux, but was also compiled on Raspberry Pi and termux using both GCC and Clang where possible to compare and contrast. 
7 | 8 | There are currently no working makefiles for MacOS, or Windows operating systems. 9 | -------------------------------------------------------------------------------- /basictheory.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | const int NUM_TESTS = 11; 4 | int *A[NUM_TESTS], *B[NUM_TESTS], *C[NUM_TESTS], *D[NUM_TESTS]; 5 | int *a, *b, *c, *d; 6 | int sa, sb; 7 | const int ELEMENT_COUNT = 1 * 1024 * 1024; 8 | 9 | void SetupValues() { 10 | // fill with data 11 | pcg32_random_t rng; 12 | pcg32_srandom_r(&rng, 1234, 5678); 13 | sa = sb = 0; 14 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 15 | sa += a[i] = pcg32_random_r_range(&rng, 0, 255 ); 16 | sb += b[i] = pcg32_random_r_range(&rng, 0, 65535 ); 17 | } 18 | } 19 | void PrepTest() { 20 | static int lastTest = NUM_TESTS-1; 21 | static int test = 0; 22 | A[lastTest] = a; B[lastTest] = b; C[lastTest] = c; D[lastTest] = d; 23 | a = A[test]; b = B[test]; c = C[test]; d = D[test]; 24 | lastTest = test; 25 | test = (test+1) % NUM_TESTS; 26 | } 27 | void Setup() { 28 | Timer t; 29 | // allocate arrays 30 | for( int test = 0; test < NUM_TESTS; ++test ) { 31 | PrepTest(); 32 | a = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 33 | b = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 34 | c = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 35 | d = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 36 | SetupValues(); 37 | } 38 | printf( "Setup took %fms\n", t.elapsed() ); 39 | } 40 | 41 | int writeOut; 42 | void UseVariable( int var ) { 43 | writeOut += var; 44 | } 45 | 46 | void TestSummingSimple() { 47 | PrepTest(); 48 | int sum = 0; 49 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 50 | sum += a[i]; 51 | } 52 | UseVariable( sum ); 53 | } 54 | void TestSummingBackwards() { 55 | PrepTest(); 56 | int sum = 0; 57 | for( int i = ELEMENT_COUNT-1; i >= 0; --i ) { 58 | sum += b[i]; 59 | } 60 | UseVariable( sum ); 61 | } 62 | 
void TestSummingStrides() { 63 | PrepTest(); 64 | int sum = 0; 65 | const int STRIDE = 16; 66 | for( int offset = 0; offset < STRIDE; offset += 1 ) { 67 | for( int i = offset; i < ELEMENT_COUNT; i += STRIDE ) { 68 | sum += a[i]; 69 | } 70 | } 71 | UseVariable( sum ); 72 | } 73 | template<int byte_limit> // byte_limit: byte size of the region of c the writes are masked into (the mask arithmetic assumes a power of two) 74 | void TestWriteRangeLimited() { 75 | PrepTest(); 76 | int mask = (byte_limit / sizeof( c[0] ))-1; 77 | for( int i = 0; i != ELEMENT_COUNT*16; i+= 16 ) { 78 | c[i&mask] = i; 79 | } 80 | } 81 | void TestWriteSimple() { 82 | PrepTest(); 83 | for( int i = 0; i != ELEMENT_COUNT; ++i ) { 84 | c[i] = i; 85 | } 86 | } 87 | void TestWriteBackwards() { 88 | PrepTest(); 89 | for( int i = ELEMENT_COUNT-1; i >= 0; --i ) { 90 | c[i] = i; 91 | } 92 | } 93 | void TestWriteStrides() { 94 | PrepTest(); 95 | const int STRIDE = 16; 96 | for( int offset = 0; offset < STRIDE; offset += 1 ) { 97 | for( int i = offset; i < ELEMENT_COUNT; i += STRIDE ) { 98 | c[i] = i; 99 | } 100 | } 101 | } 102 | void TestSimpleCopy() { 103 | PrepTest(); 104 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 105 | c[i] = a[i]; 106 | } 107 | } 108 | void TestMultiRead() { 109 | PrepTest(); 110 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 111 | c[i] = a[i] + b[i]; 112 | } 113 | } 114 | void TestMultiWrite() { 115 | PrepTest(); 116 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 117 | c[i] = a[i]; 118 | d[i] = a[i]; 119 | } 120 | } 121 | void TestMultiBoth() { 122 | PrepTest(); 123 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 124 | c[i] = a[i] + b[i]; 125 | d[i] = a[i] - b[i]; 126 | } 127 | } 128 | 129 | void TestWriteAndModifyPaired() { 130 | PrepTest(); 131 | for( int i = 0; i < ELEMENT_COUNT; i+=2 ) { 132 | c[i] = c[i] + b[i]; 133 | c[i+1] = b[i]; 134 | } 135 | } 136 | void TestWriteAndModifySeparate() { 137 | PrepTest(); 138 | const int HALF_ELEMENT_COUNT = ELEMENT_COUNT / 2; 139 | for( int i = 0; i < HALF_ELEMENT_COUNT; ++i ) { 140 | c[i] = c[i] + b[i]; 141 | c[i+HALF_ELEMENT_COUNT] = b[i]; 142 | } 143 | } 144 | 145 | int 
main() { 146 | Setup(); 147 | 148 | { 149 | Test tests[] = { 150 | (Test){ TestSimpleCopy, "Simple Copy" }, 151 | (Test){ TestMultiRead, "Summing Copy" }, 152 | (Test){ TestMultiWrite, "Splitting Copy" }, 153 | (Test){ TestMultiBoth, "Splitting Combine Copy" }, 154 | 155 | (Test){ TestWriteSimple, "Write Forwards" }, 156 | (Test){ TestWriteBackwards, "Write Backwards" }, 157 | (Test){ TestWriteStrides, "Write in Strides" }, 158 | 159 | (Test){ TestSummingSimple, "Summing Forwards" }, 160 | (Test){ TestSummingBackwards, "Summing Backwards" }, 161 | (Test){ TestSummingStrides, "Summing in Strides" }, 162 | 163 | (Test){ TestWriteAndModifyPaired, "Write and modify" }, 164 | (Test){ TestWriteAndModifySeparate, "Write only separated" }, 165 | }; 166 | 167 | RunTests( tests ); 168 | } 169 | 170 | return 0; 171 | } 172 | -------------------------------------------------------------------------------- /branch_prediction.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | int validated_result; 4 | volatile int output_buffer; 5 | 6 | int *a1, *b1; 7 | int *a2, *b2; 8 | const int ELEMENT_COUNT = 1 * 1024 * 1024; 9 | 10 | void SetupValues() { 11 | // fill with data 12 | pcg32_random_t rng; 13 | pcg32_srandom_r(&rng, 1234, 5678); 14 | std::vector<int> sortingarray; // holds the indices 0..ELEMENT_COUNT-1, sorted below by their a1 value 15 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 16 | a1[i] = pcg32_random_r_range(&rng, 0, 255 ); 17 | b1[i] = pcg32_random_r_range(&rng, 0, 65535 ); 18 | sortingarray.push_back( i ); 19 | } 20 | std::sort( sortingarray.begin(), sortingarray.end(), 21 | []( int first, int second ){ 22 | return a1[first] < a1[second]; 23 | } ); 24 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 25 | a2[i] = a1[sortingarray[i]]; 26 | b2[i] = b1[sortingarray[i]]; 27 | } 28 | } 29 | void Setup() { 30 | Timer t; 31 | // allocate arrays 32 | a1 = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 33 | b1 = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 34 | a2 = 
(int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 35 | b2 = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 36 | SetupValues(); 37 | printf( "Setup took %fms\n", t.elapsed() ); 38 | } 39 | 40 | int lookup[256]; 41 | 42 | void ClearCalcs() { 43 | for( int i = 0; i < 256; ++i ) { 44 | lookup[i] = 3000 - i*3; 45 | } 46 | } 47 | int CalculateForHigh( int a, int b ) { 48 | return (a^b) + lookup[a&255] + b; 49 | } 50 | int CalculateForLow( int a, int b ) { 51 | return lookup[b&255] + ( (a*b) > 100 ? 7 : 3 ); 52 | } 53 | 54 | 55 | void TrivialRandomBranching() { 56 | int sum=0; 57 | for (int i = 0; i < ELEMENT_COUNT; i++) { 58 | if( a1[i] > 128 ) { 59 | sum += b1[i]; 60 | } 61 | } 62 | output_buffer = sum; 63 | } 64 | 65 | void TrivialSortedBranching() { 66 | int sum=0; 67 | for (int i = 0; i < ELEMENT_COUNT; i++) { 68 | if( a2[i] > 128 ) { 69 | sum += b2[i]; 70 | } 71 | } 72 | output_buffer = sum; 73 | } 74 | void RealisticRandomBranching() { 75 | int sum=0; 76 | for (int i = 0; i < ELEMENT_COUNT; i++) { 77 | if( a1[i] > 128 ) { 78 | sum += CalculateForHigh( a1[i], b1[i] ); 79 | } else { 80 | sum += CalculateForLow( a1[i], b1[i] ); 81 | } 82 | } 83 | output_buffer = sum; 84 | } 85 | 86 | void RealisticSortedBranching() { 87 | int sum=0; 88 | for (int i = 0; i < ELEMENT_COUNT; i++) { 89 | if( a2[i] > 128 ) { 90 | sum += CalculateForHigh( a2[i], b2[i] ); 91 | } else { 92 | sum += CalculateForLow( a2[i], b2[i] ); 93 | } 94 | } 95 | output_buffer = sum; 96 | } 97 | 98 | int main() { 99 | ClearCalcs(); 100 | Setup(); 101 | 102 | Test tests[] = { 103 | (Test){ TrivialRandomBranching, "Trivial Random branching" }, 104 | (Test){ TrivialSortedBranching, "Trivial Sorted branching" }, 105 | (Test){ RealisticRandomBranching, "Random branching" }, 106 | (Test){ RealisticSortedBranching, "Sorted branching" }, 107 | }; 108 | 109 | // create a valid result 110 | TrivialRandomBranching(); 111 | validated_result = output_buffer; 112 | printf( "output_buffer should read 
%i\n", validated_result ); 113 | TrivialSortedBranching(); 114 | printf( "Test TrivialSortedBranching [%s] output_buffer = %i\n", validated_result==output_buffer ? "PASSED" : "FAILED", output_buffer ); 115 | RealisticRandomBranching(); 116 | validated_result = output_buffer; 117 | printf( "Test RealisticRandomBranching [%s] output_buffer = %i\n", validated_result==output_buffer ? "PASSED" : "FAILED", output_buffer ); 118 | RealisticSortedBranching(); 119 | printf( "Test RealisticSortedBranching [%s] output_buffer = %i\n", validated_result==output_buffer ? "PASSED" : "FAILED", output_buffer ); 120 | 121 | RunTests( tests ); 122 | 123 | return 0; 124 | } 125 | -------------------------------------------------------------------------------- /cachegrind.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm -f cachegrind.out.* 4 | valgrind --tool=cachegrind --branch-sim=yes --log-file=cg_simple.out ./fsm_simple.out 5 | valgrind --tool=cachegrind --branch-sim=yes --log-file=cg_oo.out ./fsm_oo.out 6 | valgrind --tool=cachegrind --branch-sim=yes --log-file=cg_table.out ./fsm_table.out 7 | valgrind --tool=cachegrind --branch-sim=yes --log-file=cg_tableptrs.out ./fsm_tableptrs.out 8 | -------------------------------------------------------------------------------- /cachesizeeffect.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | int *a, *b, *c, *d; 4 | int sa, sb; 5 | const int ELEMENT_COUNT = 2 * 1024 * 1024; // 8Mb of data 6 | 7 | void SetupValues() { 8 | // fill with data 9 | pcg32_random_t rng; 10 | pcg32_srandom_r(&rng, 1234, 5678); 11 | sa = sb = 0; 12 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 13 | sa += a[i] = pcg32_random_r_range(&rng, 0, 255 ); 14 | sb += b[i] = pcg32_random_r_range(&rng, 0, 65535 ); 15 | } 16 | } 17 | void Setup() { 18 | Timer t; 19 | // allocate arrays 20 | a = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 21 | 
b = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 22 | c = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 23 | d = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 24 | SetupValues(); 25 | printf( "Setup took %fms\n", t.elapsed() ); 26 | } 27 | 28 | int writeOut; 29 | void UseVariable( int var ) { 30 | writeOut += var; 31 | } 32 | 33 | void TestSummingSimple() { 34 | int sum = 0; 35 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 36 | sum += a[i]; 37 | } 38 | UseVariable( sum ); 39 | } 40 | void TestSummingBackwards() { 41 | int sum = 0; 42 | for( int i = ELEMENT_COUNT-1; i >= 0; --i ) { 43 | sum += a[i]; 44 | } 45 | UseVariable( sum ); 46 | } 47 | void TestSummingStrides() { 48 | int sum = 0; 49 | const int STRIDE = 16; 50 | for( int offset = 0; offset < STRIDE; offset += 1 ) { 51 | for( int i = offset; i < ELEMENT_COUNT; i += STRIDE ) { 52 | sum += a[i]; 53 | } 54 | } 55 | UseVariable( sum ); 56 | } 57 | template 58 | void TestWriteRangeLimited() { 59 | int mask = (byte_limit / sizeof( c[0] ))-1; 60 | for( int i = 0; i < ELEMENT_COUNT*16; i+= 16 ) { 61 | c[i&mask] = i; 62 | } 63 | } 64 | template 65 | void TestModifyRangeLimited() { 66 | int mask = (byte_limit / sizeof( c[0] ))-1; 67 | for( int i = 0; i < ELEMENT_COUNT*16; i+= 16 ) { 68 | c[i&mask] += 1; 69 | } 70 | } 71 | 72 | int main() { 73 | Setup(); 74 | 75 | Test tests[] = { 76 | (Test){ TestWriteRangeLimited<16*1024>, "Writing within the same 16k" }, 77 | (Test){ TestWriteRangeLimited<32*1024>, "Writing within the same 32k" }, 78 | (Test){ TestWriteRangeLimited<256*1024>, "Writing within the same 256k" }, 79 | (Test){ TestWriteRangeLimited<1*1024*1024>, "Writing within the same 1Mb" }, 80 | (Test){ TestWriteRangeLimited<2*1024*1024>, "Writing within the same 2Mb" }, 81 | (Test){ TestWriteRangeLimited<4*1024*1024>, "Writing within the same 4Mb" }, 82 | (Test){ TestWriteRangeLimited<8*1024*1024>, "Writing within the same 8Mb" }, 83 | (Test){ TestModifyRangeLimited<16*1024>, "Modifying 
within the same 16k" }, 84 | (Test){ TestModifyRangeLimited<32*1024>, "Modifying within the same 32k" }, 85 | (Test){ TestModifyRangeLimited<256*1024>, "Modifying within the same 256k" }, 86 | (Test){ TestModifyRangeLimited<1*1024*1024>, "Modifying within the same 1Mb" }, 87 | (Test){ TestModifyRangeLimited<2*1024*1024>, "Modifying within the same 2Mb" }, 88 | (Test){ TestModifyRangeLimited<4*1024*1024>, "Modifying within the same 4Mb" }, 89 | (Test){ TestModifyRangeLimited<8*1024*1024>, "Modifying within the same 8Mb" }, 90 | }; 91 | 92 | RunTests( tests ); 93 | 94 | return 0; 95 | } 96 | -------------------------------------------------------------------------------- /common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "geom.h" 9 | #include "pcg-random.h" 10 | #include 11 | #include 12 | 13 | #ifndef _ISOC11_SOURCE 14 | void * aligned_alloc( size_t alignment, size_t num_bytes ) { 15 | void *mem = 0; 16 | #ifdef __APPLE__ 17 | posix_memalign((void **)&mem, alignment, num_bytes); 18 | #else // every non-Apple platform uses memalign(); a bare #elif has no condition and is ill-formed 19 | mem = memalign( alignment, num_bytes ); 20 | #endif 21 | return mem; 22 | } 23 | #endif 24 | 25 | #ifdef __GNUC__ 26 | /*code for GNU C compiler */ 27 | typedef float f4 __attribute__((aligned(4))); 28 | typedef float f8 __attribute__((aligned(8))); 29 | typedef float f16 __attribute__((aligned(16))); 30 | typedef float fCL __attribute__((aligned(64))); 31 | typedef int i4 __attribute__((aligned(4))); 32 | typedef int i8 __attribute__((aligned(8))); 33 | typedef int i16 __attribute__((aligned(16))); 34 | typedef int iCL __attribute__((aligned(64))); 35 | #elif __clang__ 36 | typedef float f4 __attribute__((aligned(4))); 37 | typedef float f8 __attribute__((aligned(8))); 38 | typedef float f16 __attribute__((aligned(16))); 39 | typedef float fCL __attribute__((aligned(64))); 40 | typedef int i4 __attribute__((aligned(4))); 41 | typedef int i8 
__attribute__((aligned(8))); 42 | typedef int i16 __attribute__((aligned(16))); 43 | typedef int iCL __attribute__((aligned(64))); 44 | #elif _MSC_VER 45 | /*usually has the version number in _MSC_VER*/ 46 | /*code specific to MSVC compiler*/ 47 | typedef float __declspec( align( 4 ) ) f4; 48 | typedef float __declspec( align( 8 ) ) f8; 49 | typedef float __declspec( align( 16 ) ) f16; 50 | typedef float __declspec( align( 64 ) ) fCL; 51 | typedef int __declspec( align( 4 ) ) i4; 52 | typedef int __declspec( align( 8 ) ) i8; 53 | typedef int __declspec( align( 16 ) ) i16; 54 | typedef int __declspec( align( 64 ) ) iCL; 55 | #else 56 | #error "Beware, timings are going to be unlike other platforms." 57 | typedef float f4; 58 | typedef float f8; 59 | typedef float f16; 60 | typedef float fCL; 61 | typedef int i4; 62 | typedef int i8; 63 | typedef int i16; 64 | typedef int iCL; 65 | #endif 66 | 67 | const float TRIAL_TIMEOUT = 20.0f * 1000.0f; 68 | 69 | #include 70 | class Timer { 71 | public: 72 | Timer() : beg_(clock_::now()) {} 73 | void reset() { beg_ = clock_::now(); } 74 | double elapsed() const { 75 | return std::chrono::duration_cast 76 | (clock_::now() - beg_).count(); 77 | } 78 | 79 | private: 80 | typedef std::chrono::high_resolution_clock clock_; 81 | typedef std::chrono::duration ms_; 82 | std::chrono::time_point beg_; 83 | }; 84 | 85 | struct Stat { 86 | float average; 87 | float standardDeviation; 88 | float ninetyFivePercentMin; 89 | float ninetyFivePercentMax; 90 | float lowest, highest; 91 | void Calculate( float *series, int length ) { 92 | lowest = highest = series[0]; 93 | float sum = 0; 94 | for( int i = 0; i < length; ++i ) { 95 | float val = series[i]; 96 | lowest = lowest < val ? lowest : val; 97 | highest = highest > val ? 
highest : val; 98 | sum += val; 99 | } 100 | average = sum / length; 101 | float sds = 0; 102 | for( int i = 0; i < length; ++i ) { 103 | float d = series[i] - average; 104 | sds += d*d; 105 | } 106 | standardDeviation = sqrtf( sds / (length-1) ); 107 | ninetyFivePercentMin = average - standardDeviation*2; 108 | ninetyFivePercentMax = average + standardDeviation*2; 109 | } 110 | void Magnify( float multiplier ) { 111 | average *= multiplier; 112 | standardDeviation *= multiplier; 113 | ninetyFivePercentMin *= multiplier; 114 | ninetyFivePercentMax *= multiplier; 115 | lowest *= multiplier; 116 | highest *= multiplier; 117 | } 118 | }; 119 | 120 | #include 121 | #include 122 | 123 | typedef void (*TestFunc)(); 124 | struct Test { 125 | TestFunc func; 126 | char name[64]; 127 | Test( TestFunc f, const char* n ) : func(f) { strcpy( name, n ); } 128 | }; 129 | 130 | volatile int touchVal; 131 | 132 | struct CacheClearer { 133 | // a massive array used to read and write to, to flush any data in the cache. 
134 | char *horribleBuffer = 0; 135 | static const int HORRIBLE_BUFFER_SIZE = 12 * 1024 * 1024; 136 | CacheClearer() { 137 | horribleBuffer = (char*)malloc( HORRIBLE_BUFFER_SIZE ); 138 | if( horribleBuffer ) { 139 | memset( horribleBuffer, touchVal, HORRIBLE_BUFFER_SIZE ); 140 | } 141 | } 142 | ~CacheClearer() { 143 | if( horribleBuffer ) { 144 | touchVal = horribleBuffer[(touchVal*touchVal)%HORRIBLE_BUFFER_SIZE] + 1; 145 | free( horribleBuffer ); 146 | } 147 | horribleBuffer = 0; 148 | } 149 | void ClearCaches() { 150 | for( int i = 0; i < HORRIBLE_BUFFER_SIZE; ++i ) { 151 | horribleBuffer[i] += 1; 152 | } 153 | } 154 | }; 155 | 156 | void Shuffle( pcg32_random_t *pcg, int *array, int length ) { 157 | for( int i = 0; i < length-1; ++i ) { 158 | int source = pcg32_random_r_range( pcg, i, length ); 159 | int t = array[i]; 160 | array[i] = array[source]; 161 | array[source] = t; 162 | } 163 | #if 0 164 | printf( "New order = " ); 165 | for( int i = 0; i < length; ++i ) { 166 | printf( "%i ", array[i] ); 167 | } 168 | printf( "\n" ); 169 | #endif 170 | } 171 | 172 | template<typename TestStruct, int numTests> // both are deduced from the test array passed by reference 173 | void RunTests( TestStruct (&testArray)[numTests] ) { 174 | CacheClearer cc; 175 | 176 | const int TRIALS = 128; 177 | 178 | struct TimingData { 179 | int testID; 180 | float timing[TRIALS]; 181 | Stat s; 182 | }; 183 | TimingData timingData[numTests]; 184 | int testToDo[numTests]; 185 | for( int i = 0; i < numTests; ++i ) { 186 | testToDo[i] = i; 187 | } 188 | pcg32_random_t pcg; 189 | pcg32_srandom_r( &pcg, 1234, 5678 ); 190 | Shuffle( &pcg, testToDo, numTests ); 191 | Timer trialTimer; 192 | int trial = 0; 193 | while( trial < TRIALS && trialTimer.elapsed() < TRIAL_TIMEOUT ) { 194 | Shuffle( &pcg, testToDo, numTests ); 195 | for( int selector = 0; selector < numTests; ++selector ) { 196 | int tid = testToDo[selector]; 197 | 198 | cc.ClearCaches(); 199 | auto &test = testArray[tid]; 200 | auto &td = timingData[tid]; 201 | td.testID = tid; 202 | // warm the engines 203 | Timer t; 204 | 
test.func(); 205 | td.timing[trial] = t.elapsed(); 206 | } 207 | ++trial; 208 | } 209 | for( auto &td : timingData ) { 210 | td.s.Calculate( td.timing, trial ); 211 | } 212 | printf( "Managed %i trials in %fms\n\n", trial, trialTimer.elapsed() ); 213 | 214 | std::sort( 215 | std::begin(timingData), 216 | std::end(timingData), 217 | []( const TimingData &a, const TimingData &b ){ 218 | return a.s.average > b.s.average; 219 | } ); 220 | 221 | const char *timesuffix = "ms"; 222 | if( timingData[0].s.average < 20 ) { 223 | timesuffix = "us"; 224 | for( auto &td : timingData ) { 225 | td.s.Magnify( 1000.0f ); 226 | } 227 | } 228 | 229 | for( int tid = 0; tid < numTests; ++tid ) { 230 | auto &td = timingData[tid]; 231 | auto &test = testArray[td.testID]; 232 | printf( "Average% 9.2f%s (sd %.2f%s) for test [%s]\n", 233 | td.s.average, timesuffix, 234 | td.s.standardDeviation, timesuffix, 235 | test.name ); 236 | } 237 | #ifdef TARGET 238 | #define TOSTR(X) #X 239 | #define STR(x) TOSTR(x) 240 | char outfilename[256]; 241 | sprintf( outfilename, "testreport_%s.txt", STR(TARGET) ); 242 | if( FILE *fp = fopen( outfilename, "wt" ) ) { 243 | for( int tid = 0; tid < numTests; ++tid ) { 244 | auto &td = timingData[tid]; 245 | auto &test = testArray[td.testID]; 246 | fprintf( fp, "Average% 9.2f%s (sd %.2f%s) for test [%s]\n", 247 | td.s.average, timesuffix, 248 | td.s.standardDeviation, timesuffix, 249 | test.name ); 250 | } 251 | fclose( fp ); 252 | //printf( "Wrote report for " STR(TARGET) " to file %s\n", outfilename ); 253 | } else { 254 | printf( "unable to open file %s for writing\n", outfilename ); 255 | } 256 | sprintf( outfilename, "testdata_%s.csv", STR(TARGET) ); 257 | if( FILE *fp = fopen( outfilename, "wt" ) ) { 258 | fprintf( fp, "trialseq,testtime,testname\n" ); 259 | for( int tid = 0; tid < numTests; ++tid ) { 260 | auto &td = timingData[tid]; 261 | auto &test = testArray[td.testID]; 262 | for( int i = 0; i < trial; ++i ) { 263 | fprintf( fp, "%i,%f,%s\n", i, 
td.timing[i], test.name ); 264 | } 265 | } 266 | fclose( fp ); 267 | //printf( "Wrote data for " STR(TARGET) " to file %s\n", outfilename ); 268 | } else { 269 | printf( "unable to open file %s for writing\n", outfilename ); 270 | } 271 | #endif 272 | } 273 | 274 | -------------------------------------------------------------------------------- /false_sharing.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include <omp.h> 3 | 4 | int validated_result; 5 | volatile int output_buffer; 6 | 7 | int *a, *b; 8 | int sa, sb; 9 | const int ELEMENT_COUNT = 1 * 1024 * 1024; 10 | 11 | void SetupValues() { 12 | // fill with data 13 | pcg32_random_t rng; 14 | pcg32_srandom_r(&rng, 1234, 5678); 15 | sa = sb = 0; 16 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 17 | sa += a[i] = pcg32_random_r_range(&rng, 0, 255 ); 18 | sb += b[i] = pcg32_random_r_range(&rng, 0, 65535 ); 19 | } 20 | } 21 | void Setup() { 22 | Timer t; 23 | // allocate arrays 24 | a = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 25 | b = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 26 | SetupValues(); 27 | printf( "Setup took %fms\n", t.elapsed() ); 28 | } 29 | 30 | inline int CalcValue( int i ) { 31 | return a[i] + b[i]; 32 | } 33 | 34 | template<int NUM_THREADS> // NUM_THREADS: thread count requested for the OpenMP parallel region 35 | void TestFalseSharing() { 36 | int sum=0; 37 | int aligned_sum_store[NUM_THREADS] __attribute__((aligned(64))); 38 | 39 | #pragma omp parallel num_threads(NUM_THREADS) 40 | { 41 | int me = omp_get_thread_num(); 42 | aligned_sum_store[me] = 0; 43 | 44 | //#pragma omp for 45 | for (int i = me; i < ELEMENT_COUNT; i += NUM_THREADS ) { 46 | aligned_sum_store[me] += CalcValue( i ); 47 | } 48 | 49 | #pragma omp atomic 50 | sum += aligned_sum_store[me]; 51 | } 52 | output_buffer = sum; 53 | } 54 | 55 | 56 | template<int NUM_THREADS> // NUM_THREADS: thread count requested for the OpenMP parallel region 57 | void TestLocalAccumulator() { 58 | int sum=0; 59 | 60 | #pragma omp parallel num_threads(NUM_THREADS) 61 | { 62 | int me = omp_get_thread_num(); 63 | int local_accumulator = 0; 
64 | 65 | //#pragma omp for 66 | for (int i = me; i < ELEMENT_COUNT; i += NUM_THREADS ) { 67 | local_accumulator += CalcValue( i ); 68 | } 69 | 70 | #pragma omp atomic 71 | sum += local_accumulator; 72 | } 73 | output_buffer = sum; 74 | } 75 | 76 | template<int NUM_THREADS> // NUM_THREADS: thread count requested; each thread sums one contiguous slice of ELEMENT_COUNT / NUM_THREADS elements 77 | void TestSplitLoad() { 78 | int sum=0; 79 | const int WORK_LOAD = ELEMENT_COUNT / NUM_THREADS; 80 | 81 | #pragma omp parallel num_threads(NUM_THREADS) 82 | { 83 | int me = omp_get_thread_num(); 84 | int local_accumulator = 0; 85 | 86 | const int start = WORK_LOAD * me; 87 | const int end = WORK_LOAD * (me+1); 88 | //#pragma omp for 89 | for (int i = start; i < end; ++i ) { 90 | local_accumulator += CalcValue( i ); 91 | } 92 | 93 | #pragma omp atomic 94 | sum += local_accumulator; 95 | } 96 | output_buffer = sum; 97 | } 98 | 99 | void TestSinglethreaded() { 100 | int sum=0; 101 | // just one thread 102 | { 103 | int local_accumulator = 0; 104 | 105 | for (int i = 0; i < ELEMENT_COUNT; i++) { 106 | local_accumulator += CalcValue( i ); 107 | } 108 | 109 | sum += local_accumulator; 110 | } 111 | output_buffer = sum; 112 | } 113 | 114 | int main() { 115 | Setup(); 116 | 117 | Test tests[] = { 118 | (Test){ TestSinglethreaded, "Single threaded" }, 119 | (Test){ TestFalseSharing<2>, "False sharing (2 threads)" }, 120 | (Test){ TestLocalAccumulator<2>, "Local accumulator (2 threads)" }, 121 | (Test){ TestSplitLoad<2>, "Split load (2 threads)" }, 122 | (Test){ TestFalseSharing<4>, "False sharing (4 threads)" }, 123 | (Test){ TestLocalAccumulator<4>, "Local accumulator (4 threads)" }, 124 | (Test){ TestSplitLoad<4>, "Split load (4 threads)" }, 125 | (Test){ TestFalseSharing<8>, "False sharing (8 threads)" }, 126 | (Test){ TestLocalAccumulator<8>, "Local accumulator (8 threads)" }, 127 | (Test){ TestSplitLoad<8>, "Split load (8 threads)" }, 128 | (Test){ TestFalseSharing<16>, "False sharing (16 threads)" }, 129 | (Test){ TestLocalAccumulator<16>, "Local accumulator (16 threads)" }, 130 | (Test){ TestSplitLoad<16>, 
"Split load (16 threads)" }, 131 | }; 132 | 133 | // create a valid result 134 | TestSinglethreaded(); 135 | validated_result = output_buffer; 136 | printf( "output_buffer should read %i\n", validated_result ); 137 | TestFalseSharing<4>(); 138 | printf( "Test FalseSharing [%s] output_buffer = %i\n", validated_result==output_buffer ? "PASSED" : "FAILED", output_buffer ); 139 | TestLocalAccumulator<4>(); 140 | printf( "Test LocalAccumul [%s] output_buffer = %i\n", validated_result==output_buffer ? "PASSED" : "FAILED", output_buffer ); 141 | TestSplitLoad<4>(); 142 | printf( "Test SplitLoad [%s] output_buffer = %i\n", validated_result==output_buffer ? "PASSED" : "FAILED", output_buffer ); 143 | 144 | RunTests( tests ); 145 | 146 | return validated_result == output_buffer ? 0 : 1; 147 | } 148 | -------------------------------------------------------------------------------- /fsm.cpp: -------------------------------------------------------------------------------- 1 | // Finite state machines test 2 | #include "common.h" 3 | 4 | const int NUM_MACHINES = 10000; 5 | const int FRAMES_PER_SECOND = 15; 6 | const int NUM_UPDATES = FRAMES_PER_SECOND * 1 * 10; // 10 seconds 7 | 8 | const float UPDATE_DELTA = 1.0f / FRAMES_PER_SECOND; // delta in seconds 9 | const float VARIANCE = UPDATE_DELTA * 0.1f; 10 | 11 | float HUNGER_TRIGGER = 1.0f; 12 | float SLEEP_HUNGER = 0.1f; 13 | float HUNT_HUNGER = 0.3f; 14 | float EAT_HUNGER = -1.0f; 15 | float EXPLORE_HUNGER = 0.2f; 16 | 17 | float SLEEP_TRIGGER = 1.0f; 18 | float SLEEP_SLEEP = -1.0f; 19 | float HUNT_SLEEP = 0.1f; 20 | float EAT_SLEEP = 0.2f; 21 | float EXPLORE_SLEEP = 0.1f; 22 | 23 | float HUNTING_TIME = 1.0f; 24 | float EATING_TIME = 0.5f; 25 | 26 | #define CHECK_FOR_CONSISTENCY 0 27 | #if CHECK_FOR_CONSISTENCY 28 | const char *REPORT_FORMAT_STRING = "%10s SLEEP %4zi HUNT %4zi EAT %4zi EXPLORE %4zi\n"; 29 | const char *PRINT_FORMAT_STRING = "%10s %10s S%+.2f H%+.2f H%+.2f E%+.2f\n"; 30 | const char *STATENAME[] = { 31 | 
"sleeping", 32 | "hunting", 33 | "eating", 34 | "exploring", 35 | }; 36 | #endif 37 | #include "FSM_Simple.h" 38 | #include "FSM_OOState.h" 39 | #include "FSM_OOImplicitState.h" 40 | #include "FSM_OOFunctional.h" 41 | #include "FSM_TableState.h" 42 | #include "FSM_TableStatePointers.h" 43 | #include "FSM_VaryingTableState.h" 44 | 45 | struct Data { 46 | FSMSimple::Data simple; 47 | FSMOOState::Data oostate; 48 | FSMOOImplicitState::Data ooimplicitstate; 49 | FSMOOFunctional::Data oofunctional; 50 | FSMTableState::Data tablestate; 51 | FSMTableStatePointers::Data tablestateptrs; 52 | FSMVaryingTableState::Data varyingtablestate; 53 | }; 54 | Data *gData; 55 | 56 | void TestFSMSimple() { 57 | pcg32_random_t rng; 58 | pcg32_srandom_r(&rng, 1234, 5678); 59 | for( int update = 0; update < NUM_UPDATES; ++update ) { 60 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 61 | gData->simple.Update( update_time ); 62 | } 63 | }; 64 | 65 | void TestFSMOOState() { 66 | pcg32_random_t rng; 67 | pcg32_srandom_r(&rng, 1234, 5678); 68 | for( int update = 0; update < NUM_UPDATES; ++update ) { 69 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 70 | gData->oostate.Update( update_time ); 71 | } 72 | }; 73 | 74 | void TestFSMOOImplicitState() { 75 | pcg32_random_t rng; 76 | pcg32_srandom_r(&rng, 1234, 5678); 77 | for( int update = 0; update < NUM_UPDATES; ++update ) { 78 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 79 | gData->ooimplicitstate.Update( update_time ); 80 | } 81 | }; 82 | 83 | void TestFSMOOVirtualFunctional() { 84 | pcg32_random_t rng; 85 | pcg32_srandom_r(&rng, 1234, 5678); 86 | for( int update = 0; update < NUM_UPDATES; ++update ) { 87 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 88 | gData->oofunctional.Update( update_time ); 89 | } 90 | }; 91 | 92 | void 
TestFSMTableState() { 93 | pcg32_random_t rng; 94 | pcg32_srandom_r(&rng, 1234, 5678); 95 | for( int update = 0; update < NUM_UPDATES; ++update ) { 96 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 97 | gData->tablestate.Update( update_time ); 98 | } 99 | }; 100 | 101 | void TestFSMTableStatePointers() { 102 | pcg32_random_t rng; 103 | pcg32_srandom_r(&rng, 1234, 5678); 104 | for( int update = 0; update < NUM_UPDATES; ++update ) { 105 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 106 | gData->tablestateptrs.Update( update_time ); 107 | } 108 | }; 109 | 110 | void TestFSMVaryingTableState() { 111 | pcg32_random_t rng; 112 | pcg32_srandom_r(&rng, 1234, 5678); 113 | for( int update = 0; update < NUM_UPDATES; ++update ) { 114 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 115 | gData->varyingtablestate.Update( update_time ); 116 | } 117 | }; 118 | 119 | int main() { 120 | Data data; 121 | gData = &data; 122 | 123 | #if CHECK_FOR_CONSISTENCY 124 | { 125 | pcg32_random_t rng; 126 | pcg32_srandom_r(&rng, 1234, 5678); 127 | bool inSync = true; 128 | for( int update = 0; update < 1000000 && inSync; ++update ) { 129 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 130 | gData->simple.Update( update_time ); 131 | gData->oostate.Update( update_time ); 132 | gData->ooimplicitstate.Update( update_time ); 133 | gData->oofunctional.Update( update_time ); 134 | gData->tablestate.Update( update_time ); 135 | gData->tablestateptrs.Update( update_time ); 136 | gData->varyingtablestate.Update( update_time ); 137 | 138 | size_t a[4], b[4]; 139 | gData->simple.Report(a); 140 | gData->oostate.Report(b); 141 | if( memcmp( a, b, sizeof( a ) ) ) { 142 | //printf( REPORT_FORMAT_STRING, "OOSTATE", b[0], b[1], b[2], b[3] ); 143 | //gData->oostate.PrintState(); 144 | inSync = false; 
145 | } 146 | gData->ooimplicitstate.Report(b); 147 | if( memcmp( a, b, sizeof( a ) ) ) { 148 | //printf( REPORT_FORMAT_STRING, "OOIMPLICIT", b[0], b[1], b[2], b[3] ); 149 | //gData->ooimplicitstate.PrintState(); 150 | inSync = false; 151 | } 152 | gData->oofunctional.Report(b); 153 | if( memcmp( a, b, sizeof( a ) ) ) { 154 | //printf( REPORT_FORMAT_STRING, "OOIMPLICIT", b[0], b[1], b[2], b[3] ); 155 | //gData->oofunctional.PrintState(); 156 | inSync = false; 157 | } 158 | gData->tablestate.Report(b); 159 | if( memcmp( a, b, sizeof( a ) ) ) { 160 | //printf( REPORT_FORMAT_STRING, "TABLE", b[0], b[1], b[2], b[3] ); 161 | //gData->tablestate.PrintState(); 162 | inSync = false; 163 | } 164 | gData->tablestateptrs.Report(b); 165 | if( memcmp( a, b, sizeof( a ) ) ) { 166 | //printf( REPORT_FORMAT_STRING, "TABLE", b[0], b[1], b[2], b[3] ); 167 | //gData->tablestate.PrintState(); 168 | inSync = false; 169 | } 170 | gData->varyingtablestate.Report(b); 171 | if( memcmp( a, b, sizeof( a ) ) ) { 172 | //printf( REPORT_FORMAT_STRING, "VARYING", b[0], b[1], b[2], b[3] ); 173 | //gData->varyingtablestate.PrintState(); 174 | inSync = false; 175 | } 176 | 177 | if( update > 60000040 ) { 178 | printf("Update %i:\n", update ); 179 | gData->simple.PrintState(); 180 | gData->oostate.PrintState(); 181 | gData->ooimplicitstate.PrintState(); 182 | gData->oofunctional.PrintState(); 183 | gData->tablestate.PrintState(); 184 | gData->tablestateptrs.PrintState(); 185 | gData->varyingtablestate.PrintState(); 186 | } 187 | 188 | if( !inSync ) { 189 | printf( REPORT_FORMAT_STRING, "SIMPLE", a[0], a[1], a[2], a[3] ); 190 | gData->simple.PrintState(); 191 | printf( "update %i went out of sync\n", update ); 192 | } 193 | } 194 | } 195 | #else 196 | { 197 | 198 | Test tests[] = { 199 | (Test){ TestFSMTableState, "Table FSM" }, 200 | (Test){ TestFSMTableStatePointers, "Table FSM (pointers)" }, 201 | (Test){ TestFSMVaryingTableState, "Varying Table FSM" }, 202 | (Test){ TestFSMOOState, "OO FSM" }, 
203 | (Test){ TestFSMOOImplicitState, "OO Implicit FSM" }, 204 | (Test){ TestFSMOOVirtualFunctional, "OO Functional FSM" }, 205 | (Test){ TestFSMSimple, "Simple FSM" }, 206 | }; 207 | 208 | printf("Trialling with %i machines over %i updates\n", NUM_MACHINES, NUM_UPDATES ); 209 | RunTests( tests ); 210 | } 211 | #endif 212 | 213 | return 0; 214 | } 215 | -------------------------------------------------------------------------------- /fsm_oo.cpp: -------------------------------------------------------------------------------- 1 | // Finite state machines test 2 | #include "common.h" 3 | 4 | const int NUM_MACHINES = 10000; 5 | const int FRAMES_PER_SECOND = 15; 6 | const int NUM_UPDATES = FRAMES_PER_SECOND * 1 * 10; // 10 seconds 7 | 8 | const float UPDATE_DELTA = 1.0f / FRAMES_PER_SECOND; // delta in seconds 9 | const float VARIANCE = UPDATE_DELTA * 0.1f; 10 | 11 | float HUNGER_TRIGGER = 1.0f; 12 | float SLEEP_HUNGER = 0.1f; 13 | float HUNT_HUNGER = 0.3f; 14 | float EAT_HUNGER = -1.0f; 15 | float EXPLORE_HUNGER = 0.2f; 16 | 17 | float SLEEP_TRIGGER = 1.0f; 18 | float SLEEP_SLEEP = -1.0f; 19 | float HUNT_SLEEP = 0.1f; 20 | float EAT_SLEEP = 0.2f; 21 | float EXPLORE_SLEEP = 0.1f; 22 | 23 | float HUNTING_TIME = 1.0f; 24 | float EATING_TIME = 0.5f; 25 | 26 | #define CHECK_FOR_CONSISTENCY 0 27 | #if CHECK_FOR_CONSISTENCY 28 | const char *REPORT_FORMAT_STRING = "%10s SLEEP %4zi HUNT %4zi EAT %4zi EXPLORE %4zi\n"; 29 | const char *PRINT_FORMAT_STRING = "%10s %10s S%+.2f H%+.2f H%+.2f E%+.2f\n"; 30 | const char *STATENAME[] = { 31 | "sleeping", 32 | "hunting", 33 | "eating", 34 | "exploring", 35 | }; 36 | #endif 37 | #include "FSM_OOState.h" 38 | 39 | struct Data { 40 | FSMOOState::Data oostate; 41 | }; 42 | Data *gData; 43 | 44 | void TestFSMOOState() { 45 | pcg32_random_t rng; 46 | pcg32_srandom_r(&rng, 1234, 5678); 47 | for( int update = 0; update < NUM_UPDATES; ++update ) { 48 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + 
VARIANCE); 49 | gData->oostate.Update( update_time ); 50 | } 51 | }; 52 | 53 | int main() { 54 | Data data; 55 | gData = &data; 56 | 57 | pcg32_random_t rng; 58 | pcg32_srandom_r(&rng, 1234, 5678); 59 | for( int update = 0; update < 10000; ++update ) { 60 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 61 | gData->oostate.Update( update_time ); 62 | } 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /fsm_simple.cpp: -------------------------------------------------------------------------------- 1 | // Finite state machines test 2 | #include "common.h" 3 | 4 | const int NUM_MACHINES = 10000; 5 | const int FRAMES_PER_SECOND = 15; 6 | const int NUM_UPDATES = FRAMES_PER_SECOND * 1 * 10; // 10 seconds 7 | 8 | const float UPDATE_DELTA = 1.0f / FRAMES_PER_SECOND; // delta in seconds 9 | const float VARIANCE = UPDATE_DELTA * 0.1f; 10 | 11 | float HUNGER_TRIGGER = 1.0f; 12 | float SLEEP_HUNGER = 0.1f; 13 | float HUNT_HUNGER = 0.3f; 14 | float EAT_HUNGER = -1.0f; 15 | float EXPLORE_HUNGER = 0.2f; 16 | 17 | float SLEEP_TRIGGER = 1.0f; 18 | float SLEEP_SLEEP = -1.0f; 19 | float HUNT_SLEEP = 0.1f; 20 | float EAT_SLEEP = 0.2f; 21 | float EXPLORE_SLEEP = 0.1f; 22 | 23 | float HUNTING_TIME = 1.0f; 24 | float EATING_TIME = 0.5f; 25 | 26 | #define CHECK_FOR_CONSISTENCY 0 27 | #if CHECK_FOR_CONSISTENCY 28 | const char *REPORT_FORMAT_STRING = "%10s SLEEP %4zi HUNT %4zi EAT %4zi EXPLORE %4zi\n"; 29 | const char *PRINT_FORMAT_STRING = "%10s %10s S%+.2f H%+.2f H%+.2f E%+.2f\n"; 30 | const char *STATENAME[] = { 31 | "sleeping", 32 | "hunting", 33 | "eating", 34 | "exploring", 35 | }; 36 | #endif 37 | #include "FSM_Simple.h" 38 | 39 | struct Data { 40 | FSMSimple::Data simple; 41 | }; 42 | Data *gData; 43 | 44 | void TestFSMSimple() { 45 | pcg32_random_t rng; 46 | pcg32_srandom_r(&rng, 1234, 5678); 47 | for( int update = 0; update < NUM_UPDATES; ++update ) { 48 | float 
update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 49 | gData->simple.Update( update_time ); 50 | } 51 | }; 52 | 53 | int main() { 54 | Data data; 55 | gData = &data; 56 | 57 | pcg32_random_t rng; 58 | pcg32_srandom_r(&rng, 1234, 5678); 59 | for( int update = 0; update < 10000; ++update ) { 60 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 61 | gData->simple.Update( update_time ); 62 | } 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /fsm_table.cpp: -------------------------------------------------------------------------------- 1 | // Finite state machines test 2 | #include "common.h" 3 | 4 | const int NUM_MACHINES = 10000; 5 | const int FRAMES_PER_SECOND = 15; 6 | const int NUM_UPDATES = FRAMES_PER_SECOND * 1 * 10; // 10 seconds 7 | 8 | const float UPDATE_DELTA = 1.0f / FRAMES_PER_SECOND; // delta in seconds 9 | const float VARIANCE = UPDATE_DELTA * 0.1f; 10 | 11 | float HUNGER_TRIGGER = 1.0f; 12 | float SLEEP_HUNGER = 0.1f; 13 | float HUNT_HUNGER = 0.3f; 14 | float EAT_HUNGER = -1.0f; 15 | float EXPLORE_HUNGER = 0.2f; 16 | 17 | float SLEEP_TRIGGER = 1.0f; 18 | float SLEEP_SLEEP = -1.0f; 19 | float HUNT_SLEEP = 0.1f; 20 | float EAT_SLEEP = 0.2f; 21 | float EXPLORE_SLEEP = 0.1f; 22 | 23 | float HUNTING_TIME = 1.0f; 24 | float EATING_TIME = 0.5f; 25 | 26 | #define CHECK_FOR_CONSISTENCY 0 27 | #if CHECK_FOR_CONSISTENCY 28 | const char *REPORT_FORMAT_STRING = "%10s SLEEP %4zi HUNT %4zi EAT %4zi EXPLORE %4zi\n"; 29 | const char *PRINT_FORMAT_STRING = "%10s %10s S%+.2f H%+.2f H%+.2f E%+.2f\n"; 30 | const char *STATENAME[] = { 31 | "sleeping", 32 | "hunting", 33 | "eating", 34 | "exploring", 35 | }; 36 | #endif 37 | #include "FSM_TableState.h" 38 | 39 | struct Data { 40 | FSMTableState::Data tablestate; 41 | }; 42 | Data *gData; 43 | 44 | void TestFSMTableState() { 45 | pcg32_random_t rng; 46 | 
pcg32_srandom_r(&rng, 1234, 5678); 47 | for( int update = 0; update < NUM_UPDATES; ++update ) { 48 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 49 | gData->tablestate.Update( update_time ); 50 | } 51 | }; 52 | 53 | int main() { 54 | Data data; 55 | gData = &data; 56 | 57 | pcg32_random_t rng; 58 | pcg32_srandom_r(&rng, 1234, 5678); 59 | for( int update = 0; update < 10000; ++update ) { 60 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 61 | gData->tablestate.Update( update_time ); 62 | } 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /fsm_tableptrs.cpp: -------------------------------------------------------------------------------- 1 | // Finite state machines test 2 | #include "common.h" 3 | 4 | const int NUM_MACHINES = 10000; 5 | const int FRAMES_PER_SECOND = 15; 6 | const int NUM_UPDATES = FRAMES_PER_SECOND * 1 * 10; // 10 seconds 7 | 8 | const float UPDATE_DELTA = 1.0f / FRAMES_PER_SECOND; // delta in seconds 9 | const float VARIANCE = UPDATE_DELTA * 0.1f; 10 | 11 | float HUNGER_TRIGGER = 1.0f; 12 | float SLEEP_HUNGER = 0.1f; 13 | float HUNT_HUNGER = 0.3f; 14 | float EAT_HUNGER = -1.0f; 15 | float EXPLORE_HUNGER = 0.2f; 16 | 17 | float SLEEP_TRIGGER = 1.0f; 18 | float SLEEP_SLEEP = -1.0f; 19 | float HUNT_SLEEP = 0.1f; 20 | float EAT_SLEEP = 0.2f; 21 | float EXPLORE_SLEEP = 0.1f; 22 | 23 | float HUNTING_TIME = 1.0f; 24 | float EATING_TIME = 0.5f; 25 | 26 | #define CHECK_FOR_CONSISTENCY 0 27 | #if CHECK_FOR_CONSISTENCY 28 | const char *REPORT_FORMAT_STRING = "%10s SLEEP %4zi HUNT %4zi EAT %4zi EXPLORE %4zi\n"; 29 | const char *PRINT_FORMAT_STRING = "%10s %10s S%+.2f H%+.2f H%+.2f E%+.2f\n"; 30 | const char *STATENAME[] = { 31 | "sleeping", 32 | "hunting", 33 | "eating", 34 | "exploring", 35 | }; 36 | #endif 37 | #include "FSM_TableStatePointers.h" 38 | 39 | struct Data { 40 | 
FSMTableStatePointers::Data tablestate; 41 | }; 42 | Data *gData; 43 | 44 | void TestFSMTableState() { 45 | pcg32_random_t rng; 46 | pcg32_srandom_r(&rng, 1234, 5678); 47 | for( int update = 0; update < NUM_UPDATES; ++update ) { 48 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 49 | gData->tablestate.Update( update_time ); 50 | } 51 | }; 52 | 53 | int main() { 54 | Data data; 55 | gData = &data; 56 | 57 | pcg32_random_t rng; 58 | pcg32_srandom_r(&rng, 1234, 5678); 59 | for( int update = 0; update < 10000; ++update ) { 60 | float update_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA - VARIANCE, UPDATE_DELTA + VARIANCE); 61 | gData->tablestate.Update( update_time ); 62 | } 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /geom.cpp: -------------------------------------------------------------------------------- 1 | #include "geom.h" 2 | 3 | AABB gAABBEverything( Vec3( -INFINITY, -INFINITY, -INFINITY ),Vec3( -INFINITY, -INFINITY, -INFINITY ) ); 4 | 5 | -------------------------------------------------------------------------------- /geom.h: -------------------------------------------------------------------------------- 1 | #ifndef _GEOM_H_ 2 | #define _GEOM_H_ 3 | 4 | #include 5 | 6 | struct Vec3 { 7 | float x,y,z; 8 | Vec3( float _x, float _y, float _z ) : x(_x), y(_y), z(_z) {} 9 | Vec3() {} 10 | Vec3 operator+(const Vec3 &other) const { return Vec3(x+other.x, y+other.y, z+other.z); } 11 | Vec3 operator-(const Vec3 &other) const { return Vec3(x-other.x, y-other.y, z-other.z); } 12 | Vec3 operator*(const float &other) const { return Vec3(x*other, y*other, z*other); } 13 | Vec3& operator+=(const Vec3 &other) { x+=other.x; y+=other.y; z+=other.z; return *this; } 14 | Vec3& operator-=(const Vec3 &other) { x-=other.x; y-=other.y; z-=other.z; return *this; } 15 | float dot(const Vec3 &other) const { return x*other.x + y*other.y + z*other.z; } 16 | float 
abs() const { return sqrtf(dot(*this)); } 17 | Vec3 cross(const Vec3 &other) const { // const: only reads x,y,z, so it is callable on a const Vec3 like dot() 18 | return Vec3( y*other.z - z*other.y, 19 | z*other.x - x*other.z, 20 | x*other.y - y*other.x ); 21 | } 22 | Vec3 normalized() const { return *this * (1.0f / abs()); } 23 | Vec3 operator-() const { return Vec3(-x, -y, -z); } 24 | }; 25 | 26 | struct Vec4 { 27 | float x,y,z,w; 28 | operator Vec3() const { return Vec3( x, y, z ); } 29 | }; 30 | 31 | struct Mat44 { 32 | Vec4 x,y,z,w; 33 | }; 34 | 35 | struct AABB { 36 | Vec3 aamin, aamax; 37 | static const AABB EVERYTHING; 38 | AABB() {} 39 | AABB( const Vec3 &minVal, const Vec3 &maxVal ) : aamin(minVal), aamax(maxVal) {} 40 | }; 41 | 42 | // Vec3 operations 43 | inline float dot( const Vec3 a, const Vec3 b ) { return a.dot(b); } 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /hotcold.cpp: -------------------------------------------------------------------------------- 1 | // Hot Cold data separation 2 | #include "common.h" 3 | 4 | const int NUM_PARTICLES = 10000; 5 | const int FRAMES_PER_SECOND = 60; 6 | const int NUM_UPDATES = FRAMES_PER_SECOND * 10; // ten seconds of particle updates at 60fps; 7 | 8 | const float UPDATE_DELTA = 1000.0f / FRAMES_PER_SECOND; // delta in ms 9 | 10 | struct particle_buffer_Simple { 11 | struct particle { 12 | Vec3 pos; 13 | Vec3 velocity; 14 | float lifetime; 15 | uint32_t colour; 16 | float size; 17 | uint32_t materialOrUVLookupData; 18 | }; 19 | particle *p; 20 | particle_buffer_Simple() { 21 | p = (particle*)malloc( sizeof(particle) * NUM_PARTICLES ); 22 | 23 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 24 | p[i].pos.x = (i%7)-3; 25 | p[i].pos.y = (i%11)-5; 26 | p[i].pos.z = (i%9)-4; 27 | 28 | p[i].velocity.x = 2.0f; 29 | p[i].velocity.y = 100.0f; 30 | p[i].velocity.z = 7.0f; 31 | 32 | p[i].lifetime = 10.0f; 33 | p[i].colour = 0xffeebbff; 34 | p[i].size = 0.5f; 35 | p[i].materialOrUVLookupData = 12415; 36 | } 37 | } 38 | ~particle_buffer_Simple() { 
39 | free(p); 40 | } 41 | }; 42 | struct particle_buffer_HotColdSplit { 43 | struct particle_hot { 44 | Vec3 pos; 45 | Vec3 velocity; 46 | }; 47 | struct particle_cold { 48 | float lifetime; 49 | uint32_t colour; 50 | float size; 51 | uint32_t materialOrUVLookupData; 52 | }; 53 | particle_hot *ph; 54 | particle_cold *pc; 55 | particle_buffer_HotColdSplit() { 56 | ph = (particle_hot*)malloc( sizeof(particle_hot) * NUM_PARTICLES ); 57 | pc = (particle_cold*)malloc( sizeof(particle_cold) * NUM_PARTICLES ); 58 | 59 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 60 | ph[i].pos.x = (i%7)-3; 61 | ph[i].pos.y = (i%11)-5; 62 | ph[i].pos.z = (i%9)-4; 63 | 64 | ph[i].velocity.x = 2.0f; 65 | ph[i].velocity.y = 100.0f; 66 | ph[i].velocity.z = 7.0f; 67 | 68 | pc[i].lifetime = 10.0f; 69 | pc[i].colour = 0xffeebbff; 70 | pc[i].size = 0.5f; 71 | pc[i].materialOrUVLookupData = 12415; 72 | } 73 | } 74 | ~particle_buffer_HotColdSplit() { 75 | free(ph); 76 | free(pc); 77 | } 78 | }; 79 | struct particle_buffer_ReadWriteSplit { 80 | struct particle_read { 81 | Vec3 velocity; 82 | }; 83 | struct particle_write { 84 | Vec3 pos; 85 | }; 86 | struct particle_cold { 87 | float lifetime; 88 | uint32_t colour; 89 | float size; 90 | uint32_t materialOrUVLookupData; 91 | }; 92 | particle_read *pr; 93 | particle_write *pw; 94 | particle_cold *pc; 95 | particle_buffer_ReadWriteSplit() { 96 | pr = (particle_read*)malloc( sizeof(particle_read) * NUM_PARTICLES ); 97 | pw = (particle_write*)malloc( sizeof(particle_write) * NUM_PARTICLES ); 98 | pc = (particle_cold*)malloc( sizeof(particle_cold) * NUM_PARTICLES ); 99 | 100 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 101 | pw[i].pos.x = (i%7)-3; 102 | pw[i].pos.y = (i%11)-5; 103 | pw[i].pos.z = (i%9)-4; 104 | 105 | pr[i].velocity.x = 2.0f; 106 | pr[i].velocity.y = 100.0f; 107 | pr[i].velocity.z = 7.0f; 108 | 109 | pc[i].lifetime = 10.0f; 110 | pc[i].colour = 0xffeebbff; 111 | pc[i].size = 0.5f; 112 | pc[i].materialOrUVLookupData = 12415; 113 | } 114 | } 
115 | ~particle_buffer_ReadWriteSplit() { 116 | free(pr); 117 | free(pw); 118 | free(pc); 119 | } 120 | }; 121 | 122 | struct Data { 123 | particle_buffer_Simple pbSimple; 124 | particle_buffer_HotColdSplit pbHotCold; 125 | particle_buffer_ReadWriteSplit pbReadWrite; 126 | }; 127 | Data *gData; 128 | 129 | 130 | void TestUpdateParticles_Simple() { 131 | particle_buffer_Simple *pb = &gData->pbSimple; 132 | pcg32_random_t rng; 133 | pcg32_srandom_r(&rng, 1234, 5678); 134 | for( int u = 0; u < NUM_UPDATES; ++u ) { 135 | // ensure that compiler cannot optimise out delta_time, but it remains repeatable 136 | float delta_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA * 0.9f, UPDATE_DELTA * 1.1f ); 137 | 138 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 139 | particle_buffer_Simple::particle *p = pb->p+i; 140 | p->pos += p->velocity * delta_time; 141 | } 142 | } 143 | } 144 | void TestUpdateParticles_HotColdSplit() { 145 | particle_buffer_HotColdSplit *pb = &gData->pbHotCold; 146 | pcg32_random_t rng; 147 | pcg32_srandom_r(&rng, 1234, 5678); 148 | for( int u = 0; u < NUM_UPDATES; ++u ) { 149 | // ensure that compiler cannot optimise out delta_time, but it remains repeatable 150 | float delta_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA * 0.9f, UPDATE_DELTA * 1.1f ); 151 | 152 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 153 | particle_buffer_HotColdSplit::particle_hot *p = pb->ph+i; 154 | p->pos += p->velocity * delta_time; 155 | } 156 | } 157 | } 158 | void TestUpdateParticles_ReadWriteSplit() { 159 | particle_buffer_ReadWriteSplit *pb = &gData->pbReadWrite; 160 | pcg32_random_t rng; 161 | pcg32_srandom_r(&rng, 1234, 5678); 162 | for( int u = 0; u < NUM_UPDATES; ++u ) { 163 | // ensure that compiler cannot optimise out delta_time, but it remains repeatable 164 | float delta_time = pcg32_random_r_rangef(&rng, UPDATE_DELTA * 0.9f, UPDATE_DELTA * 1.1f ); 165 | 166 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 167 | particle_buffer_ReadWriteSplit::particle_read *pr = 
pb->pr+i; 168 | particle_buffer_ReadWriteSplit::particle_write *pw = pb->pw+i; 169 | pw->pos += pr->velocity * delta_time; 170 | } 171 | } 172 | } 173 | 174 | 175 | int main() { 176 | Data data; 177 | gData = &data; 178 | 179 | Test tests[] = { 180 | (Test){ TestUpdateParticles_Simple, "Simple struct array" }, 181 | (Test){ TestUpdateParticles_HotColdSplit, "HotCold split struct array" }, 182 | (Test){ TestUpdateParticles_ReadWriteSplit, "ReadWrite split struct array" }, 183 | }; 184 | 185 | printf("Trialling with %i particles over %i updates\n", NUM_PARTICLES, NUM_UPDATES ); 186 | RunTests( tests ); 187 | 188 | return 0; 189 | } 190 | -------------------------------------------------------------------------------- /linkedlists.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | struct A { 4 | int val; 5 | int pad1; 6 | int pad2; 7 | int pad3; 8 | }; 9 | struct Alink { 10 | Alink *next; 11 | int val; 12 | int pad1; 13 | int pad2; 14 | int pad3; 15 | }; 16 | 17 | A *aArray; 18 | A **aPointerArray; 19 | Alink *aLinkedList; 20 | 21 | const int ELEMENT_COUNT = 4 * 1024 * 1024; 22 | 23 | void SetupValues() { 24 | // fill with data 25 | pcg32_random_t rng; 26 | pcg32_srandom_r(&rng, 1234, 5678); 27 | Alink *link = aLinkedList; 28 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 29 | int value = pcg32_random_r_range(&rng, 0, 255 ); 30 | aArray[i].val = value; 31 | aPointerArray[i]->val = value; 32 | link->val = value; 33 | link = link->next; 34 | } 35 | } 36 | void Setup() { 37 | Timer t; 38 | // allocate arrays 39 | aArray = (A*)malloc( sizeof(A) * ELEMENT_COUNT ); 40 | 41 | aPointerArray = (A**)malloc( sizeof(A*) * ELEMENT_COUNT ); 42 | 43 | // allocate elements 44 | for( size_t i = 0; i < ELEMENT_COUNT; ++i ) { 45 | Alink *link = (Alink*)malloc( sizeof(Alink) ); 46 | link->next = aLinkedList; 47 | aLinkedList = link; 48 | aPointerArray[i] = (A*)malloc( sizeof(A) ); 49 | } 50 | SetupValues(); 51 | printf( 
"Setup took %fms\n", t.elapsed() ); 52 | } 53 | 54 | int writeOut; 55 | void UseVariable( int var ) { 56 | writeOut += var; 57 | } 58 | 59 | void TestSumArray() { 60 | int accumulator = 0; 61 | for( int i = 0; i < ELEMENT_COUNT; i+=1 ) { 62 | accumulator += aArray[i].val; 63 | } 64 | UseVariable( accumulator ); 65 | //printf( "Acc %i\n", accumulator ); 66 | } 67 | void TestSumArrayPointer() { 68 | int accumulator = 0; 69 | for( int i = 0; i < ELEMENT_COUNT; i+=1 ) { 70 | accumulator += aPointerArray[i]->val; 71 | } 72 | UseVariable( accumulator ); 73 | //printf( "Acc %i\n", accumulator ); 74 | } 75 | void TestSumLinkedList() { 76 | int accumulator = 0; 77 | Alink *link = aLinkedList; 78 | while( link != nullptr ) { 79 | accumulator += link->val; 80 | link = link->next; 81 | } 82 | UseVariable( accumulator ); 83 | //printf( "Acc %i\n", accumulator ); 84 | } 85 | 86 | int main() { 87 | Setup(); 88 | 89 | { 90 | Test tests[] = { 91 | (Test){ TestSumArray, "Array Sum" }, 92 | (Test){ TestSumArrayPointer, "Pointer Array Sum" }, 93 | (Test){ TestSumLinkedList, "Linked List Sum" }, 94 | }; 95 | 96 | RunTests( tests ); 97 | } 98 | 99 | return 0; 100 | } 101 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | USE_CLANG=0 2 | ifeq ($(USE_CLANG),1) 3 | CC=clang 4 | LINK=-lm -lstdc++ 5 | else 6 | 7 | ifeq ($(shell uname -s),Darwin) 8 | # brew install g++ libomp 9 | CC=$(shell whereis g++) 10 | FOPENMP=-Xpreprocessor -fopenmp -lomp 11 | else 12 | CC=g++ 13 | FOPENMP=-fopenmp 14 | endif 15 | 16 | COMPILE=-march=native 17 | 18 | endif 19 | SOURCE=$(wildcard *.cpp) 20 | OUTS=$(SOURCE:.cpp=.out) 21 | #ASSEMBLY=$(SOURCE:.cpp=.s) 22 | TARGETS=$(OUTS) $(ASSEMBLY) 23 | COMMON=common.h geom.h geom.o 24 | ADDITIONAL=geom.o 25 | 26 | CFLAGS=$(COMPILE) -O3 -DNDEBUG -std=c++11 -Wall -Wextra -Werror 27 | #CFLAGS=$(COMPILE) -g -O0 -std=c++11 -Wall -Wextra -Werror 28 | 
FLAGS=$(CFLAGS) $(FOPENMP) $(LINK) 29 | 30 | all: $(TARGETS) 31 | 32 | run: $(OUTS) 33 | ./linkedlists.out 34 | 35 | runall: $(OUTS) 36 | ./basictheory.out 37 | ./branch_prediction.out 38 | ./cachesizeeffect.out 39 | ./false_sharing.out 40 | ./fsm.out 41 | ./hotcold.out 42 | ./KeyLookup.out 43 | ./matrixtranspose.out 44 | ./modifying_memory.out 45 | ./readingtests.out 46 | ./simd_test.out 47 | ./speculativewaste.out 48 | 49 | %.out: %.cpp $(COMMON) 50 | $(CC) $(ADDITIONAL) $(FLAGS) -DTARGET=$* -o $@ $< 51 | 52 | fsm.out: fsm.cpp $(COMMON) FSM_OOState.h FSM_Simple.h \ 53 | FSM_TableState.h FSM_OOImplicitState.h \ 54 | FSM_VaryingTableState.h FSM_TableStatePointers.h \ 55 | FSM_OOFunctional.h 56 | $(CC) $(ADDITIONAL) $(FLAGS) -DTARGET=$* -o fsm.out fsm.cpp 57 | 58 | %.s: %.cpp 59 | $(CC) $(FLAGS) -S -g -o $@ $< 60 | 61 | geom.o: geom.cpp geom.h 62 | $(CC) -c $(CFLAGS) -o geom.o geom.cpp 63 | 64 | geom.out: geom.cpp 65 | touch geom.out 66 | 67 | clean: 68 | rm -f *.out *.s *.o 69 | rm -f testdata*.csv 70 | rm -f testreport.txt 71 | 72 | cleanreports: 73 | rm -f testdata*.csv 74 | rm -f testreport.txt 75 | rm -f cachegrind.out.* 76 | rm -f cg_*.out 77 | -------------------------------------------------------------------------------- /matrixtranspose.cpp: -------------------------------------------------------------------------------- 1 | // Matrix transpose: comparing row, column and blocked access orders 2 | #include "common.h" 3 | 4 | const int MATRIX_SIZE = 1024; 5 | 6 | struct LargeMatrix { 7 | float m[MATRIX_SIZE * MATRIX_SIZE]; 8 | }; 9 | 10 | struct Data { 11 | LargeMatrix from, to; 12 | }; 13 | Data *gData; 14 | 15 | 16 | void TestTranspose_ReadRows() { 17 | float *in = &(gData->from.m[0]); 18 | float *out = &(gData->to.m[0]); 19 | 20 | for( int j = 0; j < MATRIX_SIZE; j++ ) { 21 | for( int i = 0; i < MATRIX_SIZE; i++ ) { 22 | out[i*MATRIX_SIZE+j]=in[j*MATRIX_SIZE+i]; 23 | } 24 | } 25 | } 26 | void TestTranspose_ReadColumns() { 27 | float *in = &(gData->from.m[0]); 28 | float *out = &(gData->to.m[0]); 
29 | 30 | for( int i = 0; i < MATRIX_SIZE; i++ ) { 31 | for( int j = 0; j < MATRIX_SIZE; j++ ) { 32 | out[i*MATRIX_SIZE+j]=in[j*MATRIX_SIZE+i]; 33 | } 34 | } 35 | } 36 | template<int block_size> // tile edge length, in elements, for both i and j 37 | void TestTranspose_RowBlock() { 38 | float *in = &(gData->from.m[0]); 39 | float *out = &(gData->to.m[0]); 40 | 41 | for (int bj = 0; bj < MATRIX_SIZE; bj += block_size) { 42 | for (int bi = 0; bi < MATRIX_SIZE; bi += block_size) { 43 | int imax = bi + block_size; imax = imax < MATRIX_SIZE ? imax : MATRIX_SIZE; 44 | int jmax = bj + block_size; jmax = jmax < MATRIX_SIZE ? jmax : MATRIX_SIZE; 45 | for (int j = bj; j < jmax; ++j) { 46 | for (int i = bi; i < imax; ++i) { 47 | out[i*MATRIX_SIZE+j] = in[j*MATRIX_SIZE+i]; 48 | } 49 | } 50 | } 51 | } 52 | } 53 | template<int block_size> // tile edge length, in elements, for both i and j 54 | void TestTranspose_ColumnBlock() { 55 | float *in = &(gData->from.m[0]); 56 | float *out = &(gData->to.m[0]); 57 | static_assert( MATRIX_SIZE % block_size == 0, "tile bounds are not clamped here, so block_size must divide MATRIX_SIZE" ); 58 | for (int bi = 0; bi < MATRIX_SIZE; bi += block_size) { 59 | for (int bj = 0; bj < MATRIX_SIZE; bj += block_size) { 60 | int imax = bi + block_size; 61 | int jmax = bj + block_size; 62 | // these cause an overflow assumption warning on newer gcc compilers (found on 6.3.0) 63 | //imax = imax < MATRIX_SIZE ? imax : MATRIX_SIZE; 64 | //jmax = jmax < MATRIX_SIZE ? jmax : MATRIX_SIZE; 65 | for (int i = bi; i < imax; ++i) { 66 | for (int j = bj; j < jmax; ++j) { 67 | out[i*MATRIX_SIZE+j] = in[j*MATRIX_SIZE+i]; 68 | } 69 | } 70 | } 71 | } 72 | } 73 | template<int block_size> // width of the j range handled per full pass over i 74 | void TestTranspose_WriteBlock() { 75 | float *in = &(gData->from.m[0]); 76 | float *out = &(gData->to.m[0]); 77 | 78 | for (int bj = 0; bj < MATRIX_SIZE; bj += block_size) { 79 | int jmax = bj + block_size; jmax = jmax < MATRIX_SIZE ? 
jmax : MATRIX_SIZE; 80 | for (int i = 0; i < MATRIX_SIZE; ++i) { 81 | for (int j = bj; j < jmax; ++j) { 82 | out[i*MATRIX_SIZE+j] = in[j*MATRIX_SIZE+i]; 83 | } 84 | } 85 | } 86 | } 87 | template<int read_block, int write_block> // tile extent along i, tile extent along j 88 | void TestTranspose_RowBlock2() { 89 | float *in = &(gData->from.m[0]); 90 | float *out = &(gData->to.m[0]); 91 | 92 | for (int bi = 0; bi < MATRIX_SIZE; bi += read_block) { 93 | for (int bj = 0; bj < MATRIX_SIZE; bj += write_block) { 94 | int imax = bi + read_block; imax = imax < MATRIX_SIZE ? imax : MATRIX_SIZE; 95 | int jmax = bj + write_block; jmax = jmax < MATRIX_SIZE ? jmax : MATRIX_SIZE; 96 | for (int i = bi; i < imax; ++i) { 97 | for (int j = bj; j < jmax; ++j) { 98 | out[i*MATRIX_SIZE+j] = in[j*MATRIX_SIZE+i]; 99 | } 100 | } 101 | } 102 | } 103 | } 104 | 105 | int main() { 106 | char *dataAlloc = (char*)aligned_alloc( 64, sizeof( Data ) + 64 ); 107 | if( dataAlloc == nullptr ) { printf( "Failed to allocate %zu bytes for the matrices\n", sizeof( Data ) + 64 ); return 1; } // every test dereferences gData, so stop here on allocation failure 108 | gData = (Data*)(void*)(dataAlloc); 109 | 110 | Test tests[] = { 111 | (Test){ TestTranspose_ReadRows, "Read rows, write columns" }, 112 | (Test){ TestTranspose_ReadColumns, "Read columns, write rows" }, 113 | 114 | (Test){ TestTranspose_RowBlock<4>, "Read rows, blocks of 4" }, 115 | (Test){ TestTranspose_RowBlock<8>, "Read rows, blocks of 8" }, 116 | (Test){ TestTranspose_RowBlock<16>, "Read rows, blocks of 16" }, 117 | 118 | (Test){ TestTranspose_ColumnBlock<4>, "Read columns, blocks of 4" }, 119 | (Test){ TestTranspose_ColumnBlock<8>, "Read columns, blocks of 8" }, 120 | (Test){ TestTranspose_ColumnBlock<16>, "Read columns, blocks of 16" }, 121 | 122 | (Test){ TestTranspose_WriteBlock<4>, "Write blocks of 4" }, 123 | (Test){ TestTranspose_WriteBlock<8>, "Write blocks of 8" }, 124 | (Test){ TestTranspose_WriteBlock<16>, "Write blocks of 16" }, 125 | 126 | //(Test){ TestTranspose_RowBlock2<2,2>, "Read rows, blocks 2x2" }, 127 | //(Test){ TestTranspose_RowBlock2<4,2>, "Read rows, blocks 4x2" }, 128 | //(Test){ TestTranspose_RowBlock2<8,2>, "Read rows, blocks 8x2" }, 129 | //(Test){ 
TestTranspose_RowBlock2<16,2>, "Read rows, blocks 16x2" }, 130 | //(Test){ TestTranspose_RowBlock2<2,4>, "Read rows, blocks 2x4" }, 131 | //(Test){ TestTranspose_RowBlock2<4,4>, "Read rows, blocks 4x4" }, 132 | //(Test){ TestTranspose_RowBlock2<8,4>, "Read rows, blocks 8x4" }, 133 | //(Test){ TestTranspose_RowBlock2<16,4>, "Read rows, blocks 16x4" }, 134 | //(Test){ TestTranspose_RowBlock2<2,8>, "Read rows, blocks 2x8" }, 135 | //(Test){ TestTranspose_RowBlock2<4,8>, "Read rows, blocks 4x8" }, 136 | //(Test){ TestTranspose_RowBlock2<8,8>, "Read rows, blocks 8x8" }, 137 | //(Test){ TestTranspose_RowBlock2<16,8>, "Read rows, blocks 16x8" }, 138 | //(Test){ TestTranspose_RowBlock2<2,16>, "Read rows, blocks 2x16" }, 139 | //(Test){ TestTranspose_RowBlock2<4,16>, "Read rows, blocks 4x16" }, 140 | //(Test){ TestTranspose_RowBlock2<8,16>, "Read rows, blocks 8x16" }, 141 | //(Test){ TestTranspose_RowBlock2<16,16>, "Read rows, blocks 16x16" }, 142 | }; 143 | 144 | printf("Trialling with aligned matrices %ix%i\n", MATRIX_SIZE, MATRIX_SIZE ); 145 | RunTests( tests ); 146 | 147 | gData = (Data*)(void*)(dataAlloc+3); 148 | printf("Trialling with unaligned matrices %ix%i\n", MATRIX_SIZE, MATRIX_SIZE ); 149 | RunTests( tests ); 150 | 151 | free(dataAlloc); 152 | return 0; 153 | } 154 | -------------------------------------------------------------------------------- /modifying_memory.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | int *c; 4 | //const int BUFFER_SIZE = 4 * 1024 * 1024; // in bytes 5 | const int BUFFER_SIZE = 1024 * 1024; // in bytes 6 | 7 | void Setup() { 8 | Timer t; 9 | // allocate array 10 | c = (int*)aligned_alloc( 64, BUFFER_SIZE ); 11 | printf( "Setup took %fms\n", t.elapsed() ); 12 | } 13 | 14 | template 15 | void TestParallelModifyTemplate() { 16 | T *modify_buffer_ptr = (T*)(void*)c; 17 | T temp[NumToParallelModify]; 18 | const size_t TOTAL_ELEMENTS = BUFFER_SIZE / sizeof(T); 19 | for( 
size_t i = 0; i < TOTAL_ELEMENTS; i+=NumToParallelModify ) { 20 | for( size_t j = 0; j < NumToParallelModify; ++j ) { 21 | temp[j] = modify_buffer_ptr[i+j]; 22 | temp[j] += i+j; 23 | modify_buffer_ptr[i+j] = temp[j]; 24 | } 25 | } 26 | } 27 | template 28 | void TestBatchModifyTemplate() { 29 | T *modify_buffer_ptr = (T*)(void*)c; 30 | T temp[NumToParallelModify]; 31 | const size_t TOTAL_ELEMENTS = BUFFER_SIZE / sizeof(T); 32 | for( size_t i = 0; i < TOTAL_ELEMENTS; i+=NumToParallelModify ) { 33 | for( size_t j = 0; j < NumToParallelModify; ++j ) { 34 | temp[j] = modify_buffer_ptr[i+j]; 35 | } 36 | for( int j = 0; j < NumToParallelModify; ++j ) { 37 | temp[j] += i+j; 38 | } 39 | for( int j = 0; j < NumToParallelModify; ++j ) { 40 | modify_buffer_ptr[i+j] = temp[j]; 41 | } 42 | } 43 | } 44 | void TestParallelModify64Bytes() { 45 | uint8_t *modify_buffer_ptr = (uint8_t*)(void*)c; 46 | uint8_t temp[64]; 47 | for( int i = 0; i < BUFFER_SIZE; i+=64 ) { 48 | for( int j = 0; j < 64; ++j ) { 49 | temp[j] = modify_buffer_ptr[i+j]; 50 | temp[j] += i+j; 51 | modify_buffer_ptr[i+j] = temp[j]; 52 | } 53 | } 54 | } 55 | 56 | typedef void (*TestFunc)(); 57 | struct DetailedTest { 58 | int typesize, parallelcount; 59 | TestFunc func; 60 | char name[64]; 61 | DetailedTest( int ts, int pc, TestFunc f, const char* n ) : typesize(ts), parallelcount(pc), func(f) { strcpy( name, n ); } 62 | }; 63 | 64 | int main() { 65 | Setup(); 66 | 67 | DetailedTest tests[] = { 68 | (DetailedTest){ sizeof(int8_t),1, TestParallelModifyTemplate, "Modifying 8bit ints, 1 at a time" }, 69 | (DetailedTest){ sizeof(int8_t),2, TestParallelModifyTemplate, "Modifying 8bit ints, 2 at a time" }, 70 | (DetailedTest){ sizeof(int8_t),4, TestParallelModifyTemplate, "Modifying 8bit ints, 4 at a time" }, 71 | (DetailedTest){ sizeof(int8_t),8, TestParallelModifyTemplate, "Modifying 8bit ints, 8 at a time" }, 72 | (DetailedTest){ sizeof(int8_t),16, TestParallelModifyTemplate, "Modifying 8bit ints, 16 at a time" }, 73 | 
(DetailedTest){ sizeof(int8_t),32, TestParallelModifyTemplate, "Modifying 8bit ints, 32 at a time" }, 74 | (DetailedTest){ sizeof(int8_t),64, TestParallelModifyTemplate, "Modifying 8bit ints, 64 at a time" }, 75 | 76 | (DetailedTest){ sizeof(int16_t),1, TestParallelModifyTemplate, "Modifying 16bit ints, 1 at a time" }, 77 | (DetailedTest){ sizeof(int16_t),2, TestParallelModifyTemplate, "Modifying 16bit ints, 2 at a time" }, 78 | (DetailedTest){ sizeof(int16_t),4, TestParallelModifyTemplate, "Modifying 16bit ints, 4 at a time" }, 79 | (DetailedTest){ sizeof(int16_t),8, TestParallelModifyTemplate, "Modifying 16bit ints, 8 at a time" }, 80 | (DetailedTest){ sizeof(int16_t),16, TestParallelModifyTemplate, "Modifying 16bit ints, 16 at a time" }, 81 | (DetailedTest){ sizeof(int16_t),32, TestParallelModifyTemplate, "Modifying 16bit ints, 32 at a time" }, 82 | (DetailedTest){ sizeof(int16_t),64, TestParallelModifyTemplate, "Modifying 16bit ints, 64 at a time" }, 83 | 84 | (DetailedTest){ sizeof(int32_t),1, TestParallelModifyTemplate, "Modifying 32bit ints, 1 at a time" }, 85 | (DetailedTest){ sizeof(int32_t),2, TestParallelModifyTemplate, "Modifying 32bit ints, 2 at a time" }, 86 | (DetailedTest){ sizeof(int32_t),4, TestParallelModifyTemplate, "Modifying 32bit ints, 4 at a time" }, 87 | (DetailedTest){ sizeof(int32_t),8, TestParallelModifyTemplate, "Modifying 32bit ints, 8 at a time" }, 88 | (DetailedTest){ sizeof(int32_t),16, TestParallelModifyTemplate, "Modifying 32bit ints, 16 at a time" }, 89 | (DetailedTest){ sizeof(int32_t),32, TestParallelModifyTemplate, "Modifying 32bit ints, 32 at a time" }, 90 | (DetailedTest){ sizeof(int32_t),64, TestParallelModifyTemplate, "Modifying 32bit ints, 64 at a time" }, 91 | 92 | (DetailedTest){ sizeof(int64_t),1, TestParallelModifyTemplate, "Modifying 64bit ints, 1 at a time" }, 93 | (DetailedTest){ sizeof(int64_t),2, TestParallelModifyTemplate, "Modifying 64bit ints, 2 at a time" }, 94 | (DetailedTest){ sizeof(int64_t),4, 
TestParallelModifyTemplate, "Modifying 64bit ints, 4 at a time" }, 95 | (DetailedTest){ sizeof(int64_t),8, TestParallelModifyTemplate, "Modifying 64bit ints, 8 at a time" }, 96 | (DetailedTest){ sizeof(int64_t),16, TestParallelModifyTemplate, "Modifying 64bit ints, 16 at a time" }, 97 | (DetailedTest){ sizeof(int64_t),32, TestParallelModifyTemplate, "Modifying 64bit ints, 32 at a time" }, 98 | (DetailedTest){ sizeof(int64_t),64, TestParallelModifyTemplate, "Modifying 64bit ints, 64 at a time" }, 99 | 100 | //(DetailedTest){ sizeof(uint8_t),1, TestBatchModifyTemplate, "Modifying bytes, 1 to a batch" }, 101 | //(DetailedTest){ sizeof(uint8_t),2, TestBatchModifyTemplate, "Modifying bytes, 2 to a batch" }, 102 | //(DetailedTest){ sizeof(uint8_t),4, TestBatchModifyTemplate, "Modifying bytes, 4 to a batch" }, 103 | //(DetailedTest){ sizeof(uint8_t),8, TestBatchModifyTemplate, "Modifying bytes, 8 to a batch" }, 104 | //(DetailedTest){ sizeof(uint8_t),16, TestBatchModifyTemplate, "Modifying bytes, 16 to a batch" }, 105 | //(DetailedTest){ sizeof(uint8_t),32, TestBatchModifyTemplate, "Modifying bytes, 32 to a batch" }, 106 | //(DetailedTest){ sizeof(uint8_t),64, TestBatchModifyTemplate, "Modifying bytes, 64 to a batch" }, 107 | //(DetailedTest){ sizeof(int32_t),1, TestBatchModifyTemplate, "Modifying 32bit ints, 1 to a batch" }, 108 | //(DetailedTest){ sizeof(int32_t),2, TestBatchModifyTemplate, "Modifying 32bit ints, 2 to a batch" }, 109 | //(DetailedTest){ sizeof(int32_t),4, TestBatchModifyTemplate, "Modifying 32bit ints, 4 to a batch" }, 110 | //(DetailedTest){ sizeof(int32_t),8, TestBatchModifyTemplate, "Modifying 32bit ints, 8 to a batch" }, 111 | //(DetailedTest){ sizeof(int32_t),16, TestBatchModifyTemplate, "Modifying 32bit ints, 16 to a batch" }, 112 | //(DetailedTest){ sizeof(int32_t),32, TestBatchModifyTemplate, "Modifying 32bit ints, 32 to a batch" }, 113 | //(DetailedTest){ sizeof(int32_t),64, TestBatchModifyTemplate, "Modifying 32bit ints, 64 to a batch" }, 114 
| //(DetailedTest){ sizeof(int64_t),1, TestBatchModifyTemplate, "Modifying 64bit ints, 1 to a batch" }, 115 | //(DetailedTest){ sizeof(int64_t),2, TestBatchModifyTemplate, "Modifying 64bit ints, 2 to a batch" }, 116 | //(DetailedTest){ sizeof(int64_t),4, TestBatchModifyTemplate, "Modifying 64bit ints, 4 to a batch" }, 117 | //(DetailedTest){ sizeof(int64_t),8, TestBatchModifyTemplate, "Modifying 64bit ints, 8 to a batch" }, 118 | //(DetailedTest){ sizeof(int64_t),16, TestBatchModifyTemplate, "Modifying 64bit ints, 16 to a batch" }, 119 | //(DetailedTest){ sizeof(int64_t),32, TestBatchModifyTemplate, "Modifying 64bit ints, 32 to a batch" }, 120 | //(DetailedTest){ sizeof(int64_t),64, TestBatchModifyTemplate, "Modifying 64bit ints, 64 to a batch" }, 121 | 122 | //(DetailedTest){ sizeof(uint8_t),64, TestParallelModify64Bytes, "Modifying bytes, 64 at a time (no template)" }, 123 | }; 124 | 125 | RunTests( tests ); 126 | 127 | //const int numTests = sizeof( tests ) / sizeof(tests[0]); 128 | //Timer trialTimer; 129 | //int trial = 0; 130 | //while( trial < TRIALS && trialTimer.elapsed() < TRIAL_TIMEOUT ) { 131 | // for( auto &test : tests ) { 132 | // for( int round = 0; round < ROUNDS; ++round ) { 133 | // Timer t; 134 | // test.func(); 135 | // test.timing[round+ROUNDS*trial] = t.elapsed(); 136 | // } 137 | // test.s.Calculate( test.timing, ROUNDS ); 138 | // } 139 | // ++trial; 140 | //} 141 | //for( auto &test : tests ) { 142 | // test.s.Calculate( test.timing, ROUNDS*trial ); 143 | //} 144 | 145 | //printf( "Managed %i trials in %fms\n\n", trial, trialTimer.elapsed() ); 146 | //std::sort( std::begin(tests), std::end(tests), 147 | // []( const Test &a, const Test &b ){ 148 | // return a.s.average > b.s.average; 149 | // } ); 150 | //const char *timesuffix = "ms"; 151 | //if( tests[0].s.average < 2 ) { 152 | // timesuffix = "us"; 153 | // for( auto &test : tests ) { 154 | // test.s.Magnify( 1000.0f ); 155 | // } 156 | //} 157 | 158 | // 159 | //for( auto test : tests 
) { 160 | // printf( "Average% 8.2f%s (95%% +-%.2f%s) for test [%s]\n", 161 | // test.s.average, timesuffix, 162 | // test.s.standardDeviation*2.0f, timesuffix, 163 | // test.name ); 164 | //} 165 | // write to output file if possible 166 | 167 | //{ 168 | // std::sort( std::begin(tests), std::end(tests), 169 | // []( const Test &a, const Test &b ){ 170 | // if( a.typesize != b.typesize ) 171 | // return a.typesize < b.typesize; 172 | // if( a.parallelcount != b.parallelcount ) 173 | // return a.parallelcount < b.parallelcount; 174 | // return false; 175 | // } ); 176 | // char filename[256]; strcpy( filename, __FILE__ ".dat" ); 177 | // FILE *fp = fopen( filename, "wt" ); 178 | // if( fp ) { 179 | // int sizes[] = { 1,2,4,8 }; 180 | // for( int s : sizes ) { 181 | // for( auto test : tests ) { 182 | // if( test.typesize == s ) { 183 | // //fprintf( fp, "%i, %f\n", test.parallelcount, test.s.average ); 184 | // fprintf( fp, "%i, %f, %f\n", test.parallelcount, test.s.average, test.s.standardDeviation ); 185 | // } 186 | // } 187 | // fprintf( fp, "\n\n" ); 188 | // } 189 | // fclose( fp ); 190 | // } 191 | //} 192 | return 0; 193 | } 194 | -------------------------------------------------------------------------------- /modifying_memory.cpp.dat: -------------------------------------------------------------------------------- 1 | 1, 16.306866, 3.189020 2 | 2, 22.735029, 2.643049 3 | 4, 30.656015, 2.613136 4 | 8, 41.987907, 3.351187 5 | 16, 54.206932, 5.812555 6 | 32, 9.632219, 1.039084 7 | 64, 25.281727, 1.831344 8 | 9 | 10 | 1, 10.407656, 1.479602 11 | 2, 16.954084, 1.714481 12 | 4, 28.490650, 2.627053 13 | 8, 38.513771, 2.868184 14 | 16, 50.636837, 4.916823 15 | 32, 16.239670, 1.759487 16 | 64, 12.997274, 1.657438 17 | 18 | 19 | 1, 9.561871, 0.639848 20 | 2, 16.420263, 1.561353 21 | 4, 31.858936, 2.527328 22 | 8, 46.579048, 4.226237 23 | 16, 61.935135, 4.306820 24 | 32, 9.987448, 1.358156 25 | 64, 9.593568, 0.590449 26 | 27 | 28 | 1, 9.604195, 0.799417 29 | 2, 
// *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / pcg-random.org
// Licensed under Apache License 2.0 (NO WARRANTY, etc. see website)

// Generator state. `state` is the 64-bit LCG state; `inc` is the stream
// increment (bit 0 is OR-ed in before every use, so it always acts as odd).
typedef struct { uint64_t state; uint64_t inc; } pcg32_random_t;

// Advances *rng by one step and returns the next 32-bit output.
//
// All functions in this header are `inline`: they are defined in a header,
// so without it a second translation unit including this file would produce
// duplicate-symbol link errors.
inline uint32_t pcg32_random_r(pcg32_random_t* rng)
{
    const uint64_t oldstate = rng->state;
    // Advance internal state
    rng->state = oldstate * 6364136223846793005ULL + (rng->inc|1);
    // Calculate output function (XSH RR), uses old state for max ILP
    const uint32_t xorshifted = static_cast<uint32_t>(((oldstate >> 18u) ^ oldstate) >> 27u);
    const uint32_t rot = static_cast<uint32_t>(oldstate >> 59u);
    return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
}

// Seeds the rng. Specified in two parts: a state initializer (`initstate`)
// and a sequence selection constant (`initseq`, a.k.a. stream id).
inline void pcg32_srandom_r(pcg32_random_t* rng, uint64_t initstate, uint64_t initseq)
{
    rng->state = 0U;
    rng->inc = (initseq << 1u) | 1u;
    pcg32_random_r(rng);
    rng->state += initstate;
    pcg32_random_r(rng);
}

// Draws one value and returns true with (approximately) the given
// probability. Only the low 24 bits of the draw are used because every
// 24-bit integer is exactly representable as a float.
//
// The threshold is probability * 2^24 (not 2^24 - 1): the masked draw is at
// most 2^24 - 1, so a probability of 1.0 is always true and 0.0 always false.
inline bool pcg32_random_r_probability(pcg32_random_t* rng, float probablity)
{
    const uint32_t rval = pcg32_random_r(rng);
    return (rval & 16777215) < (probablity * 16777216.0f);
}

// Resolution used by the range helpers: draws are reduced to 30 bits.
// `const` so the inline functions below refer to the same constant values in
// every translation unit.
static const uint32_t big_int_val = 1<<30;
static const uint32_t big_int_mask = big_int_val-1;

// Draws one value and maps it into the half-open range [minVal, maxVal)
// (returns minVal when the range is empty).
//
// The scaling is done in 64-bit integer arithmetic. Going through a float
// (as a 30-bit value divided by 2^30) can round the fraction up to exactly
// 1.0 and so occasionally return maxVal itself.
inline uint32_t pcg32_random_r_range(pcg32_random_t* rng, uint32_t minVal, uint32_t maxVal)
{
    const uint64_t rval = pcg32_random_r(rng) & big_int_mask;
    const uint64_t span = maxVal - minVal;
    // rval < big_int_val, therefore (span * rval) / big_int_val < span.
    return minVal + static_cast<uint32_t>((span * rval) / big_int_val);
}

// Draws one value and maps it linearly between minVal and maxVal.
// NOTE: the 30-bit draw is converted to float before dividing, so the largest
// draws round up and the fraction can be exactly 1.0; maxVal is therefore a
// possible (rare) result.
inline float pcg32_random_r_rangef(pcg32_random_t* rng, float minVal, float maxVal)
{
    const uint32_t rval = pcg32_random_r(rng);
    const float inRange = static_cast<float>(rval & big_int_mask) / big_int_val;
    return minVal + (maxVal-minVal) * inRange;
}
((float)(rval&big_int_mask)) / big_int_val; 70 | return minVal + (maxVal-minVal) * inRange; 71 | } 72 | 73 | -------------------------------------------------------------------------------- /readingtests.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | int *a, *b, *c; // read from arrays 4 | int sa, sb, sc; 5 | const int ELEMENT_COUNT = 1 * 1024 * 1024; 6 | 7 | void SetupValues() { 8 | // fill with data 9 | pcg32_random_t rng; 10 | pcg32_srandom_r(&rng, 1234, 5678); 11 | sa = sb = sc = 0; 12 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 13 | sa += a[i] = pcg32_random_r_range(&rng, 0, 255 ); 14 | sb += b[i] = pcg32_random_r_range(&rng, 0, 65535 ); 15 | sc += c[i] = pcg32_random_r_range(&rng, 0, 100 ); 16 | } 17 | } 18 | void Setup() { 19 | Timer t; 20 | // allocate arrays 21 | a = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 22 | b = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 23 | c = (int*)aligned_alloc( 64, sizeof(int) * ELEMENT_COUNT ); 24 | SetupValues(); 25 | printf( "Setup took %fms\n", t.elapsed() ); 26 | } 27 | 28 | bool SumReadingLinear( int *array, int expected_sum ) { 29 | int sum = 0; 30 | for( int i = 0; i < ELEMENT_COUNT; ++i ) { 31 | sum += array[i]; 32 | } 33 | return sum == expected_sum; 34 | } 35 | bool SumReadingBackwards( int *array, int expected_sum ) { 36 | int sum = 0; 37 | for( int i = ELEMENT_COUNT-1; i >= 0; --i ) { 38 | sum += array[i]; 39 | } 40 | return sum == expected_sum; 41 | } 42 | bool SumReadStriding( int *array, int expected_sum ) { 43 | int sum = 0; 44 | const int STRIDE = 16; 45 | for( int j = 0; j < STRIDE; ++j ) { 46 | for( int i = 0; i < ELEMENT_COUNT; i+=STRIDE ) { 47 | sum += array[i]; 48 | } 49 | } 50 | return sum == expected_sum; 51 | } 52 | 53 | volatile int output_buffer; 54 | 55 | void TestReadingBackwards( ) { 56 | int success = 0; 57 | success += SumReadingBackwards( a, sa ); 58 | success += SumReadingBackwards( b, sb ); 59 | 
success += SumReadingBackwards( c, sc ); 60 | output_buffer += success; 61 | } 62 | void TestReadingLinear() { 63 | int success = 0; 64 | success += SumReadingLinear( a, sa ); 65 | success += SumReadingLinear( b, sb ); 66 | success += SumReadingLinear( c, sc ); 67 | output_buffer += success; 68 | } 69 | void TestReadStriding() { 70 | int success = 0; 71 | success += SumReadStriding( a, sa ); 72 | success += SumReadStriding( b, sb ); 73 | success += SumReadStriding( c, sc ); 74 | output_buffer += success; 75 | } 76 | 77 | int main() { 78 | Setup(); 79 | 80 | Test tests[] = { 81 | (Test){ TestReadingBackwards, "Read Backwards" }, 82 | (Test){ TestReadingLinear, "Read Linear" }, 83 | (Test){ TestReadStriding, "Read Striding" }, 84 | }; 85 | 86 | RunTests( tests ); 87 | return 0; 88 | } 89 | -------------------------------------------------------------------------------- /simd_test.cpp: -------------------------------------------------------------------------------- 1 | // SIMD particle update 2 | #include "common.h" 3 | #if __SSE__ 4 | #include 5 | #endif 6 | #if __AVX__ 7 | #include 8 | #endif 9 | 10 | const int NUM_PARTICLES = 10000; 11 | const int FRAMES_PER_SECOND = 60; 12 | const int NUM_UPDATES = FRAMES_PER_SECOND * 10; // ten seconds of particle updates at 60fps; 13 | 14 | const float UPDATE_DELTA = 1000.0f / FRAMES_PER_SECOND; // delta in ms 15 | 16 | struct particle_buffer_AoS { 17 | struct particle { 18 | float x,y,z,vx,vy,vz,t; 19 | }; 20 | particle *p; 21 | float gravity; 22 | particle_buffer_AoS() { 23 | p = (particle*)malloc( sizeof(particle) * NUM_PARTICLES ); 24 | gravity = -9.81f; 25 | 26 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 27 | p[i].x = (i%7)-3; 28 | p[i].y = (i%11)-5; 29 | p[i].z = (i%9)-4; 30 | 31 | p[i].vx = 2.0f; 32 | p[i].vy = 100.0f; 33 | p[i].vz = 7.0f; 34 | 35 | p[i].t = 0.0f; 36 | } 37 | } 38 | ~particle_buffer_AoS() { 39 | free(p); 40 | } 41 | }; 42 | 43 | struct particle_buffer { 44 | float *posx, *posy, *posz; 45 | float *vx, 
*vy, *vz; 46 | float gravity; 47 | particle_buffer() { 48 | posx = (float*)aligned_alloc( 32, sizeof(float) * NUM_PARTICLES ); 49 | posy = (float*)aligned_alloc( 32, sizeof(float) * NUM_PARTICLES ); 50 | posz = (float*)aligned_alloc( 32, sizeof(float) * NUM_PARTICLES ); 51 | vx = (float*)aligned_alloc( 32, sizeof(float) * NUM_PARTICLES ); 52 | vy = (float*)aligned_alloc( 32, sizeof(float) * NUM_PARTICLES ); 53 | vz = (float*)aligned_alloc( 32, sizeof(float) * NUM_PARTICLES ); 54 | 55 | gravity = -9.81f; 56 | 57 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 58 | posx[i] = (i%7)-3; 59 | posy[i] = (i%11)-5; 60 | posz[i] = (i%9)-4; 61 | 62 | vx[i] = 2.0f; 63 | vy[i] = 100.0f; 64 | vz[i] = 7.0f; 65 | } 66 | } 67 | ~particle_buffer() { 68 | free(posx); 69 | free(posy); 70 | free(posz); 71 | free(vx); 72 | free(vy); 73 | free(vz); 74 | } 75 | }; 76 | 77 | void SimpleUpdateParticlesAoS( particle_buffer_AoS *pb, float delta_time ) { 78 | float g = pb->gravity; 79 | float gd2 = g * delta_time * delta_time * 0.5f; 80 | float gd = g * delta_time; 81 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 82 | particle_buffer_AoS::particle *p = pb->p+i; 83 | p->x += p->vx * delta_time; 84 | p->y += p->vy * delta_time + gd2; 85 | p->z += p->vz * delta_time; 86 | p->vy += gd; 87 | } 88 | } 89 | 90 | void SimpleUpdateParticles( particle_buffer *pb, float delta_time ) { 91 | float g = pb->gravity; 92 | float gd2 = g * delta_time * delta_time * 0.5f; 93 | float gd = g * delta_time; 94 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 95 | pb->posx[i] += pb->vx[i] * delta_time; 96 | pb->posy[i] += pb->vy[i] * delta_time + gd2; 97 | pb->posz[i] += pb->vz[i] * delta_time; 98 | pb->vy[i] += gd; 99 | } 100 | } 101 | void SliceUpdateParticles( particle_buffer *pb, float delta_time ) { 102 | float g = pb->gravity; 103 | float gd2 = g * delta_time * delta_time * 0.5f; 104 | float gd = g * delta_time; 105 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 106 | pb->posx[i] += pb->vx[i] * delta_time; 107 | } 108 | 
for( int i = 0; i < NUM_PARTICLES; ++i ) { 109 | pb->posy[i] += pb->vy[i] * delta_time + gd2; 110 | pb->vy[i] += gd; 111 | } 112 | for( int i = 0; i < NUM_PARTICLES; ++i ) { 113 | pb->posz[i] += pb->vz[i] * delta_time; 114 | } 115 | } 116 | #if __SSE__ 117 | void SIMD_SSE_UpdateParticles( particle_buffer *pb, float delta_time ) { 118 | float g = pb->gravity; 119 | float f_gd = g * delta_time; 120 | float f_gd2 = pb->gravity * delta_time * delta_time * 0.5f; 121 | 122 | // delta_time 123 | __m128 mmd = _mm_setr_ps( delta_time, delta_time, delta_time, delta_time ); 124 | // gravity * delta_time 125 | __m128 mmgd = _mm_load1_ps( &f_gd ); 126 | // gravity * delta_time * delta_time * 0.5f 127 | __m128 mmgd2 = _mm_load1_ps( &f_gd2 ); 128 | 129 | __m128 *px = (__m128*)pb->posx; 130 | __m128 *py = (__m128*)pb->posy; 131 | __m128 *pz = (__m128*)pb->posz; 132 | __m128 *vx = (__m128*)pb->vx; 133 | __m128 *vy = (__m128*)pb->vy; 134 | __m128 *vz = (__m128*)pb->vz; 135 | 136 | int iterationCount = NUM_PARTICLES / 4; 137 | for( int i = 0; i < iterationCount; ++i ) { 138 | __m128 dx = _mm_mul_ps(vx[i], mmd ); 139 | __m128 dy = _mm_add_ps( _mm_mul_ps(vy[i], mmd ), mmgd2 ); 140 | __m128 dz = _mm_mul_ps(vz[i], mmd ); 141 | __m128 newx = _mm_add_ps(px[i], dx); 142 | __m128 newy = _mm_add_ps(py[i], dy); 143 | __m128 newz = _mm_add_ps(pz[i], dz); 144 | __m128 newvy = _mm_add_ps(vy[i], mmgd); 145 | _mm_store_ps((float*)(px+i), newx); 146 | _mm_store_ps((float*)(py+i), newy); 147 | _mm_store_ps((float*)(pz+i), newz); 148 | _mm_store_ps((float*)(vy+i), newvy); 149 | } 150 | } 151 | void SIMD_SSE_UpdateParticlesSliced( particle_buffer *pb, float delta_time ) { 152 | float g = pb->gravity; 153 | float f_gd = g * delta_time; 154 | float f_gd2 = pb->gravity * delta_time * delta_time * 0.5f; 155 | 156 | // delta_time 157 | __m128 mmd = _mm_setr_ps( delta_time, delta_time, delta_time, delta_time ); 158 | // gravity * delta_time 159 | __m128 mmgd = _mm_load1_ps( &f_gd ); 160 | // gravity * 
delta_time * delta_time * 0.5f 161 | __m128 mmgd2 = _mm_load1_ps( &f_gd2 ); 162 | 163 | __m128 *px = (__m128*)pb->posx; 164 | __m128 *py = (__m128*)pb->posy; 165 | __m128 *pz = (__m128*)pb->posz; 166 | __m128 *vx = (__m128*)pb->vx; 167 | __m128 *vy = (__m128*)pb->vy; 168 | __m128 *vz = (__m128*)pb->vz; 169 | 170 | int iterationCount = NUM_PARTICLES / 4; 171 | for( int i = 0; i < iterationCount; ++i ) { 172 | __m128 dx = _mm_mul_ps(vx[i], mmd ); 173 | __m128 newx = _mm_add_ps(px[i], dx); 174 | _mm_store_ps((float*)(px+i), newx); 175 | } 176 | for( int i = 0; i < iterationCount; ++i ) { 177 | __m128 dy = _mm_add_ps( _mm_mul_ps(vy[i], mmd ), mmgd2 ); 178 | __m128 newy = _mm_add_ps(py[i], dy); 179 | __m128 newvy = _mm_add_ps(vy[i], mmgd); 180 | _mm_store_ps((float*)(py+i), newy); 181 | _mm_store_ps((float*)(vy+i), newvy); 182 | } 183 | for( int i = 0; i < iterationCount; ++i ) { 184 | __m128 dz = _mm_mul_ps(vz[i], mmd ); 185 | __m128 newz = _mm_add_ps(pz[i], dz); 186 | _mm_store_ps((float*)(pz+i), newz); 187 | } 188 | } 189 | #endif 190 | 191 | #if __AVX__ 192 | void SIMD_AVX_UpdateParticles( particle_buffer *pb, float delta_time ) { 193 | float g = pb->gravity; 194 | float f_gd = g * delta_time; 195 | float f_gd2 = pb->gravity * delta_time * delta_time * 0.5f; 196 | 197 | // delta_time 198 | __m256 mm256d = _mm256_set1_ps( delta_time ); 199 | // gravity * delta_time 200 | __m256 mm256gd = _mm256_set1_ps( f_gd ); 201 | // gravity * delta_time * delta_time * 0.5f 202 | __m256 mm256gd2 = _mm256_set1_ps( f_gd2 ); 203 | 204 | __m256 *px = (__m256*)pb->posx; 205 | __m256 *py = (__m256*)pb->posy; 206 | __m256 *pz = (__m256*)pb->posz; 207 | __m256 *vx = (__m256*)pb->vx; 208 | __m256 *vy = (__m256*)pb->vy; 209 | __m256 *vz = (__m256*)pb->vz; 210 | 211 | int iterationCount = NUM_PARTICLES / 8; 212 | for( int i = 0; i < iterationCount; ++i ) { 213 | __m256 dx = _mm256_mul_ps(vx[i], mm256d ); 214 | __m256 dy = _mm256_add_ps( _mm256_mul_ps(vy[i], mm256d ), mm256gd2 ); 215 | __m256 
dz = _mm256_mul_ps(vz[i], mm256d ); 216 | __m256 newx = _mm256_add_ps(px[i], dx); 217 | __m256 newy = _mm256_add_ps(py[i], dy); 218 | __m256 newz = _mm256_add_ps(pz[i], dz); 219 | __m256 newvy = _mm256_add_ps(vy[i], mm256gd); 220 | _mm256_store_ps((float*)(px+i), newx); 221 | _mm256_store_ps((float*)(py+i), newy); 222 | _mm256_store_ps((float*)(pz+i), newz); 223 | _mm256_store_ps((float*)(vy+i), newvy); 224 | } 225 | } 226 | #endif 227 | 228 | struct Data { 229 | particle_buffer_AoS pbAoS; 230 | particle_buffer pbsimple; 231 | particle_buffer pbslice; 232 | particle_buffer pbSIMDSSE; 233 | particle_buffer pbSIMDAVX; 234 | }; 235 | Data *gData; 236 | 237 | #if __AVX__ 238 | void TestAVX() { 239 | for( int frame = 0; frame < NUM_UPDATES; ++frame ) { 240 | SIMD_AVX_UpdateParticles( &gData->pbSIMDAVX, UPDATE_DELTA); 241 | } 242 | } 243 | #endif 244 | #if __SSE__ 245 | void TestSSE() { 246 | for( int frame = 0; frame < NUM_UPDATES; ++frame ) { 247 | SIMD_SSE_UpdateParticles( &gData->pbSIMDSSE, UPDATE_DELTA); 248 | } 249 | } 250 | void TestSSESliced() { 251 | for( int frame = 0; frame < NUM_UPDATES; ++frame ) { 252 | SIMD_SSE_UpdateParticlesSliced( &gData->pbSIMDSSE, UPDATE_DELTA); 253 | } 254 | } 255 | #endif 256 | void TestSoASliced() { 257 | for( int frame = 0; frame < NUM_UPDATES; ++frame ) { 258 | SliceUpdateParticles( &gData->pbslice, UPDATE_DELTA); 259 | } 260 | } 261 | void TestSoA() { 262 | for( int frame = 0; frame < NUM_UPDATES; ++frame ) { 263 | SimpleUpdateParticles( &gData->pbsimple, UPDATE_DELTA); 264 | } 265 | } 266 | void TestAoS() { 267 | for( int frame = 0; frame < NUM_UPDATES; ++frame ) { 268 | SimpleUpdateParticlesAoS( &gData->pbAoS, UPDATE_DELTA); 269 | } 270 | } 271 | 272 | int main() { 273 | Data data; 274 | gData = &data; 275 | 276 | Test tests[] = { 277 | (Test){ TestAoS, "array of structs" }, 278 | (Test){ TestSoA, "struct of arrays naive processing" }, 279 | (Test){ TestSoASliced, "struct of arrays partitioned processing" }, 280 | #if __AVX__ 
281 | (Test){ TestAVX, "AVX" }, 282 | #endif 283 | #if __SSE__ 284 | (Test){ TestSSE, "SSE" }, 285 | (Test){ TestSSESliced, "SSE partitioned" }, 286 | #endif 287 | }; 288 | 289 | 290 | printf("Trialling with %i particles over %i updates\n", NUM_PARTICLES, NUM_UPDATES ); 291 | RunTests( tests ); 292 | return 0; 293 | } 294 | -------------------------------------------------------------------------------- /speculativewaste.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include 3 | 4 | struct B { 5 | int height; 6 | bool isClockwise; 7 | }; 8 | 9 | struct A { 10 | bool canStandOnOneLeg; 11 | bool hasTheirOwnHair; 12 | bool ownsADog; 13 | bool isOwnedByACat; 14 | 15 | // cached "has B info" 16 | bool isCached25 : 1; 17 | bool isCached50 : 1; 18 | bool isCached75 : 1; 19 | bool isCached95 : 1; 20 | bool isCached99 : 1; 21 | bool hasBInfo : 1; 22 | bool isTall : 1; 23 | }; 24 | 25 | static std::map BInfoMap; 26 | static std::vector AInfoVec; 27 | 28 | static const int NUM_IN_TEST = 128 * 1024; 29 | 30 | void Setup() { 31 | Timer t; 32 | pcg32_random_t rng; 33 | pcg32_srandom_r(&rng, 1234, 5678); 34 | AInfoVec.clear(); 35 | BInfoMap.clear(); 36 | for( int i = 0; i < NUM_IN_TEST; ++i ) { 37 | A a; 38 | a.canStandOnOneLeg = pcg32_random_r_probability(&rng, 0.99f); 39 | a.hasTheirOwnHair = pcg32_random_r_probability(&rng, 0.90f); 40 | a.ownsADog = pcg32_random_r_probability(&rng, 0.46f); 41 | a.isOwnedByACat = pcg32_random_r_probability(&rng, 0.42f); 42 | a.isCached25 = pcg32_random_r_probability(&rng, 0.25f); 43 | a.isCached50 = pcg32_random_r_probability(&rng, 0.5f); 44 | a.isCached75 = pcg32_random_r_probability(&rng, 0.75f); 45 | a.isCached95 = pcg32_random_r_probability(&rng, 0.95f); 46 | a.isCached99 = pcg32_random_r_probability(&rng, 0.99f); 47 | a.hasBInfo = pcg32_random_r_probability(&rng, 0.25f); 48 | if( a.hasBInfo ) { 49 | B b; 50 | b.height = pcg32_random_r_range(&rng, 150, 200); 51 | 
b.isClockwise = pcg32_random_r_probability(&rng, 0.5f); 52 | BInfoMap[i] = b; 53 | a.isTall = b.height > 185; 54 | } 55 | AInfoVec.push_back( a ); 56 | } 57 | printf( "Setup took %fms\n", t.elapsed() ); 58 | } 59 | 60 | std::pair Simple() { 61 | int good = 0; 62 | int taller = 0; 63 | for( int i = 0; i < NUM_IN_TEST; ++i ) { 64 | A &a = AInfoVec[i]; 65 | if( a.canStandOnOneLeg && a.hasTheirOwnHair ) { 66 | good += 1; 67 | if( BInfoMap.find( i ) != BInfoMap.end() ) { 68 | if( BInfoMap[i].height > 185 ) { 69 | taller += 1; 70 | } 71 | } 72 | } 73 | } 74 | return std::pair(good,taller); 75 | } 76 | 77 | std::pair Bool() { 78 | int good = 0; 79 | int taller = 0; 80 | for( int i = 0; i < NUM_IN_TEST; ++i ) { 81 | A &a = AInfoVec[i]; 82 | if( a.canStandOnOneLeg && a.hasTheirOwnHair ) { 83 | good += 1; 84 | if( a.hasBInfo ) { 85 | if( BInfoMap[i].height > 185 ) { 86 | taller += 1; 87 | } 88 | } 89 | } 90 | } 91 | return std::pair(good,taller); 92 | } 93 | 94 | std::pair CachedBool50() { 95 | int good = 0; 96 | int taller = 0; 97 | for( int i = 0; i < NUM_IN_TEST; ++i ) { 98 | A &a = AInfoVec[i]; 99 | if( a.canStandOnOneLeg && a.hasTheirOwnHair ) { 100 | good += 1; 101 | if( a.isCached50 ) { 102 | if( a.hasBInfo ) { 103 | if( BInfoMap[i].height > 185 ) { 104 | taller += 1; 105 | } 106 | } 107 | } else { 108 | if( BInfoMap.find( i ) != BInfoMap.end() ) { 109 | if( BInfoMap[i].height > 185 ) { 110 | taller += 1; 111 | } 112 | } 113 | } 114 | } 115 | } 116 | return std::pair(good,taller); 117 | } 118 | 119 | std::pair Cached() { 120 | int good = 0; 121 | int taller = 0; 122 | for( int i = 0; i < NUM_IN_TEST; ++i ) { 123 | A &a = AInfoVec[i]; 124 | if( a.canStandOnOneLeg && a.hasTheirOwnHair ) { 125 | good += 1; 126 | if( a.hasBInfo && a.isTall ) { 127 | taller += 1; 128 | } 129 | } 130 | } 131 | return std::pair(good,taller); 132 | } 133 | 134 | std::pair PartiallyCached25() { 135 | int good = 0; 136 | int taller = 0; 137 | for( int i = 0; i < NUM_IN_TEST; ++i ) { 138 | A &a 
= AInfoVec[i]; 139 | if( a.canStandOnOneLeg && a.hasTheirOwnHair ) { 140 | good += 1; 141 | if( a.isCached25 ) { 142 | if( a.hasBInfo && a.isTall ) { 143 | taller += 1; 144 | } 145 | } else { 146 | if( BInfoMap.find( i ) != BInfoMap.end() ) { 147 | if( BInfoMap[i].height > 185 ) { 148 | taller += 1; 149 | } 150 | } 151 | } 152 | } 153 | } 154 | return std::pair(good,taller); 155 | } 156 | std::pair PartiallyCached50() { 157 | int good = 0; 158 | int taller = 0; 159 | for( int i = 0; i < NUM_IN_TEST; ++i ) { 160 | A &a = AInfoVec[i]; 161 | if( a.canStandOnOneLeg && a.hasTheirOwnHair ) { 162 | good += 1; 163 | if( a.isCached50 ) { 164 | if( a.hasBInfo && a.isTall ) { 165 | taller += 1; 166 | } 167 | } else { 168 | if( BInfoMap.find( i ) != BInfoMap.end() ) { 169 | if( BInfoMap[i].height > 185 ) { 170 | taller += 1; 171 | } 172 | } 173 | } 174 | } 175 | } 176 | return std::pair(good,taller); 177 | } 178 | std::pair PartiallyCached75() { 179 | int good = 0; 180 | int taller = 0; 181 | for( int i = 0; i < NUM_IN_TEST; ++i ) { 182 | A &a = AInfoVec[i]; 183 | if( a.canStandOnOneLeg && a.hasTheirOwnHair ) { 184 | good += 1; 185 | if( a.isCached75 ) { 186 | if( a.hasBInfo && a.isTall ) { 187 | taller += 1; 188 | } 189 | } else { 190 | if( BInfoMap.find( i ) != BInfoMap.end() ) { 191 | if( BInfoMap[i].height > 185 ) { 192 | taller += 1; 193 | } 194 | } 195 | } 196 | } 197 | } 198 | return std::pair(good,taller); 199 | } 200 | std::pair PartiallyCached95() { 201 | int good = 0; 202 | int taller = 0; 203 | for( int i = 0; i < NUM_IN_TEST; ++i ) { 204 | A &a = AInfoVec[i]; 205 | if( a.canStandOnOneLeg && a.hasTheirOwnHair ) { 206 | good += 1; 207 | if( a.isCached95 ) { 208 | if( a.hasBInfo && a.isTall ) { 209 | taller += 1; 210 | } 211 | } else { 212 | if( BInfoMap.find( i ) != BInfoMap.end() ) { 213 | if( BInfoMap[i].height > 185 ) { 214 | taller += 1; 215 | } 216 | } 217 | } 218 | } 219 | } 220 | return std::pair(good,taller); 221 | } 222 | std::pair PartiallyCached99() { 223 
| int good = 0; 224 | int taller = 0; 225 | for( int i = 0; i < NUM_IN_TEST; ++i ) { 226 | A &a = AInfoVec[i]; 227 | if( a.canStandOnOneLeg && a.hasTheirOwnHair ) { 228 | good += 1; 229 | if( a.isCached99 ) { 230 | if( a.hasBInfo && a.isTall ) { 231 | taller += 1; 232 | } 233 | } else { 234 | if( BInfoMap.find( i ) != BInfoMap.end() ) { 235 | if( BInfoMap[i].height > 185 ) { 236 | taller += 1; 237 | } 238 | } 239 | } 240 | } 241 | } 242 | return std::pair(good,taller); 243 | } 244 | 245 | int validCount[16]; 246 | std::pair valid; 247 | 248 | void TestSimple() { 249 | std::pair result = Simple(); 250 | if( valid.first == result.first && valid.second == result.second ) 251 | validCount[0] += 1; 252 | } 253 | void TestBool() { 254 | std::pair result = Bool(); 255 | if( valid.first == result.first && valid.second == result.second ) 256 | validCount[1] += 1; 257 | } 258 | void TestCachedBool() { 259 | std::pair result = CachedBool50(); 260 | if( valid.first == result.first && valid.second == result.second ) 261 | validCount[1] += 1; 262 | } 263 | void TestCached() { 264 | std::pair result = Cached(); 265 | if( valid.first == result.first && valid.second == result.second ) 266 | validCount[2] += 1; 267 | } 268 | void TestPartiallyCached25() { 269 | std::pair result = PartiallyCached25(); 270 | if( valid.first == result.first && valid.second == result.second ) 271 | validCount[3] += 1; 272 | } 273 | void TestPartiallyCached50() { 274 | std::pair result = PartiallyCached50(); 275 | if( valid.first == result.first && valid.second == result.second ) 276 | validCount[4] += 1; 277 | } 278 | void TestPartiallyCached75() { 279 | std::pair result = PartiallyCached75(); 280 | if( valid.first == result.first && valid.second == result.second ) 281 | validCount[5] += 1; 282 | } 283 | void TestPartiallyCached95() { 284 | std::pair result = PartiallyCached95(); 285 | if( valid.first == result.first && valid.second == result.second ) 286 | validCount[5] += 1; 287 | } 288 | void 
TestPartiallyCached99() { 289 | std::pair result = PartiallyCached99(); 290 | if( valid.first == result.first && valid.second == result.second ) 291 | validCount[5] += 1; 292 | } 293 | 294 | 295 | int main() { 296 | 297 | Test tests[] = { 298 | (Test){ TestSimple, "Simple, check the map" }, 299 | (Test){ TestBool, "Simple, cache presence" }, 300 | (Test){ TestCachedBool, "Partially cached presence (50%)" }, 301 | (Test){ TestCached, "Fully cached query" }, 302 | (Test){ TestPartiallyCached25, "Partially cached query (25%)" }, 303 | (Test){ TestPartiallyCached50, "Partially cached query (50%)" }, 304 | (Test){ TestPartiallyCached75, "Partially cached query (75%)" }, 305 | (Test){ TestPartiallyCached95, "Partially cached query (95%)" }, 306 | (Test){ TestPartiallyCached99, "Partially cached query (99%)" }, 307 | }; 308 | 309 | Setup(); 310 | 311 | valid = Simple(); 312 | 313 | RunTests( tests ); 314 | 315 | return 0; 316 | } 317 | --------------------------------------------------------------------------------