├── conanfile.txt ├── tests ├── main.cpp └── CMakeLists.txt ├── src ├── main.cpp ├── Markov.hpp └── Markov.cpp ├── CMakeLists.txt ├── examples └── exampleOne.cpp ├── .gitignore ├── README.md ├── LICENSE └── hnnews.txt /conanfile.txt: -------------------------------------------------------------------------------- 1 | [requires] 2 | Catch2/2.5.0@catchorg/stable 3 | 4 | [generators] 5 | cmake -------------------------------------------------------------------------------- /tests/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | auto main() -> int { 5 | 6 | return 1; 7 | } -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | auto main() -> int { 4 | 5 | std::cout << "Markov Chains are great " << "\n"; 6 | return 1; 7 | } -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | project(tests) 4 | 5 | 6 | set(FILES main.cpp) 7 | 8 | add_executable(tests ${FILES}) 9 | 10 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | project(cppmarkov) 4 | 5 | set(FILES src/main.cpp src/Markov.cpp src/Markov.hpp) 6 | 7 | set(CMAKE_CXX_STANDARD 17) 8 | 9 | set(CMAKE_BINARY_DIR bin) 10 | 11 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) 12 | 13 | #add_subdirectory(tests) 14 | 15 | add_executable(cppmarkov ${FILES}) 16 | 17 | -------------------------------------------------------------------------------- /examples/exampleOne.cpp: -------------------------------------------------------------------------------- 1 | 
#include 2 | #include "Markov.hpp" 3 | 4 | auto main() -> int { 5 | 6 | Markov::Chain chain(1); 7 | 8 | chain.add("I am sam"); 9 | chain.add("I am an Engineer"); 10 | chain.add("I like coding"); 11 | 12 | Markov::Ngram currentState = {"Engineer"}; 13 | Markov::NextState ns("$"); 14 | 15 | std::cout << chain.transitionProbability(ns,currentState) << "\n"; // Printing 1 16 | 17 | return 1; 18 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | .idea/ 35 | .vscode/ 36 | cmake-build-debug/ 37 | CMakeCache.txt 38 | build/ 39 | bin/ 40 | *.cbp 41 | cmake_install.cmake 42 | CMakeFiles/ 43 | graph_info.json -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cppmarkov 2 | 3 | > A Markov chain is a mathematical system which keeps track of transitions between different states and tells you the probability of a transition occurring between two states. 4 | 5 | This repository is an implementation in C++17 of that stochastic process. 6 | 7 | # Compiling and running the example 8 | 9 | ## Requirements 10 | 11 | - CMake 3.10.2 or greater 12 | - A C++ compiler (GCC or Clang etc. ) that supports C++17 13 | 14 | ## Compiling 15 | 16 | In order to compile just run 17 | 18 | ```cmake 19 | 20 | ~ $ cmake --build . 21 | 22 | ``` 23 | 24 | And the resulting binary will be in the `bin` folder. 
25 | 26 | ## Code example 27 | 28 | ```cpp 29 | 30 | #include 31 | #include "Markov.hpp" 32 | 33 | auto main() -> int { 34 | 35 | Markov::Chain chain(1); 36 | 37 | chain.add("I am sam"); 38 | chain.add("I am an Engineer"); 39 | chain.add("I like coding"); 40 | 41 | Markov::Ngram currentState = {"Engineer"}; 42 | Markov::NextState ns("$"); 43 | 44 | std::cout << chain.transitionProbability(ns,currentState) << "\n"; // Printing 1 45 | 46 | return 1; 47 | } 48 | 49 | ``` 50 | 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Sonkeng Maldini 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
// -------------------------------------------------------------------------------- /src/Markov.hpp: --------------------------------------------------------------------------------
#ifndef H_MARKOV_INCLUDED
#define H_MARKOV_INCLUDED  // was spelled H_MARKOV_ICLUDED, so the guard never matched its #ifndef
// `#pragma once` dropped: it is redundant now that the include guard is spelled correctly.

// NOTE(review): the header names were stripped from the extracted listing;
// this is the set the code below actually needs.
#include <cstddef>
#include <iostream>
#include <iterator>
#include <map>
#include <random>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#define START_TOKEN "^"
#define END_TOKEN "$"

namespace Markov {

    /// A state of the chain: `order` consecutive words.
    using Ngram = std::vector<std::string>;
    /// The word that follows a state.
    using NextState = std::string;
    /// One observed transition: (state, following word).
    using Pair = std::pair<Ngram, NextState>;

    /**
     * @brief Small counter wrapper that starts at 1.
     *
     * Not referenced by Chain in this listing; kept so existing users of the
     * type keep compiling.
     */
    struct Occurence {
        int occurence{1};

        Occurence() = default;

        void operator++() {
            ++occurence;
        }

        void operator=(int i) {
            occurence = i;
        }

        operator int() const {
            return occurence;
        }
    };

    /**
     * @brief Word-level Markov chain of a fixed order.
     */
    struct Chain {

        // NOTE(review): template arguments were stripped from the extracted
        // listing; <Pair, int> is the only mapped type that compiles with the
        // original `insert({aPair, 1})`.
        std::map<Pair, int> transitionMatrix;   // how many times each transition was added
        std::map<int, Pair> ipMap_;             // index pair Map, keyed by successive incr_ values
        bool computed_{false};                  // true while probabilities_ matches transitionMatrix
        int m_order;                            // The Order of the Markov Chain. 1-order, 2-order etc.
        std::vector<double> probabilities_;     // one entry per transition, in transitionMatrix order

        static int incr_;                       // shared by every Chain, so ipMap_ keys are not zero-based per chain

        /**
         * @brief Create an empty chain.
         * @param order number of words per state; must be at least 1
         * @throws std::invalid_argument if order < 1
         */
        explicit Chain(int order);

        /**
         * @brief Returns the transition probability between two states
         * @param ns    candidate next word
         * @param ngram current state; must contain exactly m_order words
         * @return count(ngram -> ns) / count(ngram -> anything), or -1 (after
         *         logging to std::cout) when the transition was never added
         * @throws int (the value 1) when ngram.size() != m_order
         */
        auto transitionProbability(const NextState& ns, const Ngram& ngram) const -> double;

        /**
         * @brief add a new String to the chain
         *
         * The text is wrapped in a start and an end token, split on single
         * spaces and every resulting transition is counted. Invalidates the
         * cached probabilities.
         */
        auto add(std::string&& s) -> void;

        /**
         * @brief Create a new pair according to the order
         * @param v     tokens, with the start token at index 0
         * @param order number of words per state
         * @return every (state, next word) pair; empty when v is too short
         * @throws std::invalid_argument if order is negative
         */
        auto makePairs(std::vector<std::string>&& v, int order) -> std::vector<Pair>;

        /**
         * @brief Generates new text by walking the chain
         * @param length maximum number of words to produce
         * @return space-separated words; empty if length <= 0 or nothing was added
         */
        auto generateWord(int length) -> std::string;

        /**
         * @brief Fill probabilities_ with one probability per transition.
         *
         * Does nothing when the cached values are still current.
         */
        auto storeProbabilities() -> void;
    };

    /**
     * @brief Lexicographic order on transitions: state first, then next word.
     */
    auto operator<(const Pair& lhs, const Pair& rhs) -> bool;

    /**
     * Helper to split string into vector
     *
     * Consecutive delimiters produce empty tokens.
     */
    auto split(std::string str, char delimiter) -> std::vector<std::string>;

    /**
     * @brief take two Ngram and tell whether they're equal or not
     * @return bool
     */
    auto compare(const Ngram& lhs, const Ngram& rhs) -> bool;

    /**
     * @brief Join the Ngram vector into string
     *
     * Every word, including the last, is followed by one space.
     */
    auto join(const Ngram& n) -> std::string;
} // end namespace Markov

#endif

// -------------------------------------------------------------------------------- /src/Markov.cpp: --------------------------------------------------------------------------------
// #include "Markov.hpp"  -- written as a comment because this listing places the header directly above.

namespace {

    using Matrix = std::map<Markov::Pair, int>;

    /// Returns `token` repeated `count` times (empty when count <= 0).
    auto repeatToken(const char* token, int count) -> std::string {
        std::string out;
        for (int i = 0; i < count; ++i) {
            out += token;
        }
        return out;
    }

    /// First transition whose state equals `state`, or the position where it would be.
    /// Relies on the map ordering keys by state first and next word second.
    auto firstTransitionFrom(const Matrix& matrix, const Markov::Ngram& state) -> Matrix::const_iterator {
        return matrix.lower_bound(Markov::Pair{state, Markov::NextState{}});
    }

    /// Sum of the counts of every transition leaving `state`.
    auto countTransitionsFrom(const Matrix& matrix, const Markov::Ngram& state) -> int {
        int total = 0;
        for (auto it = firstTransitionFrom(matrix, state);
             it != matrix.end() && it->first.first == state; ++it) {
            total += it->second;
        }
        return total;
    }

} // namespace

Markov::Chain::Chain(int order) : m_order(order) {
    if (order < 1) {
        throw std::invalid_argument("Markov::Chain: order must be >= 1");
    }
}

auto Markov::Chain::add(std::string&& str) -> void {
    // I am sam. I am an Engineer. I like Coding.

    // std::string(START_TOKEN, m_order) is the (pointer, count) constructor and
    // read past the one-character literal for order > 1; repeat the token instead.
    const auto startTokens = repeatToken(START_TOKEN, m_order);
    const auto endTokens = repeatToken(END_TOKEN, m_order);
    const std::string padded = startTokens + " " + str + " " + endTokens;

    // Extract N-gram from the given string
    const auto pairs = makePairs(Markov::split(padded, ' '), m_order);

    for (const auto& aPair : pairs) {
        const auto [slot, inserted] = transitionMatrix.try_emplace(aPair, 1);
        if (inserted) {
            ipMap_.insert({incr_++, aPair});
        } else {
            ++slot->second;
        }
    }

    // The counts changed, so any stored probabilities are stale.
    computed_ = false;
    probabilities_.clear();
}

auto Markov::Chain::makePairs(std::vector<std::string>&& strVec, int order) -> std::vector<Pair> {

    if (order < 0) {
        throw std::invalid_argument("Markov::Chain::makePairs: order must not be negative");
    }

    std::vector<Pair> pairList;
    const auto width = static_cast<std::size_t>(order);

    // Too few tokens for even one pair. The original computed size() - order
    // in unsigned arithmetic, which wrapped around and made at() throw.
    if (strVec.size() <= width + 1) {
        return pairList;
    }
    pairList.reserve(strVec.size() - width - 1);

    // Starts at 1: index 0 is the start token, which never becomes part of a state.
    for (std::size_t i = 1; i + width < strVec.size(); ++i) {
        const auto first = strVec.begin() + static_cast<std::ptrdiff_t>(i);
        const auto last = first + static_cast<std::ptrdiff_t>(width);
        pairList.emplace_back(Ngram(first, last), strVec[i + width]);
    }

    return pairList;
}

auto Markov::Chain::transitionProbability(const Markov::NextState& ns, const Markov::Ngram& currentState) const -> double {

    if (currentState.size() != static_cast<std::size_t>(m_order)) {
        throw 1;  // kept as-is: existing callers may catch int
    }

    const auto found = transitionMatrix.find(Pair{currentState, ns});
    if (found == transitionMatrix.end()) {
        const std::string lastWord = currentState.empty() ? std::string{} : currentState.back();
        std::cout << "The transition was not found " << lastWord << " " << ns << "\n";
        return -1;
    }

    // Divide by the number of transitions from currentState to any other word.
    // The found transition is part of that sum, so it is never zero.
    const int sumOther = countTransitionsFrom(transitionMatrix, currentState);
    return static_cast<double>(found->second) / static_cast<double>(sumOther);
}

auto Markov::Chain::storeProbabilities() -> void {

    if (computed_) {
        return;
    }

    probabilities_.clear();
    probabilities_.reserve(transitionMatrix.size());
    for (const auto& entry : transitionMatrix) {
        probabilities_.push_back(transitionProbability(entry.first.second, entry.first.first));
    }
    computed_ = true;
}

// NOTE(review): the body of the original generation loop was lost when this
// listing was extracted (everything between `n<` and the next `>`). What
// survived is the setup: probabilities are stored, an mt19937 is seeded from
// random_device and a discrete_distribution is built over the probabilities.
// The walk below is a reconstruction on top of that setup -- confirm against upstream.
auto Markov::Chain::generateWord(int length) -> std::string {

    std::string res;
    if (length <= 0 || transitionMatrix.empty()) {
        return res;
    }

    this->storeProbabilities();
    if (probabilities_.size() != transitionMatrix.size()) {
        // transitionMatrix is public and may have been edited directly.
        computed_ = false;
        this->storeProbabilities();
    }

    std::random_device rd;
    std::mt19937 gen(rd());

    // Pick the starting transition, weighted by its stored probability.
    std::discrete_distribution<std::size_t> seedDist(probabilities_.begin(), probabilities_.end());
    const auto seed = std::next(transitionMatrix.cbegin(), static_cast<std::ptrdiff_t>(seedDist(gen)));

    const std::string endToken = repeatToken(END_TOKEN, m_order);
    int produced = 0;
    const auto emit = [&res, &produced](const std::string& word) {
        if (produced > 0) {
            res += ' ';
        }
        res += word;
        ++produced;
    };

    Ngram state = seed->first.first;
    for (const auto& word : state) {
        if (produced == length) {
            return res;
        }
        emit(word);
    }

    NextState next = seed->first.second;
    while (produced < length && next != endToken) {
        emit(next);

        // Slide the window: drop the oldest word, append the one just emitted.
        if (!state.empty()) {
            state.erase(state.begin());
        }
        state.push_back(next);

        std::vector<int> weights;
        const auto first = firstTransitionFrom(transitionMatrix, state);
        for (auto it = first; it != transitionMatrix.cend() && it->first.first == state; ++it) {
            weights.push_back(it->second);
        }
        if (weights.empty()) {
            break;  // nothing was ever observed after this state
        }

        std::discrete_distribution<std::size_t> pick(weights.begin(), weights.end());
        next = std::next(first, static_cast<std::ptrdiff_t>(pick(gen)))->first.second;
    }

    return res;
}

// NOTE(review): the original definition was lost in extraction. This one
// matches std::pair's own lexicographic order, which is the order
// std::map<Pair, int> already uses -- confirm against upstream.
auto Markov::operator<(const Markov::Pair& lhs, const Markov::Pair& rhs) -> bool {
    if (lhs.first < rhs.first) {
        return true;
    }
    if (rhs.first < lhs.first) {
        return false;
    }
    return lhs.second < rhs.second;
}

auto Markov::split(std::string str, char delimiter) -> std::vector<std::string> {

    std::vector<std::string> internal;
    std::stringstream ss(str); // Turn the string into a stream.
    std::string tok;

    while (std::getline(ss, tok, delimiter)) {
        internal.push_back(tok);
    }

    return internal;
}

auto Markov::compare(const Markov::Ngram& lhs, const Markov::Ngram& rhs) -> bool {
    // Same size and same words in the same positions.
    return lhs == rhs;
}

auto Markov::join(const Markov::Ngram& ngram) -> std::string {

    std::string res;

    for (const auto& str : ngram) {
        res += str;
        res += ' ';
    }
    return res;
}

int Markov::Chain::incr_{0};

// Remainder of this dump line belongs to the next file and is kept verbatim:
// -------------------------------------------------------------------------------- /hnnews.txt: -------------------------------------------------------------------------------- 1 | "I Miss Microsoft Encarta" 2 | "Information operations directed at Hong Kong" 3 | "Google OAuth Is Failing with 500 Error Code" 4 | "Updating our advertising policies on state media" 5 | "How Rust optimizes async/await" 6 | "Cerebras Systems unveils a record 1.2T transistor chip for AI" 7 | "Statement on the Purpose of
a Corporation" 8 | "Performance Matters" 9 | "Rendering on the Web" 10 | "OpenDrop: An Open Source AirDrop Implementation" 11 | "Solitude and Leadership (2010)" 12 | "SharpScript" 13 | "Show HN: A little web app for playing around with colors" 14 | "Every productivity thought I've ever had, as concisely as possible" 15 | "Wireless Carrier Throttling of Online Video Is Pervasive: Study" 16 | "Why Did China Just Devalue the Yuan? How Trade Works with Free-Floating Currency" 17 | "In Amazon’s Bookstore, Orwell Gets a Rewrite" 18 | "Rules for Intuitive UX" 19 | "Early Years of Computer Gaming: Steve Russell and Nolan Bushnell (2002) [video]" 20 | "The Restaurant of Mistaken Orders" 21 | "Rashomon of Disclosure" 22 | "Version Museum" 23 | "Down and Out in the Magic Kingdom (2011)" 24 | "Traits of Serverless Architecture" 25 | "Programming as Theory Building (1985) [pdf]" 26 | "Hacking Image Interpolation for Fun and Profit" 27 | "Portugal trials use of goats to clear vegetation for reduction of wild fire risk" 28 | "Mistakes to Avoid When Buying a MicroSD Card (2018)" 29 | "Handler beliefs affect scent detection dog outcomes (2011)" 30 | "The World's Largest Submarine: The Soviet Union's Pr. 941 Typhoon SSBN (2014)" 31 | "Twitter is blocked in China, but its state news agency is buying promoted tweets" 32 | "How to Build Good Software" 33 | "A parrot has a question for humans" 34 | "The Lost City of Heracleion" 35 | "What is Haberman?" 
36 | "Show HN: Chart.xkcd – Xkcd-styled chart library" 37 | "Dark Corners and Pitfalls of C++" 38 | "Nigerian Teens Are Making Sci-Fi Shorts with Slick Visual Effects" 39 | "“Wealth work” is one of America’s fastest-growing industries" 40 | "World's largest urban farm to open on a Paris rooftop" 41 | "Richard Sorge: The Soviet Union’s Master Spy" 42 | "Show HN: A little web app for playing around with colors" 43 | "Show HN: Chart.xkcd – Xkcd-styled chart library" 44 | "Show HN: Crawlab: Open-Source Web Crawler Admin Platform That Runs Any Language" 45 | "Show HN: Saag as a Service – macronutrient-portioned Indian spinach curry" 46 | "Show HN: ClojureScript pixel game engine with Blender live-reloading" 47 | "Show HN: distri: a Linux distribution to research fast package management" 48 | "Show HN: Add web analytics to a Google Doc in 20 seconds" 49 | "Show HN: Prophecy.io – Cloud Native Data Engineering" 50 | "Show HN: Smartip.io – Reliable and Accurate IP Geolocation and Threat API" 51 | "Show HN: Scenery — Asynchronous communication for teams" 52 | "Show HN: Pitaya Go, IoT Dev Board with Multiprotocol Wireless Connectivity" 53 | "Show HN: A marketplace to hire no code experts" 54 | "Show HN: Register expiring premium domain names for just $99" 55 | "Show HN: Lazy – Free Bootstrap UI Kit" 56 | "Show HN: Software jobs with a difference. 
Filter jobs by interview type" 57 | "Show HN: Yet another free HTML form to email for your static websites" 58 | "Show HN: Software Engineering 101" 59 | "Show HN: A Detailed Look into Scammer Strategy to Steal Your Money – Part 3/3" 60 | "Show HN: How to Build Community – my 4 years research" 61 | "Show HN: AlertChimp – Manage prod role, no default permission, elevate on demand""Japanese anime studio Khara moving to Blender" 62 | "WeWTF" 63 | "I've reproduced 130 research papers about “predicting the stock market”" 64 | "How to Build Good Software" 65 | "Apple will soon treat online web tracking the same as a security vulnerability" 66 | "A Walk in Hong Kong" 67 | "OpenDrop: An Open Source AirDrop Implementation" 68 | "Tech Interview Handbook" 69 | "Start Your Own ISP" 70 | "We Have Ruined Childhood" 71 | "Rails 6.0" 72 | "Google Employee Writes Memo About ‘The Burden of Being Black at Google’" 73 | "YAML: Probably not so great after all" 74 | "Async-std: an async port of the Rust standard library" 75 | "Big Coal Plants Begin to Close" 76 | "The most frequent 777 characters give 90% coverage of Kanji in the wild" 77 | "Every productivity thought I've ever had, as concisely as possible" 78 | "The Architecture of Open Source Applications" 79 | "Cool but obscure X11 tools" 80 | "Japan surpasses China as largest foreign holder of US Treasurys" 81 | "U.S. Farmers Stung by Tariffs Now Face a $3.5B Corn Loss" 82 | "The Impact of Meditating Every Day" 83 | "I Miss Microsoft Encarta" 84 | "In the US, it's cheaper to build and operate wind farms than buy fossil fuels" 85 | "Key Negotiation of Bluetooth Attack" 86 | "Python vs. 
Rust for Neural Networks" 87 | "Making email more modern with JMAP" 88 | "Google Plans to Deprecate FTP URL Support in Chrome" 89 | "Rules for Intuitive UX" 90 | "YouTube shuts down music companies’ use of manual copyright claims" 91 | "Show HN: Chart.xkcd – Xkcd-styled chart library" 92 | "Nigerian Teens Are Making Sci-Fi Shorts with Slick Visual Effects" 93 | "Performance Matters" 94 | "Dirty tricks 6502 programmers use" 95 | "Open letter from an Android developer to the Google Play team" 96 | "Media Can’t Stop Presenting Horrifying Stories as ‘Uplifting’ Perseverance Porn" 97 | "35% Faster Than The Filesystem (2017)" 98 | "Software architects should be involved in earliest system engineering activities" 99 | "Why doesn't mathematics collapse, though humans often make mistakes in proofs?" 100 | "Show HN: Saag as a Service – macronutrient-portioned Indian spinach curry" 101 | "NOAA Data Confirms July Was Hottest Month Ever Recorded" 102 | "Nvidia CEO says Google is the only customer building its own silicon at scale" 103 | "Die With Me – A chat app you can only use when you have less than 5% battery" 104 | "Video Game Preservation – An archive of commercial video game source code" 105 | "Alone in the Dark" 106 | "The first solar road has turned out to be a disappointing failure" 107 | "Twitter locked my account for a nine year old tweet" 108 | "Hacking the Sonos IKEA Symfonisk into a High Quality Speaker Amp" 109 | "Twitter is displaying China-made ads attacking Hong Kong protesters" 110 | --------------------------------------------------------------------------------