├── .gitignore ├── .vs └── cs-hidden-markov-models │ └── v14 │ └── .suo ├── LICENSE ├── README.md ├── cs-hidden-markov-models-samples ├── App.config ├── HiddenMarkovClassifierUT.Learning.cs ├── HiddenMarkovModelUT.BaumWelchLearning.cs ├── HiddenMarkovModelUT.Evaluate.cs ├── HiddenMarkovModelUT.Generate.cs ├── HiddenMarkovModelUT.MaximumLikelihoodLearning.cs ├── HiddenMarkovModelUT.ViterbiLearning.cs ├── MathHelpers │ └── FTMergeSort.cs ├── MathUtils │ ├── FTANCOVA.cs │ ├── FTLogNormal.cs │ ├── FTMatrix.cs │ ├── FTMedian.cs │ ├── FTPercentileFunction.cs │ └── FTQuantileFunction.cs ├── Program.cs ├── Properties │ └── AssemblyInfo.cs └── cs-hidden-markov-models-samples.csproj ├── cs-hidden-markov-models.sln ├── cs-hidden-markov-models ├── ForwardBackwardAlgorithm.Log.Continuous.cs ├── ForwardBackwardAlgorithm.Log.cs ├── ForwardBackwardAlgorithm.cs ├── Helpers │ ├── Codification.cs │ ├── DiagnosticsHelper.cs │ ├── MathHelper.cs │ └── ValidationHelper.cs ├── HiddenMarkovClassifier.Continuous.cs ├── HiddenMarkovClassifier.cs ├── HiddenMarkovModel.Continuous.cs ├── HiddenMarkovModel.Generate.cs ├── HiddenMarkovModel.Predict.cs ├── HiddenMarkovModel.cs ├── Learning │ ├── Supervised │ │ ├── HiddenMarkovClassifierLearning.Continuous.cs │ │ ├── HiddenMarkovClassifierLearning.cs │ │ ├── MaximumLikelihoodLearning.Continuous.cs │ │ └── MaximumLikelihoodLearning.cs │ └── Unsupervised │ │ ├── BaumWelchLearning.Continuous.cs │ │ ├── BaumWelchLearning.cs │ │ ├── IUnsupervisedLearning.Continuous.cs │ │ ├── IUnsupervisedLearning.cs │ │ ├── ViterbiLearning.Continuous.cs │ │ └── ViterbiLearning.cs ├── MathHelpers │ ├── Factorial.cs │ ├── Gamma.cs │ ├── LogHelper.cs │ ├── MergeSort.cs │ └── PolynomialHelper.cs ├── MathUtils │ ├── Constants.cs │ ├── Distribution │ │ ├── Binomial.cs │ │ ├── ChiSquare.cs │ │ ├── DistributionModel.cs │ │ ├── Erlang.cs │ │ ├── Exponential.cs │ │ ├── FDistribution.cs │ │ ├── Gaussian.cs │ │ ├── LogNormal.cs │ │ ├── MultivariateDistributionModel.cs │ │ ├── 
Poisson.cs │ │ └── StudentT.cs │ ├── LinearAlgebra │ │ └── MatrixOp.cs │ ├── SpecialFunctions │ │ ├── ClampFunction.cs │ │ ├── ErrorFunction.cs │ │ ├── GammaFunction.cs │ │ ├── InverseErrorFunction.cs │ │ ├── InverseLogitFunction.cs │ │ └── LogitFunction.cs │ └── Statistics │ │ ├── ANCOVA.cs │ │ ├── ANCOVAv2.cs │ │ ├── ANOVA.cs │ │ ├── Bootstrapping.cs │ │ ├── CLT.cs │ │ ├── ChiSquareGOFTest.cs │ │ ├── ChiSquareIndependenceTest.cs │ │ ├── CollectionExtensionMethods.cs │ │ ├── ConfidenceInterval.cs │ │ ├── Correlation.cs │ │ ├── DescriptiveStatistics.cs │ │ ├── HypothesisTesting.cs │ │ ├── HypothesisTestingForProportion.cs │ │ ├── LinearCombination.cs │ │ ├── MANCOVA.cs │ │ ├── MANOVA.cs │ │ ├── Mean.cs │ │ ├── Median.cs │ │ ├── MultipleComparisons.cs │ │ ├── Sample.cs │ │ ├── StandardError.cs │ │ ├── StdDev.cs │ │ └── TwoWayANOVA.cs ├── Properties │ └── AssemblyInfo.cs ├── Topology │ ├── Ergodic.cs │ ├── Forward.cs │ └── ITopology.cs ├── Viterbi.Log.Continuous.cs ├── Viterbi.Log.cs ├── Viterbi.cs ├── cs-hidden-markov-models.csproj └── cs-hidden-markov-models.csproj.user ├── icon.jpg ├── notes └── nuget-packaging.md └── nuget ├── cs-hidden-markov-models.1.0.1.nupkg ├── cs-hidden-markov-models.nuspec └── lib └── net452 ├── Debug └── cs-hidden-markov-models.dll └── Release └── cs-hidden-markov-models.dll /.gitignore: -------------------------------------------------------------------------------- 1 | cs-hidden-markov-models/bin 2 | cs-hidden-markov-models/obj 3 | 4 | cs-hidden-markov-models-samples/bin 5 | cs-hidden-markov-models-samples/obj -------------------------------------------------------------------------------- /.vs/cs-hidden-markov-models/v14/.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/cs-hidden-markov-models/8a0887e5294148cac60664d68f57a0594889854f/.vs/cs-hidden-markov-models/v14/.suo -------------------------------------------------------------------------------- 
/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xianshun Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/App.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/HiddenMarkovModelUT.BaumWelchLearning.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.Learning.Unsupervised; 6 | using HiddenMarkovModels.MathUtils.Distribution; 7 | using HiddenMarkovModels.Topology; 8 | 9 | namespace HiddenMarkovModels.UT 10 | { 11 | public partial class HiddenMarkovModelUT 12 | { 13 | public static void BaumWelchLearning() 14 | { 15 | int[][] sequences = new int[][] 16 | { 17 | new int[] { 0,1,1,1,1,0,1,1,1,1 }, 18 | new int[] { 0,1,1,1,0,1,1,1,1,1 }, 19 | new int[] { 0,1,1,1,1,1,1,1,1,1 }, 20 | new int[] { 0,1,1,1,1,1 }, 21 | new int[] { 0,1,1,1,1,1,1 }, 22 | new int[] { 0,1,1,1,1,1,1,1,1,1 }, 23 | new int[] { 0,1,1,1,1,1,1,1,1,1 }, 24 | }; 25 | 26 | // Creates a new Hidden Markov Model with 3 states for 27 | // an output alphabet of two characters (zero and one) 28 | HiddenMarkovModel hmm = new HiddenMarkovModel(state_count: 3, symbol_count: 2); 29 | 30 | // Try to fit the model to the data until the difference in 31 | // the average log-likelihood changes only by as little as 0.0001 32 | var teacher = new BaumWelchLearning(hmm) { Tolerance = 0.0001, Iterations = 0 }; 33 | double ll = teacher.Run(sequences); 34 | 35 | // Calculate the probability that the given 36 | // sequences originated from the model 37 | double l1 = hmm.Evaluate(new int[] { 0, 1 }); // 0.999 38 | double l2 = hmm.Evaluate(new int[] { 0, 1, 1, 1 }); // 0.916 39 | Console.WriteLine("l1: {0}", System.Math.Exp(l1)); 40 | Console.WriteLine("l2: {0}", 
System.Math.Exp(l2)); 41 | 42 | // Sequences which do not start with zero have a much lower probability. 43 | double l3 = hmm.Evaluate(new int[] { 1, 1 }); // 0.000 44 | double l4 = hmm.Evaluate(new int[] { 1, 0, 0, 0 }); // 0.000 45 | Console.WriteLine("l3: {0}", System.Math.Exp(l3)); 46 | Console.WriteLine("l4: {0}", System.Math.Exp(l4)); 47 | 48 | // Sequences which contain a few errors have a higher probability 49 | // than the ones which do not start with zero. This shows some 50 | // of the temporal elasticity and error tolerance of the HMMs. 51 | double l5 = hmm.Evaluate(new int[] { 0, 1, 0, 1, 1, 1, 1, 1, 1 }); // 0.034 52 | double l6 = hmm.Evaluate(new int[] { 0, 1, 1, 1, 1, 1, 1, 0, 1 }); // 0.034 53 | Console.WriteLine("l5: {0}", System.Math.Exp(l5)); 54 | Console.WriteLine("l6: {0}", System.Math.Exp(l6)); 55 | } 56 | 57 | public static void BaumWelchLearningContinuous() 58 | { 59 | // Create continuous sequences. In the sequences below, there 60 | // seem to be two states, one for values between 0 and 1 and 61 | // another for values between 5 and 7. The states seem to be 62 | // switched on every observation. 63 | double[][] sequences = new double[][] 64 | { 65 | new double[] { 0.1, 5.2, 0.3, 6.7, 0.1, 6.0 }, 66 | new double[] { 0.2, 6.2, 0.3, 6.3, 0.1, 5.0 }, 67 | new double[] { 0.1, 7.0, 0.1, 7.0, 0.2, 5.6 }, 68 | }; 69 | 70 | 71 | // Specify an initial normal distribution for the samples. 72 | Gaussian density = new Gaussian(); 73 | 74 | // Creates a continuous hidden Markov Model with two states organized in an ergodic 75 | // topology and an underlying univariate Normal distribution as probability density. 
76 | var model = new HiddenMarkovModel(new Ergodic(2), density); 77 | 78 | // Configure the learning algorithms to train the sequence classifier until the 79 | // difference in the average log-likelihood changes only by as little as 0.0001 80 | var teacher = new BaumWelchLearning(model) 81 | { 82 | Tolerance = 0.0001, 83 | Iterations = 0, 84 | }; 85 | 86 | // Fit the model 87 | double logLikelihood = teacher.Run(sequences); 88 | 89 | // See the log-probability of the sequences learned 90 | double a1 = model.Evaluate(new double[] { 0.1, 5.2, 0.3, 6.7, 0.1, 6.0 }); // -0.12799388666109757 91 | double a2 = model.Evaluate(new double[] { 0.2, 6.2, 0.3, 6.3, 0.1, 5.0 }); // 0.01171157434400194 92 | Console.WriteLine("a1 = {0}", a1); 93 | Console.WriteLine("a2 = {0}", a2); 94 | 95 | // See the log-probability of an unrelated sequence 96 | double a3 = model.Evaluate(new[] { 1.1, 2.2, 1.3, 3.2, 4.2, 1.0 }); // -298.7465244473417 97 | Console.WriteLine("a3 = {0}", a3); 98 | 99 | // We can transform the log-probabilities to actual probabilities: 100 | double likelihood = System.Math.Exp(logLikelihood); 101 | a1 = System.Math.Exp(a1); // 0.879 102 | a2 = System.Math.Exp(a2); // 1.011 103 | a3 = System.Math.Exp(a3); // 0.000 104 | Console.WriteLine("a1 = {0}", a1); 105 | Console.WriteLine("a2 = {0}", a2); 106 | Console.WriteLine("a3 = {0}", a3); 107 | 108 | // We can also ask the model to decode one of the sequences. 
After 109 | // this step the state variable will contain: { 0, 1, 0, 1, 0, 1 } 110 | 111 | double lll; 112 | int[] states = model.Decode(new double[] { 0.1, 5.2, 0.3, 6.7, 0.1, 6.0 }, out lll); 113 | Console.WriteLine("states: {{{0}}}", string.Join(", ", states)); 114 | Console.WriteLine("lll: {0}", lll); 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/HiddenMarkovModelUT.Evaluate.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace HiddenMarkovModels.UT 7 | { 8 | public partial class HiddenMarkovModelUT 9 | { 10 | public static void Evaluate() 11 | { 12 | double[,] transition = 13 | { 14 | {0.7, 0.3}, 15 | {0.4, 0.6} 16 | }; 17 | 18 | double[,] emission = 19 | { 20 | { 0.1, 0.4, 0.5}, 21 | { 0.6, 0.3, 0.1} 22 | }; 23 | 24 | double[] initial = { 0.6, 0.4 }; 25 | 26 | HiddenMarkovModel hmm = new HiddenMarkovModel(transition, emission, initial); 27 | int[] sequence = new int[] { 0, 1, 2 }; 28 | 29 | double logLikeliHood = hmm.Evaluate(sequence); 30 | 31 | // At this point, the log-likelihood of the sequence 32 | // occurring within the model is -3.3928721329161653. 
33 | Console.WriteLine("logLikeliHood: {0}", logLikeliHood); 34 | } 35 | 36 | public static void Decode() 37 | { 38 | double[,] transition = 39 | { 40 | {0.7, 0.3}, 41 | {0.4, 0.6} 42 | }; 43 | 44 | double[,] emission = 45 | { 46 | { 0.1, 0.4, 0.5}, 47 | { 0.6, 0.3, 0.1} 48 | }; 49 | 50 | double[] initial = { 0.6, 0.4 }; 51 | 52 | HiddenMarkovModel hmm = new HiddenMarkovModel(transition, emission, initial); 53 | int[] sequence = new int[] { 0, 1, 2 }; 54 | 55 | // At this point, the state path will be 1-0-0 and the 56 | // log-likelihood will be -4.3095199438871337 57 | double logLikelihood; 58 | int[] path = hmm.Decode(sequence, out logLikelihood); 59 | 60 | Console.WriteLine("Path: {0}", string.Join("-", path)); 61 | Console.WriteLine("logLikelihood: {0}", logLikelihood); 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/HiddenMarkovModelUT.Generate.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.Learning.Unsupervised; 6 | using HiddenMarkovModels.Topology; 7 | using HiddenMarkovModels.Helpers; 8 | 9 | namespace HiddenMarkovModels.UT 10 | { 11 | public partial class HiddenMarkovModelUT 12 | { 13 | public static void Generate() 14 | { 15 | MathHelper.SetupGenerator(42); 16 | 17 | // Consider some phrases: 18 | // 19 | string[][] phrases = 20 | { 21 | new[] { "those", "are", "sample", "words", "from", "a", "dictionary" }, 22 | new[] { "those", "are", "sample", "words" }, 23 | new[] { "sample", "words", "are", "words" }, 24 | new[] { "those", "words" }, 25 | new[] { "those", "are", "words" }, 26 | new[] { "words", "from", "a", "dictionary" }, 27 | new[] { "those", "are", "words", "from", "a", "dictionary" } 28 | }; 29 | 30 | // Let's begin by transforming them to sequence of 31 | // integer labels using a 
codification codebook: 32 | var codebook = new Codification(phrases); 33 | 34 | // Now we can create the training data for the models: 35 | int[][] sequence = codebook.Translate(phrases); 36 | 37 | // To create the models, we will specify a forward topology, 38 | // as the sequences have definite start and ending points. 39 | // 40 | var topology = new Forward(state_count: 4); 41 | int symbols = codebook.SymbolCount; // We have 7 different words 42 | Console.WriteLine("Symbol Count: {0}", symbols); 43 | 44 | // Create the hidden Markov model 45 | HiddenMarkovModel hmm = new HiddenMarkovModel(topology, symbols); 46 | 47 | // Create the learning algorithm 48 | BaumWelchLearning teacher = new BaumWelchLearning(hmm); 49 | 50 | // Teach the model about the phrases 51 | double error = teacher.Run(sequence); 52 | 53 | // Now, we can ask the model to generate new samples 54 | // from the word distributions it has just learned: 55 | // 56 | int[] sample = hmm.Generate(3); 57 | 58 | // And the result will be: "those", "are", "words". 
59 | string[] result = codebook.Translate(sample); 60 | 61 | foreach(string result_word in result) 62 | { 63 | Console.WriteLine(result_word); 64 | } 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/HiddenMarkovModelUT.MaximumLikelihoodLearning.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.Learning.Supervised; 6 | 7 | namespace HiddenMarkovModels.UT 8 | { 9 | public partial class HiddenMarkovModelUT 10 | { 11 | public static void MaximumLikelihoodLearning() 12 | { 13 | int[][] observations = 14 | { 15 | new int[] { 0,0,0,1,0,0 }, 16 | new int[] { 1,0,0,1,0,0 }, 17 | new int[] { 0,0,1,0,0,0 }, 18 | new int[] { 0,0,0,0,1,0 }, 19 | new int[] { 1,0,0,0,1,0 }, 20 | new int[] { 0,0,0,1,1,0 }, 21 | new int[] { 1,0,0,0,0,0 }, 22 | new int[] { 1,0,1,0,0,0 }, 23 | }; 24 | 25 | // Now those are the visible states associated with each observation in each 26 | // observation sequence above. Note that there is always one state assigned 27 | // to each observation, so the lengths of the sequence of observations and 28 | // the sequence of states must always match. 
29 | 30 | int[][] paths = 31 | { 32 | new int[] { 0,0,1,0,1,0 }, 33 | new int[] { 1,0,1,0,1,0 }, 34 | new int[] { 1,0,0,1,1,0 }, 35 | new int[] { 1,0,1,1,1,0 }, 36 | new int[] { 1,0,0,1,0,1 }, 37 | new int[] { 0,0,1,0,0,1 }, 38 | new int[] { 0,0,1,1,0,1 }, 39 | new int[] { 0,1,1,1,0,0 }, 40 | }; 41 | 42 | // Create our Markov model with two states (0, 1) and two symbols (0, 1) 43 | HiddenMarkovModel model = new HiddenMarkovModel(state_count: 2, symbol_count: 2); 44 | 45 | // Now we can create our learning algorithm 46 | MaximumLikelihoodLearning teacher = new MaximumLikelihoodLearning(model) 47 | { 48 | // Set some options 49 | UseLaplaceRule = false 50 | }; 51 | 52 | // and finally learn a model using the algorithm 53 | double logLikelihood = teacher.Run(observations, paths); 54 | 55 | 56 | // To check what has been learned, we can extract the emission 57 | // and transition matrices, as well as the initial probability 58 | // vector from the HMM to compare against expected values: 59 | 60 | double[] pi = model.ProbabilityVector; // { 0.5, 0.5 } 61 | double[,] A = model.TransitionMatrix; // { { 7/20, 13/20 }, { 14/20, 6/20 } } 62 | double[,] B = model.EmissionMatrix; // { { 17/25, 8/25 }, { 19/23, 4/23 } } 63 | 64 | Console.WriteLine("pi: {{{0}}}", string.Join(", ", pi)); 65 | Console.WriteLine("A: {0}", ToString(A)); 66 | Console.WriteLine("B: {0}", ToString(B)); 67 | } 68 | 69 | private static string ToString(double[,] matrix) 70 | { 71 | StringBuilder sb = new StringBuilder(); 72 | 73 | sb.Append("{ "); 74 | for (int i = 0; i < matrix.GetLength(0); ++i) 75 | { 76 | if (i != 0) 77 | { 78 | sb.Append(", "); 79 | } 80 | sb.Append("{ "); 81 | for (int j = 0; j < matrix.GetLength(1); ++j) 82 | { 83 | if (j == 0) 84 | { 85 | sb.AppendFormat("{0}", matrix[i, j]); 86 | } 87 | else 88 | { 89 | sb.AppendFormat(", {0}", matrix[i, j]); 90 | } 91 | } 92 | sb.Append(" }"); 93 | } 94 | sb.Append(" }"); 95 | 96 | return sb.ToString(); 97 | } 98 | 99 | } 100 | } 101 | 
-------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/HiddenMarkovModelUT.ViterbiLearning.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.Learning.Unsupervised; 6 | 7 | namespace HiddenMarkovModels.UT 8 | { 9 | public partial class HiddenMarkovModelUT 10 | { 11 | public static void ViterbiLearning() 12 | { 13 | int[][] sequences = new int[][] 14 | { 15 | new int[] { 0,1,1,1,1,0,1,1,1,1 }, 16 | new int[] { 0,1,1,1,0,1,1,1,1,1 }, 17 | new int[] { 0,1,1,1,1,1,1,1,1,1 }, 18 | new int[] { 0,1,1,1,1,1 }, 19 | new int[] { 0,1,1,1,1,1,1 }, 20 | new int[] { 0,1,1,1,1,1,1,1,1,1 }, 21 | new int[] { 0,1,1,1,1,1,1,1,1,1 }, 22 | }; 23 | 24 | // Creates a new Hidden Markov Model with 3 states for 25 | // an output alphabet of two characters (zero and one) 26 | HiddenMarkovModel hmm = new HiddenMarkovModel(state_count: 3, symbol_count: 2); 27 | 28 | // Try to fit the model to the data until the average 29 | // log-likelihood changes by as little as 0.0001 30 | var teacher = new ViterbiLearning(hmm) { Tolerance = 0.0001, Iterations = 0 }; 31 | double ll = teacher.Run(sequences); 32 | 33 | // Calculate the probability that the given 34 | // sequences originated from the model 35 | double l1 = hmm.Evaluate(new int[] { 0, 1 }); // 0.999 36 | double l2 = hmm.Evaluate(new int[] { 0, 1, 1, 1 }); // 0.916 37 | Console.WriteLine("l1: {0}", System.Math.Exp(l1)); 38 | Console.WriteLine("l2: {0}", System.Math.Exp(l2)); 39 | 40 | // Sequences which do not start with zero have a much lower probability. 
41 | double l3 = hmm.Evaluate(new int[] { 1, 1 }); // 0.000 42 | double l4 = hmm.Evaluate(new int[] { 1, 0, 0, 0 }); // 0.000 43 | Console.WriteLine("l3: {0}", System.Math.Exp(l3)); 44 | Console.WriteLine("l4: {0}", System.Math.Exp(l4)); 45 | 46 | // Sequences which contain a few errors have a higher probability 47 | // than the ones which do not start with zero. This shows some 48 | // of the temporal elasticity and error tolerance of the HMMs. 49 | double l5 = hmm.Evaluate(new int[] { 0, 1, 0, 1, 1, 1, 1, 1, 1 }); // 0.034 50 | double l6 = hmm.Evaluate(new int[] { 0, 1, 1, 1, 1, 1, 1, 0, 1 }); // 0.034 51 | Console.WriteLine("l5: {0}", System.Math.Exp(l5)); 52 | Console.WriteLine("l6: {0}", System.Math.Exp(l6)); 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/MathHelpers/FTMergeSort.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathHelpers; 6 | 7 | namespace HiddenMarkovModels.MathHelpers.FT 8 | { 9 | public class FTMergeSort 10 | { 11 | public static void RunExample() 12 | { 13 | double[] a = new double[] { 4, 2, 6, 3, 5, 6, 1, 10, 10, 11, 32, 12 }; 14 | for (int i = 0; i < a.Length; ++i) 15 | { 16 | Console.Write("{0} ", a[i]); 17 | } 18 | Console.WriteLine(); 19 | MergeSort.Sort(a); 20 | for (int i = 0; i < a.Length; ++i) 21 | { 22 | Console.Write("{0} ", a[i]); 23 | } 24 | Console.WriteLine(); 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/MathUtils/FTANCOVA.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathUtils.Statistics; 6 | 7 | namespace 
HiddenMarkovModels.MathUtils.FT 8 | { 9 | public class FTANCOVA 10 | { 11 | public static void RunExample() 12 | { 13 | Tuple[] data = new Tuple[]{ 14 | Tuple.Create(5.0, 20.0, 1), 15 | Tuple.Create(10.0, 23.0, 1), 16 | Tuple.Create(12.0, 30.0, 1), 17 | Tuple.Create(9.0, 25.0, 1), 18 | Tuple.Create(23.0, 34.0, 1), 19 | Tuple.Create(21.0, 40.0, 1), 20 | Tuple.Create(14.0, 27.0, 1), 21 | Tuple.Create(18.0, 38.0, 1), 22 | Tuple.Create(6.0, 24.0, 1), 23 | Tuple.Create(13.0, 31.0, 1), 24 | Tuple.Create(7.0, 19.0, 2), 25 | Tuple.Create(12.0, 26.0, 2), 26 | Tuple.Create(27.0, 33.0, 2), 27 | Tuple.Create(24.0, 35.0, 2), 28 | Tuple.Create(18.0, 30.0, 2), 29 | Tuple.Create(22.0, 31.0, 2), 30 | Tuple.Create(26.0, 34.0, 2), 31 | Tuple.Create(21.0, 28.0, 2), 32 | Tuple.Create(14.0, 23.0, 2), 33 | Tuple.Create(9.0, 22.0, 2), 34 | }; 35 | 36 | double[] x = new double[data.Length]; 37 | double[] y = new double[data.Length]; 38 | int[] grpCat = new int[data.Length]; 39 | 40 | for(int i=0; i < data.Length; ++i) 41 | { 42 | x[i]=data[i].Item1; 43 | y[i]=data[i].Item2; 44 | grpCat[i]=data[i].Item3; 45 | } 46 | 47 | ANCOVA output = null; 48 | ANCOVA.RunANCOVA(x, y, grpCat, out output); 49 | Console.WriteLine(output.Summary); 50 | 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/MathUtils/FTLogNormal.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathUtils.Distribution; 6 | 7 | namespace HiddenMarkovModels.MathUtils.FT 8 | { 9 | public class FTLogNormal 10 | { 11 | public static void Run() 12 | { 13 | LogNormal ln = new LogNormal(5.13, 0.17); 14 | Gaussian normal_distribution = ln.ToNormal(); 15 | Console.WriteLine("Geometric Mean: {0} Geometric Standard Deviation: {1}", ln.GeometricMean, ln.GeometricStdDev); 16 | 
Console.WriteLine("Normal: ({0}, {1})", normal_distribution.Mean, normal_distribution.StdDev); 17 | 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/MathUtils/FTMatrix.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathUtils.LinearAlgebra; 6 | 7 | namespace HiddenMarkovModels.MathUtils.FT 8 | { 9 | public class FTMatrix 10 | { 11 | public static void RunExample() 12 | { 13 | double[][] A = new double[4][]{ 14 | new double[4] { 1, 0, 2, -1}, 15 | new double[4] { 3, 0, 0, 5}, 16 | new double[4] { 2, 1, 4, -3}, 17 | new double[4] { 1, 0, 5, 0} 18 | }; 19 | 20 | RunExample(A); 21 | 22 | A = new double[3][]{ 23 | new double[3] { -2, 2, -3}, 24 | new double[3] { -1, 1, 3}, 25 | new double[3] { 2, 0, -1} 26 | }; 27 | 28 | RunExample(A); 29 | 30 | A = new double[3][]{ 31 | new double[3] { 1, 2, 3}, 32 | new double[3] { 4, 5, 6}, 33 | new double[3] { 7, 8, 2} 34 | }; 35 | 36 | RunExample(A); 37 | } 38 | 39 | public static void RunExample(double[][] A) 40 | { 41 | Console.WriteLine("A = {0}", MatrixOp.Summary(A)); 42 | 43 | double[][] C = MatrixOp.GetUpperTriangularMatrix(A); 44 | Console.WriteLine("C = {0}", MatrixOp.Summary(C)); 45 | 46 | double detA = MatrixOp.GetDeterminant(A); 47 | 48 | Console.WriteLine("det(A) = {0}", detA); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/MathUtils/FTMedian.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathUtils.Statistics; 6 | 7 | namespace HiddenMarkovModels.MathUtils.FT 8 | { 9 | public class FTMedian 10 | { 11 | public 
static void RunExample() 12 | { 13 | double[] a = new double[] { 4, 2, 6, 3, 5, 6, 1, 10, 10, 11, 32, 12 }; 14 | Console.WriteLine("Median: {0}", Median.GetMedian(a)); 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/MathUtils/FTPercentileFunction.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathUtils.SpecialFunctions; 6 | using HiddenMarkovModels.MathUtils.Distribution; 7 | 8 | namespace HiddenMarkovModels.MathUtils.FT 9 | { 10 | public class FTPercentileFunction 11 | { 12 | public static void RunExample() 13 | { 14 | double q = 1800; 15 | double mu = 1500; 16 | double sigma = 300; 17 | double p = Gaussian.GetPercentile(q, mu, sigma); 18 | Console.WriteLine("p = {0}", p); 19 | 20 | Console.WriteLine("p = {0}", Gaussian.GetPercentile(50, 45, 3.2)); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/MathUtils/FTQuantileFunction.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathUtils.Statistics; 6 | using HiddenMarkovModels.MathUtils.SpecialFunctions; 7 | using HiddenMarkovModels.MathUtils.Distribution; 8 | 9 | namespace HiddenMarkovModels.MathUtils.FT 10 | { 11 | public class FTQuantileFunction 12 | { 13 | public static void RunExample() 14 | { 15 | double prob = 0.95; 16 | double prob1 = (1 - prob) / 2; 17 | double prob2 = 1 - prob1; 18 | double z1 = Gaussian.GetQuantile(prob1); 19 | double z2 = Gaussian.GetQuantile(prob2); 20 | 21 | Console.WriteLine("{0:0.000}", z1); 22 | Console.WriteLine("{0:0.000}", z2); 23 | } 24 | } 25 | } 26 | 
-------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathUtils.FT; 6 | using HiddenMarkovModels.UT; 7 | 8 | namespace HiddenMarkovModels.FT 9 | { 10 | class Program 11 | { 12 | static void Main(string[] args) 13 | { 14 | //FTLogNormal.Run(); 15 | //FTQuantileFunction.RunExample(); 16 | //FTMedian.RunExample(); 17 | //FTPercentileFunction.RunExample(); 18 | //FTMergeSort.RunExample(); 19 | //FTANCOVA.RunExample(); 20 | FTMatrix.RunExample(); 21 | 22 | //HiddenMarkovModelUT.Evaluate(); 23 | //HiddenMarkovModelUT.Decode(); 24 | 25 | //HiddenMarkovModelUT.MaximumLikelihoodLearning(); 26 | //HiddenMarkovModelUT.ViterbiLearning(); 27 | //HiddenMarkovModelUT.BaumWelchLearning(); 28 | //HiddenMarkovModelUT.BaumWelchLearningContinuous(); 29 | 30 | //HiddenMarkovClassifierUT.LearnAndPredict(); 31 | HiddenMarkovClassifierUT.LearnAndPredictContinuous(); 32 | 33 | //HiddenMarkovModelUT.Generate(); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 
8 | [assembly: AssemblyTitle("cs-hidden-markov-models-samples")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("cs-hidden-markov-models-samples")] 13 | [assembly: AssemblyCopyright("Copyright © 2017")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("0af7bc97-3823-449d-994a-17c8a3cb982d")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /cs-hidden-markov-models-samples/cs-hidden-markov-models-samples.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | {0AF7BC97-3823-449D-994A-17C8A3CB982D} 8 | Exe 9 | Properties 10 | cs_hidden_markov_models_samples 11 | cs-hidden-markov-models-samples 12 | v4.5.2 13 | 512 14 | true 15 | 16 | 17 | AnyCPU 18 | true 19 | full 20 | false 21 | bin\Debug\ 22 | DEBUG;TRACE 23 | prompt 24 | 4 25 | 26 | 27 | AnyCPU 28 | pdbonly 29 | true 30 | bin\Release\ 31 | TRACE 32 | prompt 33 | 4 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 
53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | {812a4c20-2223-4093-a482-186cdde79470} 68 | cs-hidden-markov-models 69 | 70 | 71 | 72 | 79 | -------------------------------------------------------------------------------- /cs-hidden-markov-models.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.25123.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "cs-hidden-markov-models", "cs-hidden-markov-models\cs-hidden-markov-models.csproj", "{812A4C20-2223-4093-A482-186CDDE79470}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "cs-hidden-markov-models-samples", "cs-hidden-markov-models-samples\cs-hidden-markov-models-samples.csproj", "{0AF7BC97-3823-449D-994A-17C8A3CB982D}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Release|Any CPU = Release|Any CPU 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {812A4C20-2223-4093-A482-186CDDE79470}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 17 | {812A4C20-2223-4093-A482-186CDDE79470}.Debug|Any CPU.Build.0 = Debug|Any CPU 18 | {812A4C20-2223-4093-A482-186CDDE79470}.Release|Any CPU.ActiveCfg = Release|Any CPU 19 | {812A4C20-2223-4093-A482-186CDDE79470}.Release|Any CPU.Build.0 = Release|Any CPU 20 | {0AF7BC97-3823-449D-994A-17C8A3CB982D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {0AF7BC97-3823-449D-994A-17C8A3CB982D}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {0AF7BC97-3823-449D-994A-17C8A3CB982D}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {0AF7BC97-3823-449D-994A-17C8A3CB982D}.Release|Any CPU.Build.0 = Release|Any CPU 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 
28 | EndGlobal 29 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/Helpers/Codification.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace HiddenMarkovModels.Helpers 7 | { 8 | public class Codification 9 | { // Two-way table between symbols (strings) and the integer ids assigned to them in first-seen order. 10 | protected int mSymbolCount = 0; 11 | protected Dictionary<string, int> mSymbolIdTable = new Dictionary<string, int>(); 12 | protected List<string> mIdSymbolTable = new List<string>(); 13 | /// <summary>Number of distinct symbols registered so far.</summary> 14 | public int SymbolCount 15 | { 16 | get { return mSymbolCount; } 17 | } 18 | /// <summary>Registers every symbol of every row of phrases by calling Parse on each row.</summary> 19 | public Codification(string[][] phrases) 20 | { 21 | foreach (string[] row in phrases) 22 | { 23 | Parse(row); 24 | } 25 | } 26 | /// <summary>Maps each id of id_list to its symbol, element by element.</summary> 27 | public string[] Translate(int[] id_list) 28 | { 29 | int N = id_list.Length; 30 | string[] phrases = new string[N]; 31 | for (int i = 0; i < N; ++i) 32 | { 33 | phrases[i] = Translate(id_list[i]); 34 | } 35 | return phrases; 36 | } 37 | /// <summary>Row-by-row version of Translate(int[]).</summary> 38 | public string[][] Translate(int[][] id_matrix) 39 | { 40 | int N1 = id_matrix.Length; 41 | string[][] phrases = new string[N1][]; 42 | for (int i = 0; i < N1; ++i) 43 | { 44 | phrases[i] = Translate(id_matrix[i]); 45 | } 46 | return phrases; 47 | } 48 | /// <summary>Maps each symbol of phrases to its id, element by element.</summary> 49 | public int[] Translate(string[] phrases) 50 | { 51 | int N = phrases.Length; 52 | int[] id_list = new int[N]; 53 | for (int i = 0; i < N; ++i) 54 | { 55 | id_list[i] = Translate(phrases[i]); 56 | } 57 | 58 | return id_list; 59 | } 60 | /// <summary>Row-by-row version of Translate(string[]).</summary> 61 | public int[][] Translate(string[][] phrases) 62 | { 63 | int N1=phrases.Length; 64 | int[][] id_matrix = new int[N1][]; 65 | for (int i = 0; i < N1; ++i) 66 | { 67 | id_matrix[i] = Translate(phrases[i]); 68 | } 69 | return id_matrix; 70 | } 71 | /// <summary>Returns the id of word; throws ArgumentOutOfRangeException when the symbol was never registered.</summary> 72 | public int Translate(string word) 73 | { 74 | if(mSymbolIdTable.ContainsKey(word)) 75 | { 76 | return mSymbolIdTable[word]; 77 | } 78 | else 79 | { 80 | throw new ArgumentOutOfRangeException("word", string.Format("Symbol
{0} is not found in the codification", word)); 81 | } 82 | } 83 | /// <summary>Returns the symbol registered under id; throws ArgumentOutOfRangeException for any id that was never assigned, negative ids included.</summary> 84 | public string Translate(int id) 85 | { 86 | if (id >= 0 && mIdSymbolTable.Count > id) 87 | { 88 | return mIdSymbolTable[id]; 89 | } 90 | else 91 | { 92 | throw new ArgumentOutOfRangeException("id", string.Format("Symbol ID {0} is not found in the codification", id)); 93 | } 94 | } 95 | /// <summary>Registers each symbol not seen before, giving it the next free id (ids start at 0 and follow first-seen order).</summary> 96 | public void Parse(string[] phrases) 97 | { 98 | foreach (string word in phrases) 99 | { 100 | if (!mSymbolIdTable.ContainsKey(word)) 101 | { 102 | mSymbolIdTable[word] = mSymbolCount; 103 | mIdSymbolTable.Add(word); 104 | 105 | mSymbolCount++; 106 | } 107 | } 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/Helpers/DiagnosticsHelper.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace HiddenMarkovModels.Helpers 7 | { 8 | public class DiagnosticsHelper 9 | { 10 | public static void Assert(bool condition) 11 | { 12 | System.Diagnostics.Debug.Assert(condition); 13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/Helpers/MathHelper.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathUtils.Distribution; 6 | 7 | namespace HiddenMarkovModels.Helpers 8 | { 9 | public class MathHelper 10 | { 11 | //static Random mRandom; 12 | /// <summary>Seeds the generator by calling DistributionModel.SetSeed with seed cast to uint.</summary> 13 | public static void SetupGenerator(int seed) 14 | { 15 | //mRandom = new Random(seed); 16 | DistributionModel.SetSeed((uint)seed); 17 | } 18 | /// <summary>Forwards to distrubiton.LogProbabilityFunction(value).</summary> 19 | public static double LogProbabilityFunction(DistributionModel distrubiton, double value) 20 | { 21 | return distrubiton.LogProbabilityFunction(value); 22 | } 23 | 24 | public static
double NextDouble() 25 | { 26 | //if (mRandom == null) 27 | //{ 28 | // mRandom = new Random(); 29 | //} 30 | //return mRandom.NextDouble(); 31 | return DistributionModel.GetUniform(); 32 | } 33 | /// <summary>Copies row row_index of matrix into a new array.</summary> 34 | public static double[] GetRow(double[,] matrix, int row_index) 35 | { 36 | int column_count = matrix.GetLength(1); 37 | double[] row = new double[column_count]; 38 | 39 | for (int column_index = 0; column_index < column_count; ++column_index) 40 | { 41 | row[column_index] = matrix[row_index, column_index]; 42 | } 43 | return row; 44 | } 45 | /// <summary>Draws an index by comparing one uniform draw against the running sum of probabilities; throws InvalidOperationException when the draw is not below the total sum.</summary> 46 | public static int Random(double[] probabilities) 47 | { 48 | double uniform = NextDouble(); 49 | 50 | double cumulativeSum = 0; 51 | 52 | // Use the probabilities to partition the [0,1] interval 53 | // and check inside which range the values fall into. 54 | 55 | for (int i = 0; i < probabilities.Length; i++) 56 | { 57 | cumulativeSum += probabilities[i]; 58 | 59 | if (uniform < cumulativeSum) 60 | return i; 61 | } 62 | 63 | throw new InvalidOperationException("Generated value is not between 0 and 1."); 64 | } 65 | /// <summary>Splits vector into n = Length / size rows of size items; row i takes elements i, i + n, i + 2n, ... of vector.</summary> 66 | public static T[][] Split<T>(T[] vector, int size) 67 | { 68 | int n = vector.Length / size; 69 | T[][] r = new T[n][]; 70 | for (int i = 0; i < n; i++) 71 | { 72 | T[] ri = r[i] = new T[size]; 73 | for (int j = 0; j < size; j++) 74 | ri[j] = vector[j * n + i]; 75 | } 76 | return r; 77 | } 78 | /// <summary>Appends the rows of matrix one after another into a single array.</summary> 79 | public static T[] Concatenate<T>(T[][] matrix) 80 | { 81 | List<T> vector = new List<T>(); 82 | for (int i = 0; i < matrix.Length; ++i) 83 | { 84 | T[] row = matrix[i]; 85 | vector.AddRange(row); 86 | } 87 | return vector.ToArray(); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/Helpers/ValidationHelper.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace HiddenMarkovModels.Helpers 7 | { 8 |
public class ValidationHelper 9 | { 10 | public static void ValidateObservationDb(int[][] observations_db, int inclusive_lower_bound, int exclusive_upper_bound) 11 | { 12 | int K = observations_db.Length; 13 | for (int k = 0; k < K; ++k) 14 | { 15 | int[] observations = observations_db[k]; 16 | int T = observations.Length; 17 | for (int t = 0; t < T; ++t) 18 | { 19 | if (observations[t] >= exclusive_upper_bound || observations[t] < inclusive_lower_bound) 20 | { 21 | string error_message = string.Format("observation sequence contains symbol outside the range [{0}-{1})", inclusive_lower_bound, exclusive_upper_bound); 22 | throw new ArgumentOutOfRangeException("observations_db", error_message); 23 | } 24 | } 25 | } 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/HiddenMarkovClassifier.Continuous.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathHelpers; 6 | using System.Threading.Tasks; 7 | using HiddenMarkovModels.MathUtils.Distribution; 8 | using HiddenMarkovModels.Helpers; 9 | using HiddenMarkovModels.Topology; 10 | 11 | namespace HiddenMarkovModels 12 | { 13 | public partial class HiddenMarkovClassifier 14 | { 15 | 16 | public HiddenMarkovClassifier(int class_count, int[] state_count_array, DistributionModel B_distribution) 17 | { 18 | mClassCount = class_count; 19 | mSymbolCount = -1; 20 | 21 | DiagnosticsHelper.Assert(state_count_array.Length >= class_count); 22 | 23 | mModels = new HiddenMarkovModel[mClassCount]; 24 | for (int i = 0; i < mClassCount; ++i) 25 | { 26 | HiddenMarkovModel hmm = new HiddenMarkovModel(state_count_array[i], B_distribution); 27 | mModels[i] = hmm; 28 | } 29 | 30 | mClassPriors = new double[mClassCount]; 31 | for (int i = 0; i < mClassCount; ++i) 32 | { 33 | mClassPriors[i] = 1.0 / 
mClassCount; 34 | } 35 | } 36 | /// <summary>Builds class_count continuous models that all use the same topology and B_distribution; class priors start uniform and the symbol count is set to -1.</summary> 37 | public HiddenMarkovClassifier(int class_count, ITopology topology, DistributionModel B_distribution) 38 | { 39 | mClassCount = class_count; 40 | mSymbolCount = -1; 41 | 42 | mModels = new HiddenMarkovModel[mClassCount]; 43 | 44 | for (int i = 0; i < mClassCount; ++i) 45 | { 46 | HiddenMarkovModel hmm = new HiddenMarkovModel(topology, B_distribution); 47 | mModels[i] = hmm; 48 | } 49 | 50 | mClassPriors = new double[mClassCount]; 51 | for (int i = 0; i < mClassCount; ++i) 52 | { 53 | mClassPriors[i] = 1.0 / mClassCount; 54 | } 55 | } 56 | /// <summary>Builds class_count continuous models, model i using topology_array[i]; class priors start uniform and the symbol count is set to -1.</summary> 57 | public HiddenMarkovClassifier(int class_count, ITopology[] topology_array, DistributionModel B_distribution) 58 | { 59 | mClassCount = class_count; 60 | mSymbolCount = -1; 61 | 62 | DiagnosticsHelper.Assert(topology_array.Length >= class_count); 63 | 64 | mModels = new HiddenMarkovModel[mClassCount]; 65 | 66 | for (int i = 0; i < mClassCount; ++i) 67 | { 68 | HiddenMarkovModel hmm = new HiddenMarkovModel(topology_array[i], B_distribution); 69 | mModels[i] = hmm; 70 | } 71 | 72 | mClassPriors = new double[mClassCount]; 73 | for (int i = 0; i < mClassCount; ++i) 74 | { 75 | mClassPriors[i] = 1.0 / mClassCount; 76 | } 77 | } 78 | /// <summary>Log-likelihood of sequence under the prior-weighted mixture of class models: log of the sum over classes of P(class) * P(sequence | class).</summary> 79 | protected double LogLikelihood(double[] sequence) 80 | { 81 | double sum = Double.NegativeInfinity; 82 | 83 | for (int i = 0; i < mModels.Length; i++) 84 | { 85 | double prior = System.Math.Log(mClassPriors[i]); 86 | double model = mModels[i].Evaluate(sequence); 87 | double result = prior + model; 88 | // prior and model are logs, so the product P(class) * P(sequence | class) is their sum; Compute combines them the same way. 89 | sum = LogHelper.LogSum(sum, result); 90 | } 91 | 92 | return sum; 93 | } 94 | /// <summary>Classifies sequence and discards the per-class probabilities.</summary> 95 | public int Compute(double[] sequence) 96 | { 97 | double[] class_probabilities = null; 98 | return Compute(sequence, out class_probabilities); 99 | } 100 | /// <summary>Classifies sequence and also reports LogLikelihood(sequence) through logLikelihood.</summary> 101 | public int Compute(double[] sequence, out double logLikelihood) 102 | { 103 | double[] class_probabilities = null; 104 | int output = Compute(sequence, out class_probabilities); 105 | logLikelihood =
LogLikelihood(sequence); 106 | return output; 107 | } 108 | 109 | public int Compute(double[] sequence, out double[] class_probabilities) 110 | { 111 | double[] logLikelihoods = new double[mModels.Length]; 112 | double thresholdValue = Double.NegativeInfinity; 113 | 114 | 115 | Parallel.For(0, mModels.Length + 1, i => 116 | { 117 | if (i < mModels.Length) 118 | { 119 | logLikelihoods[i] = mModels[i].Evaluate(sequence); 120 | } 121 | else if (mThreshold != null) 122 | { 123 | thresholdValue = mThreshold.Evaluate(sequence); 124 | } 125 | }); 126 | 127 | double lnsum = Double.NegativeInfinity; 128 | for (int i = 0; i < mClassPriors.Length; i++) 129 | { 130 | logLikelihoods[i] = System.Math.Log(mClassPriors[i]) + logLikelihoods[i]; 131 | lnsum = LogHelper.LogSum(lnsum, logLikelihoods[i]); 132 | } 133 | 134 | if (mThreshold != null) 135 | { 136 | thresholdValue = System.Math.Log(mWeight) + thresholdValue; 137 | lnsum = LogHelper.LogSum(lnsum, thresholdValue); 138 | } 139 | 140 | int most_likely_model_index = 0; 141 | double most_likely_model_probablity = double.NegativeInfinity; 142 | for (int i = 0; i < mClassCount; ++i) 143 | { 144 | if (most_likely_model_probablity < logLikelihoods[i]) 145 | { 146 | most_likely_model_probablity = logLikelihoods[i]; 147 | most_likely_model_index = i; 148 | } 149 | } 150 | 151 | if (lnsum != Double.NegativeInfinity) 152 | { 153 | for (int i = 0; i < logLikelihoods.Length; i++) 154 | logLikelihoods[i] -= lnsum; 155 | } 156 | 157 | // Convert to probabilities 158 | class_probabilities = logLikelihoods; 159 | for (int i = 0; i < logLikelihoods.Length; i++) 160 | { 161 | class_probabilities[i] = System.Math.Exp(logLikelihoods[i]); 162 | } 163 | 164 | return (thresholdValue > most_likely_model_probablity) ? 
-1 : most_likely_model_index; 165 | } 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/HiddenMarkovClassifier.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.Helpers; 6 | using HiddenMarkovModels.Topology; 7 | using HiddenMarkovModels.MathHelpers; 8 | using System.Threading.Tasks; 9 | 10 | namespace HiddenMarkovModels 11 | { 12 | public partial class HiddenMarkovClassifier 13 | { 14 | protected int mClassCount; 15 | protected int mSymbolCount; 16 | 17 | protected HiddenMarkovModel[] mModels; 18 | protected HiddenMarkovModel mThreshold; 19 | protected double mWeight; 20 | protected double[] mClassPriors; 21 | 22 | 23 | /// <summary> 24 | /// Gets the prior distribution assumed for the classes. 25 | /// </summary> 26 | /// 27 | public double[] Priors 28 | { 29 | get { return mClassPriors; } 30 | } 31 | /// <summary>Number of classes; one model is created per class.</summary> 32 | public int ClassCount 33 | { 34 | get { return mClassCount; } 35 | } 36 | /// <summary>Symbol count given to the discrete constructors; the continuous constructors set it to -1.</summary> 37 | public int SymbolCount 38 | { 39 | get { return mSymbolCount; } 40 | } 41 | /// <summary>Optional threshold model; when it is set, Compute returns -1 if its weighted score exceeds the best class score.</summary> 42 | public HiddenMarkovModel Threshold 43 | { 44 | get { return mThreshold; } 45 | set { mThreshold = value; } 46 | } 47 | /// <summary>Builds class_count discrete models, model i having state_count_array[i] states over symbol_count symbols; class priors start uniform.</summary> 48 | public HiddenMarkovClassifier(int class_count, int[] state_count_array, int symbol_count) 49 | { 50 | mClassCount = class_count; 51 | mSymbolCount = symbol_count; 52 | 53 | DiagnosticsHelper.Assert(state_count_array.Length >= class_count); 54 | 55 | mModels = new HiddenMarkovModel[mClassCount]; 56 | for (int i = 0; i < mClassCount; ++i) 57 | { 58 | HiddenMarkovModel hmm = new HiddenMarkovModel(state_count_array[i], symbol_count); 59 | mModels[i] = hmm; 60 | } 61 | 62 | mClassPriors = new double[mClassCount]; 63 | for (int i = 0; i < mClassCount; ++i) 64 | { 65 | mClassPriors[i] = 1.0 / mClassCount; 66 | } 67 | } 68 | 69 | public HiddenMarkovClassifier(int
class_count, ITopology topology, int symbol_count) 70 | { 71 | mClassCount = class_count; 72 | mSymbolCount = symbol_count; 73 | 74 | mModels = new HiddenMarkovModel[mClassCount]; 75 | 76 | for (int i = 0; i < mClassCount; ++i) 77 | { 78 | HiddenMarkovModel hmm = new HiddenMarkovModel(topology, symbol_count); 79 | mModels[i] = hmm; 80 | } 81 | 82 | mClassPriors = new double[mClassCount]; 83 | for (int i = 0; i < mClassCount; ++i) 84 | { 85 | mClassPriors[i] = 1.0 / mClassCount; 86 | } 87 | } 88 | /// <summary>Builds class_count discrete models, model i using topology_array[i] over symbol_count symbols; class priors start uniform.</summary> 89 | public HiddenMarkovClassifier(int class_count, ITopology[] topology_array, int symbol_count) 90 | { 91 | mClassCount = class_count; 92 | mSymbolCount = symbol_count; 93 | 94 | DiagnosticsHelper.Assert(topology_array.Length >= class_count); 95 | 96 | mModels = new HiddenMarkovModel[mClassCount]; 97 | 98 | for (int i = 0; i < mClassCount; ++i) 99 | { 100 | HiddenMarkovModel hmm = new HiddenMarkovModel(topology_array[i], symbol_count); 101 | mModels[i] = hmm; 102 | } 103 | 104 | mClassPriors = new double[mClassCount]; 105 | for (int i = 0; i < mClassCount; ++i) 106 | { 107 | mClassPriors[i] = 1.0 / mClassCount; 108 | } 109 | } 110 | /// <summary>Log-likelihood of sequence under the prior-weighted mixture of class models: log of the sum over classes of P(class) * P(sequence | class).</summary> 111 | protected double LogLikelihood(int[] sequence) 112 | { 113 | double sum = Double.NegativeInfinity; 114 | 115 | for (int i = 0; i < mModels.Length; i++) 116 | { 117 | double prior = System.Math.Log(mClassPriors[i]); 118 | double model = mModels[i].Evaluate(sequence); 119 | double result = prior + model; 120 | // prior and model are logs, so the product P(class) * P(sequence | class) is their sum; Compute combines them the same way. 121 | sum = LogHelper.LogSum(sum, result); 122 | } 123 | 124 | return sum; 125 | } 126 | /// <summary>Classifies sequence and discards the per-class probabilities.</summary> 127 | public int Compute(int[] sequence) 128 | { 129 | double[] class_probabilities = null; 130 | return Compute(sequence, out class_probabilities); 131 | } 132 | 133 | public int Compute(int[] sequence, out double[] class_probabilities) 134 | { 135 | double[] logLikelihoods = new double[mModels.Length]; 136 | double thresholdValue = Double.NegativeInfinity; 137 | 138 | 139 | Parallel.For(0, mModels.Length + 1, i => 140 | { 141
| if (i < mModels.Length) 142 | { 143 | logLikelihoods[i] = mModels[i].Evaluate(sequence); 144 | } 145 | else if (mThreshold != null) 146 | { 147 | thresholdValue = mThreshold.Evaluate(sequence); 148 | } 149 | }); 150 | 151 | double lnsum = Double.NegativeInfinity; 152 | for (int i = 0; i < mClassPriors.Length; i++) 153 | { 154 | logLikelihoods[i] = System.Math.Log(mClassPriors[i]) + logLikelihoods[i]; 155 | lnsum = LogHelper.LogSum(lnsum, logLikelihoods[i]); 156 | } 157 | 158 | if (mThreshold != null) 159 | { 160 | thresholdValue = System.Math.Log(mWeight) + thresholdValue; 161 | lnsum = LogHelper.LogSum(lnsum, thresholdValue); 162 | } 163 | 164 | int most_likely_model_index = 0; 165 | double most_likely_model_probablity = double.NegativeInfinity; 166 | for(int i=0; i < mClassCount; ++i) 167 | { 168 | if(most_likely_model_probablity < logLikelihoods[i]) 169 | { 170 | most_likely_model_probablity=logLikelihoods[i]; 171 | most_likely_model_index=i; 172 | } 173 | } 174 | 175 | if (lnsum != Double.NegativeInfinity) 176 | { 177 | for (int i = 0; i < logLikelihoods.Length; i++) 178 | logLikelihoods[i] -= lnsum; 179 | } 180 | 181 | // Convert to probabilities 182 | class_probabilities = logLikelihoods; 183 | for (int i = 0; i < logLikelihoods.Length; i++) 184 | { 185 | class_probabilities[i] = System.Math.Exp(logLikelihoods[i]); 186 | } 187 | 188 | return (thresholdValue > most_likely_model_probablity) ? 
-1 : most_likely_model_index; 189 | } 190 | 191 | public HiddenMarkovModel[] Models 192 | { 193 | get { return mModels; } 194 | private set { } 195 | } 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/HiddenMarkovModel.Continuous.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathUtils.Distribution; 6 | using HiddenMarkovModels.Topology; 7 | using HiddenMarkovModels.Helpers; 8 | using HiddenMarkovModels.MathHelpers; 9 | 10 | namespace HiddenMarkovModels 11 | { 12 | public partial class HiddenMarkovModel 13 | { 14 | protected DistributionModel[] mEmissionModels; 15 | 16 | protected int mDimension = 1; 17 | protected bool mMultivariate; 18 | 19 | public int Dimension 20 | { 21 | get { return mDimension; } 22 | } 23 | 24 | public DistributionModel[] EmissionModels 25 | { 26 | get { return mEmissionModels; } 27 | } 28 | 29 | public HiddenMarkovModel(ITopology topology, DistributionModel emissions) 30 | { 31 | mStateCount = topology.Create(out mLogTransitionMatrix, out mLogProbabilityVector); 32 | 33 | mEmissionModels = new DistributionModel[mStateCount]; 34 | 35 | for (int i = 0; i < mStateCount; ++i) 36 | { 37 | mEmissionModels[i] = emissions.Clone(); 38 | } 39 | 40 | if (emissions is MultivariateDistributionModel) 41 | { 42 | mMultivariate = true; 43 | mDimension = ((MultivariateDistributionModel)mEmissionModels[0]).Dimension; 44 | } 45 | } 46 | 47 | public HiddenMarkovModel(ITopology topology, DistributionModel[] emissions) 48 | { 49 | mStateCount = topology.Create(out mLogTransitionMatrix, out mLogProbabilityVector); 50 | DiagnosticsHelper.Assert(emissions.Length == mStateCount); 51 | 52 | mEmissionModels = new DistributionModel[mStateCount]; 53 | 54 | for (int i = 0; i < mStateCount; ++i) 55 | { 56 | mEmissionModels[i] = 
emissions[i].Clone(); 57 | } 58 | 59 | if (emissions[0] is MultivariateDistributionModel) 60 | { 61 | mMultivariate = true; 62 | mDimension = ((MultivariateDistributionModel)mEmissionModels[0]).Dimension; 63 | } 64 | } 65 | /// <summary>Builds a continuous model from transition matrix A, one emission model per state and initial vector pi; A and pi are stored through LogHelper.Log and every emission model is cloned. The state count is pi.Length.</summary> 66 | public HiddenMarkovModel(double[,] A, DistributionModel[] emissions, double[] pi) 67 | { 68 | mStateCount = pi.Length; 69 | DiagnosticsHelper.Assert(emissions.Length == mStateCount); 70 | 71 | mLogTransitionMatrix = LogHelper.Log(A); 72 | mLogProbabilityVector = LogHelper.Log(pi); 73 | 74 | mEmissionModels = new DistributionModel[mStateCount]; 75 | 76 | for (int i = 0; i < mStateCount; ++i) 77 | { 78 | mEmissionModels[i] = emissions[i].Clone(); 79 | } 80 | 81 | if (emissions[0] is MultivariateDistributionModel) 82 | { 83 | mMultivariate = true; 84 | mDimension = ((MultivariateDistributionModel)mEmissionModels[0]).Dimension; 85 | } 86 | } 87 | /// <summary>Builds a continuous model with state_count states: all initial probability on state 0, uniform transitions, and a clone of emissions for every state.</summary> 88 | public HiddenMarkovModel(int state_count, DistributionModel emissions) 89 | { 90 | mStateCount = state_count; 91 | 92 | mLogTransitionMatrix = new double[mStateCount, mStateCount]; 93 | mLogProbabilityVector = new double[mStateCount]; 94 | 95 | mLogProbabilityVector[0] = 1.0; 96 | 97 | for (int i = 0; i < mStateCount; ++i) 98 | { 99 | mLogProbabilityVector[i] = System.Math.Log(mLogProbabilityVector[i]); 100 | 101 | for (int j = 0; j < mStateCount; ++j) 102 | { 103 | mLogTransitionMatrix[i, j] = System.Math.Log(1.0 / mStateCount); 104 | } 105 | } 106 | 107 | mEmissionModels = new DistributionModel[mStateCount]; 108 | 109 | for (int i = 0; i < mStateCount; ++i) 110 | { 111 | mEmissionModels[i] = emissions.Clone(); 112 | } 113 | 114 | if (emissions is MultivariateDistributionModel) 115 | { 116 | mMultivariate = true; 117 | mDimension = ((MultivariateDistributionModel)mEmissionModels[0]).Dimension; 118 | } 119 | } 120 | 121 | public HiddenMarkovModel(int state_count, DistributionModel[] emissions) 122 | { 123 | mStateCount = state_count; 124 |
DiagnosticsHelper.Assert(emissions.Length == mStateCount); 125 | 126 | mLogTransitionMatrix = new double[mStateCount, mStateCount]; 127 | mLogProbabilityVector = new double[mStateCount]; 128 | 129 | mLogProbabilityVector[0] = 1.0; 130 | 131 | for (int i = 0; i < mStateCount; ++i) 132 | { 133 | mLogProbabilityVector[i] = System.Math.Log(mLogProbabilityVector[i]); 134 | 135 | for (int j = 0; j < mStateCount; ++j) 136 | { 137 | mLogTransitionMatrix[i, j] = System.Math.Log(1.0 / mStateCount); 138 | } 139 | } 140 | 141 | mEmissionModels = new DistributionModel[mStateCount]; 142 | // Clone the model supplied for each state; the array is asserted above to hold one entry per state. 143 | for (int i = 0; i < mStateCount; ++i) 144 | { 145 | mEmissionModels[i] = emissions[i].Clone(); 146 | } 147 | 148 | if (emissions[0] is MultivariateDistributionModel) 149 | { 150 | mMultivariate = true; 151 | mDimension = ((MultivariateDistributionModel)mEmissionModels[0]).Dimension; 152 | } 153 | } 154 | /// <summary>Returns the log-likelihood that ForwardBackwardAlgorithm.LogForward reports for sequence.</summary> 155 | public double Evaluate(double[] sequence) 156 | { 157 | double logLikelihood; 158 | ForwardBackwardAlgorithm.LogForward(mLogTransitionMatrix, mEmissionModels, mLogProbabilityVector, sequence, out logLikelihood); 159 | 160 | return logLikelihood; 161 | } 162 | /// <summary>Returns the state path computed by Viterbi.LogForward for sequence, with its log-likelihood in logLikelihood.</summary> 163 | public int[] Decode(double[] sequence, out double logLikelihood) 164 | { 165 | return Viterbi.LogForward(mLogTransitionMatrix, mEmissionModels, mLogProbabilityVector, sequence, out logLikelihood); 166 | } 167 | /// <summary>Same as Decode(sequence, out logLikelihood) with the log-likelihood discarded.</summary> 168 | public int[] Decode(double[] sequence) 169 | { 170 | double logLikelihood; 171 | return Decode(sequence, out logLikelihood); 172 | } 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/HiddenMarkovModel.Generate.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.Helpers; 6 | using HiddenMarkovModels.MathHelpers; 7 | 8 | namespace HiddenMarkovModels 9 | { 10 | public
partial class HiddenMarkovModel 11 | { 12 | public int[] Generate(int samples) 13 | { 14 | int[] path; 15 | double logLikelihood; 16 | return Generate(samples, out path, out logLikelihood); 17 | } 18 | 19 | /// <summary> 20 | /// Generates a random vector of observations from the model. 21 | /// </summary> 22 | /// 23 | /// <param name="samples">The number of samples to generate.</param> 24 | /// <param name="logLikelihood">Log-sum, over the generated steps, of (log-probability of entering the sampled state + log-probability of emitting the sampled symbol).</param> 25 | /// <param name="path">The hidden states that were sampled, one per generated observation.</param> 26 | /// 27 | /// <remarks> 28 | /// Generate(int) is the convenience overload that discards path and logLikelihood. 29 | /// </remarks> 30 | /// 31 | /// <returns>A random vector of observations drawn from the model.</returns> 32 | /// 33 | public int[] Generate(int samples, out int[] path, out double logLikelihood) 34 | { 35 | double[] transitions = mLogProbabilityVector; 36 | double[] emissions; 37 | 38 | int[] observations = new int[samples]; 39 | logLikelihood = Double.NegativeInfinity; 40 | path = new int[samples]; 41 | 42 | 43 | // For each observation to be generated 44 | for (int t = 0; t < observations.Length; t++) 45 | { 46 | // Navigate randomly on one of the state transitions 47 | int state = MathHelper.Random(LogHelper.Exp(transitions)); 48 | 49 | // Generate a sample for the state 50 | emissions = MathHelper.GetRow(mLogEmissionMatrix, state); 51 | 52 | int symbol = MathHelper.Random(LogHelper.Exp(emissions)); 53 | 54 | // Store the sample 55 | observations[t] = symbol; 56 | path[t] = state; 57 | 58 | // Compute log-likelihood up to this point 59 | logLikelihood = LogHelper.LogSum(logLikelihood, transitions[state] + emissions[symbol]); 60 | 61 | // Continue sampling 62 | transitions = MathHelper.GetRow(mLogTransitionMatrix, state); 63 | } 64 | 65 | return observations; 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/HiddenMarkovModel.Predict.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using
System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathHelpers; 6 | 7 | namespace HiddenMarkovModels 8 | { 9 | public partial class HiddenMarkovModel 10 | { 11 | /// <summary> 12 | /// Predicts the next observations occurring after a given observation sequence; the per-step log-likelihood vectors are discarded. 13 | /// </summary> 14 | public int[] Predict(int[] observations, int next, out double logLikelihood) 15 | { 16 | double[][] logLikelihoods; 17 | return Predict(observations, next, out logLikelihood, out logLikelihoods); 18 | } 19 | 20 | /// <summary> 21 | /// Predicts the next observations occurring after a given observation sequence; both log-likelihood outputs are discarded. 22 | /// </summary> 23 | public int[] Predict(int[] observations, int next) 24 | { 25 | double logLikelihood; 26 | double[][] logLikelihoods; 27 | return Predict(observations, next, out logLikelihood, out logLikelihoods); 28 | } 29 | 30 | /// <summary> 31 | /// Predicts the next observations occurring after a given observation sequence; the scalar log-likelihood is discarded. 32 | /// </summary> 33 | public int[] Predict(int[] observations, int next, out double[][] logLikelihoods) 34 | { 35 | double logLikelihood; 36 | return Predict(observations, next, out logLikelihood, out logLikelihoods); 37 | } 38 | 39 | /// <summary> 40 | /// Predicts the next observation occurring after a given observation sequence. NOTE(review): probabilities receives logLikelihoods[0] unchanged, which looks like normalized log-weights rather than probabilities - confirm before exponentiating.
41 | /// </summary> 42 | public int Predict(int[] observations, out double[] probabilities) 43 | { 44 | double[][] logLikelihoods; 45 | double logLikelihood; 46 | int prediction = Predict(observations, 1, out logLikelihood, out logLikelihoods)[0]; 47 | probabilities = logLikelihoods[0]; 48 | return prediction; 49 | } 50 | 51 | /// <summary> 52 | /// Predicts the next observations occurring after a given observation sequence (starting from the forward variables returned by ForwardBackwardAlgorithm.LogForward) 53 | /// </summary> 54 | public int[] Predict(int[] observations, int next, out double logLikelihood, out double[][] logLikelihoods) 55 | { 56 | int T = next; 57 | double[,] logA = LogTransitionMatrix; 58 | double[,] logB = LogEmissionMatrix; 59 | double[] logPi = LogProbabilityVector; 60 | 61 | int[] prediction = new int[next]; 62 | logLikelihoods = new double[next][]; 63 | 64 | 65 | // Compute forward probabilities for the given observation sequence. 66 | double[,] lnFw0 = ForwardBackwardAlgorithm.LogForward(logA, logB, logPi, observations, out logLikelihood); 67 | 68 | // Create a matrix to store the future probabilities for the prediction 69 | // sequence and copy the latest forward probabilities on its first row. 70 | double[,] lnFwd = new double[T + 1, mStateCount]; 71 | 72 | 73 | // 1. Initialization 74 | for (int i = 0; i < mStateCount; i++) 75 | lnFwd[0, i] = lnFw0[observations.Length - 1, i]; 76 | 77 | // 2.
Induction 78 | for (int t = 0; t < T; t++) 79 | { 80 | double[] weights = new double[mSymbolCount]; 81 | for (int s = 0; s < mSymbolCount; s++) 82 | { 83 | weights[s] = Double.NegativeInfinity; 84 | 85 | for (int i = 0; i < mStateCount; i++) 86 | { 87 | double sum = Double.NegativeInfinity; 88 | for (int j = 0; j < mStateCount; j++) 89 | sum = LogHelper.LogSum(sum, lnFwd[t, j] + logA[j, i]); 90 | lnFwd[t + 1, i] = sum + logB[i, s]; 91 | 92 | weights[s] = LogHelper.LogSum(weights[s], lnFwd[t + 1, i]); 93 | } 94 | } 95 | 96 | double sumWeight = Double.NegativeInfinity; 97 | for (int i = 0; i < weights.Length; i++) 98 | sumWeight = LogHelper.LogSum(sumWeight, weights[i]); 99 | for (int i = 0; i < weights.Length; i++) 100 | weights[i] -= sumWeight; 101 | 102 | 103 | // Select most probable symbol 104 | double maxWeight = weights[0]; 105 | prediction[t] = 0; 106 | for (int i = 1; i < weights.Length; i++) 107 | { 108 | if (weights[i] > maxWeight) 109 | { 110 | maxWeight = weights[i]; 111 | prediction[t] = i; 112 | } 113 | } 114 | 115 | // Recompute log-likelihood 116 | logLikelihoods[t] = weights; 117 | logLikelihood = maxWeight; 118 | } 119 | 120 | 121 | return prediction; 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/HiddenMarkovModel.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathHelpers; 6 | using HiddenMarkovModels.Topology; 7 | 8 | namespace HiddenMarkovModels 9 | { 10 | public partial class HiddenMarkovModel 11 | { 12 | protected double[,] mLogTransitionMatrix; 13 | protected double[,] mLogEmissionMatrix; 14 | protected double[] mLogProbabilityVector; 15 | protected int mSymbolCount = 0; 16 | protected int mStateCount = 0; 17 | 18 | public double[,] LogTransitionMatrix 19 | { 20 | get { return 
mLogTransitionMatrix; } 21 | } 22 | 23 | public double[,] LogEmissionMatrix 24 | { 25 | get { return mLogEmissionMatrix; } 26 | } 27 | 28 | public double[] LogProbabilityVector 29 | { 30 | get { return mLogProbabilityVector; } 31 | } 32 | 33 | public double[,] TransitionMatrix 34 | { 35 | get { return LogHelper.Exp(mLogTransitionMatrix); } 36 | } 37 | 38 | public double[,] EmissionMatrix 39 | { 40 | get { return LogHelper.Exp(mLogEmissionMatrix); } 41 | } 42 | 43 | public double[] ProbabilityVector 44 | { 45 | get { return LogHelper.Exp(mLogProbabilityVector); } 46 | } 47 | 48 | /// <summary> 49 | /// The number of states in the hidden Markov model 50 | /// </summary> 51 | public int StateCount 52 | { 53 | get { return mStateCount; } 54 | } 55 | 56 | /// <summary> 57 | /// The size of symbol set used to construct any observation from this model 58 | /// </summary> 59 | public int SymbolCount 60 | { 61 | get { return mSymbolCount; } 62 | } 63 | /// <summary>Builds a discrete model from transition matrix A, emission matrix B and initial vector pi, each stored through LogHelper.Log; the state count is pi's length and the symbol count is B's column count.</summary> 64 | public HiddenMarkovModel(double[,] A, double[,] B, double[] pi) 65 | { 66 | mLogTransitionMatrix = LogHelper.Log(A); 67 | mLogEmissionMatrix = LogHelper.Log(B); 68 | mLogProbabilityVector = LogHelper.Log(pi); 69 | 70 | mStateCount = mLogProbabilityVector.Length; 71 | mSymbolCount = mLogEmissionMatrix.GetLength(1); 72 | } 73 | /// <summary>Builds a discrete model whose transitions and initial vector come from topology.Create; every emission entry starts at log(1 / symbol_count).</summary> 74 | public HiddenMarkovModel(ITopology topology, int symbol_count) 75 | { 76 | mSymbolCount = symbol_count; 77 | mStateCount = topology.Create(out mLogTransitionMatrix, out mLogProbabilityVector); 78 | 79 | mLogEmissionMatrix = new double[mStateCount, mSymbolCount]; 80 | 81 | for (int i = 0; i < mStateCount; i++) 82 | { 83 | for (int j = 0; j < mSymbolCount; j++) 84 | mLogEmissionMatrix[i, j] = System.Math.Log(1.0 / mSymbolCount); 85 | } 86 | } 87 | /// <summary>Builds a discrete model with state_count states and symbol_count symbols: all initial probability on state 0, uniform transitions and uniform emissions.</summary> 88 | public HiddenMarkovModel(int state_count, int symbol_count) 89 | { 90 | mStateCount = state_count; 91 | mSymbolCount = symbol_count; 92 | 93 | mLogTransitionMatrix = new double[mStateCount, mStateCount]; 94 | mLogProbabilityVector = new double[mStateCount]; 95 | mLogEmissionMatrix = new
double[mStateCount, mSymbolCount]; 96 | 97 | mLogProbabilityVector[0] = 1.0; 98 | 99 | for (int i = 0; i < mStateCount; ++i) 100 | { 101 | mLogProbabilityVector[i] = System.Math.Log(mLogProbabilityVector[i]); 102 | 103 | for (int j = 0; j < mStateCount; ++j) 104 | { 105 | mLogTransitionMatrix[i, j] = System.Math.Log(1.0 / mStateCount); 106 | } 107 | } 108 | 109 | for (int i = 0; i < mStateCount; i++) 110 | { 111 | for (int j = 0; j < mSymbolCount; j++) 112 | mLogEmissionMatrix[i, j] = System.Math.Log(1.0 / mSymbolCount); 113 | } 114 | } 115 | 116 | public double Evaluate(int[] sequence) 117 | { 118 | double logLikelihood; 119 | ForwardBackwardAlgorithm.LogForward(mLogTransitionMatrix, mLogEmissionMatrix, mLogProbabilityVector, sequence, out logLikelihood); 120 | 121 | return logLikelihood; 122 | } 123 | 124 | public int[] Decode(int[] sequence, out double logLikelihood) 125 | { 126 | return Viterbi.LogForward(mLogTransitionMatrix, mLogEmissionMatrix, mLogProbabilityVector, sequence, out logLikelihood); 127 | } 128 | 129 | public int[] Decode(int[] sequence) 130 | { 131 | double logLikelihood; 132 | return Decode(sequence, out logLikelihood); 133 | } 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/Learning/Supervised/HiddenMarkovClassifierLearning.Continuous.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.Learning.Unsupervised; 6 | using System.Threading.Tasks; 7 | using HiddenMarkovModels.Helpers; 8 | using System.Threading; 9 | 10 | namespace HiddenMarkovModels.Learning.Supervised 11 | { 12 | public partial class HiddenMarkovClassifierLearning 13 | { 14 | public double ComputeError(double[][] inputs, int[] outputs) 15 | { 16 | int errors = 0; 17 | Parallel.For(0, inputs.Length, i => 18 | { 19 | int expectedOutput = 
/// <summary>
/// Trains one model per class on the continuous observation sequences labelled
/// with that class, using the learner produced by <c>mAlgorithmEntity</c>.
/// </summary>
/// <param name="observations_db">Observation sequences, one per record.</param>
/// <param name="class_labels">Class index of each record; must have the same length as <paramref name="observations_db"/>.</param>
/// <returns>The sum of the values returned by each class learner's <c>Run</c> (0 for classes without records).</returns>
public double Run(double[][] observations_db, int[] class_labels)
{
    int class_count = mClassifier.ClassCount;
    double[] logLikelihood = new double[class_count];

    int K = class_labels.Length;

    DiagnosticsHelper.Assert(observations_db.Length == K);

    int[] class_label_counts = new int[class_count];

    // Each iteration writes only to its own slot of the shared arrays.
    Parallel.For(0, class_count, i =>
    {
        IUnsupervisedLearning teacher = mAlgorithmEntity(i);

        // Indices of the records that belong to class i.
        List<int> match_record_index_set = new List<int>();
        for (int k = 0; k < K; ++k)
        {
            if (class_labels[k] == i)
            {
                match_record_index_set.Add(k);
            }
        }

        int K2 = match_record_index_set.Count;

        class_label_counts[i] = K2;

        if (K2 != 0)
        {
            double[][] observations_subdb = new double[K2][];
            for (int k = 0; k < K2; ++k)
            {
                int record_index = match_record_index_set[k];
                observations_subdb[k] = observations_db[record_index];
            }

            logLikelihood[i] = teacher.Run(observations_subdb);
        }
    });

    // Empirical priors are the class frequencies; skipped for an empty training
    // set, where count / K would be 0/0 = NaN.
    if (mEmpirical && K > 0)
    {
        for (int i = 0; i < class_count; i++)
        {
            mClassifier.Priors[i] = (double)class_label_counts[i] / K;
        }
    }

    //if (mRejection)
    //{
    //    mClassifier.Threshold = Threshold();
    //}

    return logLikelihood.Sum();
}
/// <summary>
/// Classifies every input sequence in parallel and reports the fraction whose
/// predicted class differs from the expected one.
/// </summary>
/// <param name="inputs">Discrete observation sequences to classify.</param>
/// <param name="outputs">Expected class index for each sequence.</param>
/// <returns>Number of misclassified sequences divided by <c>inputs.Length</c>.</returns>
public double ComputeError(int[][] inputs, int[] outputs)
{
    int total = inputs.Length;
    int misclassified = 0;

    // Each worker counts its own misses and folds them into the shared total once.
    Parallel.For(0, total,
        () => 0,
        (index, loopState, localMisses) =>
        {
            int expected = outputs[index];
            int predicted = mClassifier.Compute(inputs[index]);
            return expected == predicted ? localMisses : localMisses + 1;
        },
        localMisses => Interlocked.Add(ref misclassified, localMisses));

    return misclassified / (double)total;
}
/// <summary>
/// Supervised maximum-likelihood estimation for a continuous-emission model:
/// counts initial states and transitions along the given state paths, hands each
/// state's observations to its emission model, and writes the log-probabilities
/// back into the model.
/// </summary>
/// <param name="observations_db">Observation sequences.</param>
/// <param name="path_db">State path for each sequence; must have as many entries as <paramref name="observations_db"/>.</param>
/// <returns>The <c>LogHelper.LogSum</c> accumulation of <c>mModel.Evaluate</c> over all sequences.</returns>
public double Run(double[][] observations_db, int[][] path_db)
{
    int K = observations_db.Length;

    DiagnosticsHelper.Assert(path_db.Length == K);

    int N = mModel.StateCount;

    int[] initial = new int[N];
    int[,] transition_matrix = new int[N, N];

    // Observations grouped by the state that emitted them.
    List<double>[] clusters = new List<double>[N];
    for (int i = 0; i < N; i++)
        clusters[i] = new List<double>();

    for (int k = 0; k < K; ++k)
    {
        int[] path = path_db[k];
        double[] observations = observations_db[k];
        int T = path.Length;

        initial[path[0]]++;

        for (int t = 0; t < T - 1; ++t)
        {
            transition_matrix[path[t], path[t + 1]]++;
        }

        for (int t = 0; t < T; ++t)
        {
            clusters[path[t]].Add(observations[t]);
        }
    }

    // Re-estimate the emission model of every state that was observed at least once.
    for (int i = 0; i < N; i++)
    {
        if (clusters[i].Count > 0)
        {
            mModel.EmissionModels[i].Process(clusters[i].ToArray());
        }
    }

    if (mUseLaplaceRule)
    {
        // Add-one smoothing so that no initial or transition probability is zero.
        for (int i = 0; i < N; ++i)
        {
            initial[i]++;

            for (int j = 0; j < N; ++j)
            {
                transition_matrix[i, j]++;
            }
        }
    }

    int initial_sum = initial.Sum();
    int[] transition_sum_vec = Sum(transition_matrix, 1);

    // With no counts at all the ratio would be 0/0 = NaN; keep the current values instead.
    if (initial_sum > 0)
    {
        for (int i = 0; i < N; ++i)
        {
            mModel.LogProbabilityVector[i] = System.Math.Log(initial[i] / (double)initial_sum);
        }
    }

    for (int i = 0; i < N; ++i)
    {
        // A state with no observed outgoing transition (possible when Laplace
        // smoothing is off) keeps its current row rather than becoming NaN.
        if (transition_sum_vec[i] == 0)
        {
            continue;
        }

        double transition_sum = (double)transition_sum_vec[i];
        for (int j = 0; j < N; ++j)
        {
            mModel.LogTransitionMatrix[i, j] = System.Math.Log(transition_matrix[i, j] / transition_sum);
        }
    }

    double logLikelihood = double.NegativeInfinity;
    for (int i = 0; i < observations_db.Length; i++)
        logLikelihood = LogHelper.LogSum(logLikelihood, mModel.Evaluate(observations_db[i]));

    return logLikelihood;
}
/// <summary>
/// Sums an integer matrix along one dimension.
/// </summary>
/// <param name="matrix">The matrix to reduce.</param>
/// <param name="dimension">0 to add up each column (one total per column), 1 to add up each row (one total per row).</param>
/// <returns>The vector of totals, or null when <paramref name="dimension"/> is neither 0 nor 1.</returns>
private static int[] Sum(int[,] matrix, int dimension)
{
    int rowCount = matrix.GetLength(0);
    int colCount = matrix.GetLength(1);

    if (dimension != 0 && dimension != 1)
    {
        return null;
    }

    bool collapseRows = dimension == 0;
    int[] totals = new int[collapseRows ? colCount : rowCount];

    for (int r = 0; r < rowCount; ++r)
    {
        for (int c = 0; c < colCount; ++c)
        {
            totals[collapseRows ? c : r] += matrix[r, c];
        }
    }

    return totals;
}
/// <summary>
/// Viterbi training for a discrete hidden Markov model: repeatedly decodes the
/// most likely state path of every sequence and re-estimates the model from those
/// paths with <see cref="MaximumLikelihoodLearning"/>.
/// </summary>
public partial class ViterbiLearning : IUnsupervisedLearning
{
    protected MaximumLikelihoodLearning mMaximumLikelihoodLearner;
    protected HiddenMarkovModel mModel;
    protected int mIterations = 0;
    protected double mTolerance = 0.001;

    /// <summary>
    /// The model being trained.
    /// </summary>
    public HiddenMarkovModel Model
    {
        get { return mModel; }
        set { mModel = value; }
    }

    /// <summary>
    /// Maximum number of iterations; 0 (the default) disables the cap.
    /// </summary>
    public int Iterations
    {
        get { return mIterations; }
        set { mIterations = value; }
    }

    /// <summary>
    /// Training stops once the absolute change in log-likelihood is at most this value.
    /// </summary>
    public double Tolerance
    {
        get { return mTolerance; }
        set { mTolerance = value; }
    }

    /// <summary>
    /// Forwarded to the inner maximum-likelihood learner.
    /// </summary>
    public bool UseLaplaceRule
    {
        get
        {
            return mMaximumLikelihoodLearner.UseLaplaceRule;
        }
        set
        {
            mMaximumLikelihoodLearner.UseLaplaceRule = value;
        }
    }

    public ViterbiLearning(HiddenMarkovModel hmm)
    {
        mModel = hmm;
        mMaximumLikelihoodLearner = new MaximumLikelihoodLearning(hmm);
    }

    /// <summary>
    /// Trains the model on the given discrete observation sequences.
    /// </summary>
    /// <param name="observations_db">Observation sequences.</param>
    /// <returns>The accumulated log-likelihood of the sequences under the trained model.</returns>
    public double Run(int[][] observations_db)
    {
        int K = observations_db.Length;

        double currLogLikelihood = AccumulateLogLikelihood(observations_db);
        double deltaLogLikelihood;
        int iteration = 0;

        do
        {
            double oldLogLikelihood = currLogLikelihood;

            // Decode the best state path of every sequence under the current model.
            int[][] paths_db = new int[K][];
            for (int k = 0; k < K; ++k)
            {
                paths_db[k] = mModel.Decode(observations_db[k]);
            }

            // Re-estimate the model parameters from the decoded paths.
            mMaximumLikelihoodLearner.Run(observations_db, paths_db);

            currLogLikelihood = AccumulateLogLikelihood(observations_db);

            deltaLogLikelihood = System.Math.Abs(currLogLikelihood - oldLogLikelihood);
            iteration++;
        } while (!ShouldTerminate(deltaLogLikelihood, iteration));

        return currLogLikelihood;
    }

    /// <summary>
    /// Folds the log-likelihood of every sequence together with LogHelper.LogSum.
    /// </summary>
    private double AccumulateLogLikelihood(int[][] observations_db)
    {
        double total = double.NegativeInfinity;
        for (int k = 0; k < observations_db.Length; ++k)
        {
            total = LogHelper.LogSum(total, mModel.Evaluate(observations_db[k]));
        }
        return total;
    }

    /// <summary>
    /// Decides whether training should stop.
    /// </summary>
    /// <param name="change">Absolute change in log-likelihood during the last iteration.</param>
    /// <param name="iteration">Number of iterations completed so far.</param>
    /// <returns>True when the change is within tolerance or undefined, or when the iteration cap is reached.</returns>
    protected bool ShouldTerminate(double change, int iteration)
    {
        // (-inf) - (-inf) is NaN and NaN never compares <= tolerance; without this
        // check an uncapped run (Iterations == 0) would loop forever.
        if (double.IsNaN(change) || change <= mTolerance)
        {
            return true;
        }

        if (mIterations > 0 && mIterations <= iteration)
        {
            return true;
        }

        return false;
    }
}
/// <summary>
/// Computes log(1 + x) without losing precision for small values of x.
/// </summary>
/// <remarks>
/// References:
/// - http://www.johndcook.com/csharp_log_one_plus_x.html
/// </remarks>
public static double Log1p(double x)
{
    if (x <= -1.0)
        return Double.NaN;

    // For |x| <= 1e-4 use the two-term Taylor series x - x^2/2.
    bool tiny = System.Math.Abs(x) <= 1e-4;
    return tiny ? (-0.5 * x + 1.0) * x : System.Math.Log(1.0 + x);
}

/// <summary>
/// Computes log(a + c) from ln(a) and ln(c) without leaving log space.
/// </summary>
public static double LogSum(double lna, double lnc)
{
    if (double.IsNegativeInfinity(lna))
        return lnc;
    if (double.IsNegativeInfinity(lnc))
        return lna;

    // Factor out the larger term so the exponent passed to Exp is never positive.
    double larger = lnc;
    double smaller = lna;
    if (lna > lnc)
    {
        larger = lna;
        smaller = lnc;
    }

    return larger + Log1p(System.Math.Exp(smaller - larger));
}

/// <summary>
/// Computes log(a + c) from ln(a) and ln(c) given as single-precision values.
/// </summary>
public static double LogSum(float lna, float lnc)
{
    if (float.IsNegativeInfinity(lna))
        return lnc;
    if (float.IsNegativeInfinity(lnc))
        return lna;

    float larger = lnc;
    float smaller = lna;
    if (lna > lnc)
    {
        larger = lna;
        smaller = lnc;
    }

    return larger + Log1p(System.Math.Exp(smaller - larger));
}

/// <summary>
/// Elementwise natural logarithm of a matrix; returns a new matrix.
/// </summary>
public static double[,] Log(this double[,] value)
{
    int rowCount = value.GetLength(0);
    int colCount = value.GetLength(1);

    double[,] result = new double[rowCount, colCount];
    for (int row = 0; row < rowCount; ++row)
    {
        for (int col = 0; col < colCount; ++col)
        {
            result[row, col] = System.Math.Log(value[row, col]);
        }
    }
    return result;
}

/// <summary>
/// Elementwise exponential of a matrix; returns a new matrix.
/// </summary>
public static double[,] Exp(this double[,] value)
{
    int rowCount = value.GetLength(0);
    int colCount = value.GetLength(1);

    double[,] result = new double[rowCount, colCount];
    for (int row = 0; row < rowCount; ++row)
    {
        for (int col = 0; col < colCount; ++col)
        {
            result[row, col] = System.Math.Exp(value[row, col]);
        }
    }
    return result;
}

/// <summary>
/// Elementwise exponential of a vector; returns a new vector.
/// </summary>
public static double[] Exp(this double[] value)
{
    int length = value.Length;
    double[] result = new double[length];
    for (int index = 0; index < length; ++index)
    {
        result[index] = System.Math.Exp(value[index]);
    }
    return result;
}

/// <summary>
/// Elementwise natural logarithm of a vector; returns a new vector.
/// </summary>
public static double[] Log(this double[] value)
{
    int length = value.Length;
    double[] result = new double[length];
    for (int index = 0; index < length; ++index)
    {
        result[index] = System.Math.Log(value[index]);
    }
    return result;
}
/// <summary>
/// Ascending merge sort for double arrays; ranges spanning fewer than six
/// elements are finished with a selection sort.
/// </summary>
public class MergeSort
{
    /// <summary>
    /// Sorts the whole array in place.
    /// </summary>
    public static void Sort(double[] a)
    {
        double[] scratch = new double[a.Length];
        Sort(a, scratch, 0, a.Length - 1);
    }

    /// <summary>
    /// Sorts a[lo..hi] (inclusive) in place, using aux as merge scratch space.
    /// </summary>
    public static void Sort(double[] a, double[] aux, int lo, int hi)
    {
        if (hi - lo >= 5)
        {
            int mid = (hi + lo) / 2;
            Sort(a, aux, lo, mid, hi);
        }
        else
        {
            SelectionSort(a, lo, hi);
        }
    }

    /// <summary>
    /// Sorts a[lo..mid] and a[mid+1..hi] separately, then merges them into a[lo..hi].
    /// </summary>
    public static void Sort(double[] a, double[] aux, int lo, int mid, int hi)
    {
        Sort(a, aux, lo, mid);
        Sort(a, aux, mid + 1, hi);

        int left = lo;
        int right = mid + 1;
        for (int k = lo; k <= hi; ++k)
        {
            // Take from the left run only while it has elements and its head is
            // strictly smaller (or the right run is exhausted).
            bool takeLeft = left <= mid && (right > hi || a[left] < a[right]);
            aux[k] = takeLeft ? a[left++] : a[right++];
        }

        int pos = lo;
        while (pos <= hi)
        {
            a[pos] = aux[pos];
            pos++;
        }
    }

    /// <summary>
    /// In-place selection sort of a[lo..hi] (inclusive).
    /// </summary>
    private static void SelectionSort(double[] a, int lo, int hi)
    {
        for (int slot = lo; slot <= hi; ++slot)
        {
            int minIndex = slot;
            for (int probe = slot + 1; probe <= hi; ++probe)
            {
                if (a[minIndex] > a[probe])
                {
                    minIndex = probe;
                }
            }

            if (minIndex != slot)
            {
                double held = a[slot];
                a[slot] = a[minIndex];
                a[minIndex] = held;
            }
        }
    }
}
/// <summary>
/// Polynomial evaluation by Horner's scheme; coefficients are ordered from the
/// highest power down to the constant term.
/// </summary>
public class PolynomialHelper
{
    /// <summary>
    /// Evaluates a polynomial of degree n, reading coef[0] through coef[n].
    /// </summary>
    public static double Polevl(double x, double[] coef, int n)
    {
        double acc = coef[0];

        int idx = 0;
        while (idx < n)
        {
            idx++;
            acc = acc * x + coef[idx];
        }

        return acc;
    }

    /// <summary>
    /// Evaluates a polynomial of degree n whose leading coefficient is an implicit
    /// 1.0, reading coef[0] through coef[n-1].
    /// </summary>
    public static double P1evl(double x, double[] coef, int n)
    {
        double acc = x + coef[0];

        for (int k = 1; k < n; ++k)
        {
            acc = acc * x + coef[k];
        }

        return acc;
    }
}
/// <summary>
/// Binomial conditions:
/// 1. the trials must be independent
/// 2. the number of trials, N, must be fixed
/// 3. each trial outcome must be classified as a success or failure
/// </summary>
public class Binomial : DistributionModel
{
    public double mP = 0.5; // probability of success in a Bernoulli trial
    public int mN = 10; // number of Bernoulli trials

    /// <summary>
    /// Probability of success
    /// </summary>
    public double P
    {
        get { return mP; }
        set { mP = value; }
    }

    /// <summary>
    /// The number of Bernoulli trials
    /// </summary>
    public int N
    {
        get { return mN; }
        set { mN = value; }
    }

    /// <summary>
    /// Simulates N trials and returns the number of successes.
    /// </summary>
    public override double Next()
    {
        int count = 0;
        for (int i = 0; i < mN; ++i)
        {
            count += GetUniform() <= mP ? 1 : 0;
        }
        return count;
    }

    public override DistributionModel Clone()
    {
        Binomial clone = new Binomial();
        clone.P = mP;
        clone.N = mN;
        return clone;
    }

    /// <summary>
    /// Natural log of the probability mass at floor(k).
    /// </summary>
    public override double LogProbabilityFunction(double k)
    {
        return System.Math.Log(GetPMF((int)System.Math.Floor(k)));
    }

    public override double GetPDF(double x)
    {
        throw new NotImplementedException("Binomial distribution does not have a PDF");
    }

    /// <summary>
    /// Return the probability P(x &lt;= K), which is the probability that at most K successes are observed out of total of n Bernoulli trials
    /// </summary>
    /// <param name="K">The number of Bernoulli trials in which a success is observed</param>
    /// <param name="n">The total number of Bernoulli trials</param>
    /// <param name="p">The probability that a success is observed in a Bernoulli trial</param>
    /// <returns>P(x &lt;= K)</returns>
    public static double GetProbabilityLessEqualTo(int K, int n, double p)
    {
        // No outcome exceeds n successes, so terms beyond n carry no mass.
        int upper = System.Math.Min(K, n);

        double prob = 0;
        for (int i = 0; i <= upper; ++i)
        {
            prob += GetPMF(i, n, p);
        }
        return prob;
    }

    public override double GetCDF(double x)
    {
        int k = (int)(System.Math.Floor(x));

        return GetProbabilityLessEqualTo(k, mN, mP);
    }

    /// <summary>
    /// Attempt to approximate this distribution by a normal distribution N(mu, sigma)
    /// </summary>
    /// <param name="mu">Receives N * P</param>
    /// <param name="sigma">Receives sqrt(N * P * (1 - P))</param>
    /// <returns>True if the expected numbers of successes and failures are both at least 10</returns>
    public bool TryApproximateNormalDistribution(out double mu, out double sigma)
    {
        double expected_success_count = mN * mP;
        double expected_failure_count = mN * (1 - mP);
        bool can_approx_normal = expected_failure_count >= 10 && expected_success_count >= 10; //when expected number successes and failures is >= 10, can approximate by a normal distribution

        mu = mN * mP;
        sigma = System.Math.Sqrt(mN * mP * (1 - mP));
        return can_approx_normal;
    }

    /// <summary>
    /// Return the probability mass function: P(x = k) = C(n, k) * p^k * (1-p)^(n-k), which is the probability that k successes are observed out of total of n Bernoulli trials
    /// </summary>
    /// <param name="k">The number of Bernoulli trials in which a success is observed</param>
    /// <param name="n">The total number of Bernoulli trials</param>
    /// <param name="p">The probability that a success is observed in a Bernoulli trial</param>
    /// <returns>P(x = k)</returns>
    public static double GetPMF(int k, int n, double p)
    {
        return BinomialCoeff(k, n) * System.Math.Pow(p, k) * System.Math.Pow(1 - p, n - k);
    }

    public override double GetPMF(int k)
    {
        return GetPMF(k, mN, mP);
    }

    /// <summary>
    /// Binomial coefficient C(n, k) = n! / (k! * (n-k)!).
    /// </summary>
    /// <param name="k">Number of successes</param>
    /// <param name="n">Number of trials</param>
    /// <returns>C(n, k), or 0 when k is outside [0, n]</returns>
    public static double BinomialCoeff(int k, int n)
    {
        if (k < 0 || k > n)
        {
            return 0.0;
        }

        // Multiplicative formula over the smaller of k and n-k; each partial
        // product is itself a binomial coefficient, so factorials are not needed.
        int m = System.Math.Min(k, n - k);
        double coeff = 1.0;
        for (int i = 1; i <= m; ++i)
        {
            coeff = coeff * (n - m + i) / i;
        }
        return coeff;
    }

    /// <summary>
    /// Given a set of simulations, each simulation i representing N Bernoulli trials, and values[i] is the number of successes in simulation i, compute the P, mean, and standard deviation
    /// </summary>
    /// <param name="values">values[i] is the number of trials out of the N Bernoulli trials (in the simulation #i) in which success is observed</param>
    public override void Process(double[] values)
    {
        int count = values.Length;
        double[] p = new double[count];
        for (int i = 0; i < count; ++i)
        {
            p[i] = values[i] / mN;
        }
        mP = Statistics.Mean.GetMean(p);
        mMean = mN * mP;
        mStdDev = System.Math.Sqrt(mN * mP * (1 - mP));
    }

    public override void Process(double[] values, double[] weights)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Return P(x &lt;= k) for a binomial variable with N trials and success probability p.
    /// </summary>
    /// <param name="k">Number of successes</param>
    /// <param name="N">Number of trials</param>
    /// <param name="p">Probability of success in one trial</param>
    /// <param name="fast">When true and both expected counts are at least 10, use the normal approximation instead of the exact sum</param>
    /// <returns>P(x &lt;= k)</returns>
    public static double GetPercentile(int k, int N, double p, bool fast = false)
    {
        double expected_success_count = N * p;
        double expected_failure_count = N * (1 - p);
        bool can_approx_normal = expected_failure_count >= 10 && expected_success_count >= 10; //when expected number successes and failures is >= 10, can approximate by a normal distribution
        if (!can_approx_normal || !fast)
        {
            return Binomial.GetProbabilityLessEqualTo(k, N, p);
        }
        else
        {
            double mu = N * p;
            double sigma = System.Math.Sqrt(N * p * (1 - p));
            // Continuity correction: P(X <= k) for the discrete variable maps to
            // P(Y <= k + 0.5) for the normal one (k - 0.5 would give P(X <= k - 1)).
            double k_adj = k + 0.5;
            double z = (k_adj - mu) / sigma;
            return Gaussian.GetPercentile(z);
        }
    }
}
The critical value F for p = P(x <= F) 46 | public static double GetQuantile(double p, int df) 47 | { 48 | double fval; 49 | double maxf = 99999.0; 50 | double minf = .000001; 51 | 52 | if (p <= 0.0 || p >= 1.0) 53 | return (0.0); 54 | 55 | fval = 1.0 / p; // initial value for guess fval, the smaller the p, the larger the F 56 | 57 | while (System.Math.Abs(maxf - minf) > .000001) 58 | { 59 | if (GetPercentile(fval, df) > p) // F too large 60 | maxf = fval; 61 | else // F too small 62 | minf = fval; 63 | fval = (maxf + minf) * 0.5; 64 | } 65 | 66 | return (fval); 67 | } 68 | 69 | /// 70 | /// Return the P(y < x) where y follows the Chi^2 distribution 71 | /// 72 | /// reference value for y which follows the Chi^2 distribution 73 | /// degrees of freedom 74 | /// The cumulative probability P(y < x) 75 | public static double GetPercentile(double x, int df) 76 | { 77 | return 1 - ChiSquaredProbability(x, df); 78 | } 79 | 80 | /// 81 | /// Return the P(y > x) where y follows the Chi^2 distribution 82 | /// 83 | /// The implementation here is adapted from http://www.cs.umb.edu/~rickb/files/disc_proj/disc/weka/weka-3-2-3/weka/core/Statistics.java 84 | /// 85 | /// reference value for y which follows the Chi^2 distribution 86 | /// degrees of freedom 87 | /// The probability P(y > x) 88 | private static double ChiSquaredProbability(double x, int df) 89 | { 90 | double a, y = 0, s, e, c, z, val; 91 | bool even; 92 | 93 | if (x <= 0 || df < 1) 94 | return (1); 95 | a = 0.5 * x; 96 | even = (((int)(2 * (df / 2))) == df); 97 | if (df > 1) 98 | y = System.Math.Exp(-a); //((-a < -bigx) ? 0.0 : Math.exp (-a)); 99 | s = (even ? y : (2.0 * Gaussian.GetPercentile(-System.Math.Sqrt(x)))); 100 | if (df > 2) 101 | { 102 | x = 0.5 * (df - 1.0); 103 | z = (even ? 1.0 : 0.5); 104 | if (a > bigx) 105 | { 106 | e = (even ? 
0.0 : logSqrtPi); 107 | c = System.Math.Log(a); 108 | while (z <= x) 109 | { 110 | e = System.Math.Log(z) + e; 111 | val = c * z - a - e; 112 | s += System.Math.Exp(val); //((val < -bigx) ? 0.0 : Math.exp (val)); 113 | z += 1.0; 114 | } 115 | return (s); 116 | } 117 | else 118 | { 119 | e = (even ? 1.0 : (rezSqrtPi / System.Math.Sqrt(a))); 120 | c = 0.0; 121 | while (z <= x) 122 | { 123 | e = e * (a / z); 124 | c = c + e; 125 | z += 1.0; 126 | } 127 | return (c * y + s); 128 | } 129 | } 130 | else 131 | { 132 | return (s); 133 | } 134 | } 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/MathUtils/Distribution/Erlang.cs: -------------------------------------------------------------------------------- 1 | /// \file Erlang.cs 2 | /// 3 | /// Contains the class representing a random number generator based on Erlang distribution Erlang(\f$k\f$, \f$\lambda\f$). 4 | /// 5 | using System; 6 | using System.Collections.Generic; 7 | using System.Linq; 8 | using System.Text; 9 | using HiddenMarkovModels.MathHelpers; 10 | 11 | namespace HiddenMarkovModels.MathUtils.Distribution 12 | { 13 | /// 14 | /// Class representing a random number generator based on Erlang distribution Erlang(\f$k\f$, \f$\lambda\f$). 15 | /// \f$k\f$ represents the shape parameter of the Erlang distribution and \f$\lambda\f$ represents the rate parameter of the Erlang distribution 16 | ///
    17 | ///
  1. The mean is \f$\mu = \frac{k}{\lambda}\f$
  2. 18 | ///
  3. The Variance is \f$\sigma^2 = \frac{k}{\lambda^2}\f$
  4. 19 | ///
  5. The skewness is \f$\frac{2}{\sqrt{k}}\f$
  6. 20 | ///
21 | ///
22 | public class Erlang : DistributionModel 23 | { 24 | private double mLnConstant; 25 | 26 | /// 27 | /// Constructor with \f$k\f$ and \f$\lambda\f$ 28 | /// 29 | /// \f$k\f$ for Erlang(\f$k\f$, \f$\lambda\f$) 30 | /// \f$\lambda\f$ for Erlang(\f$k\f$, \f$\lambda\f$) 31 | public Erlang(int _k, double _lambda) 32 | { 33 | m_k = _k; 34 | m_lambda = _lambda; 35 | 36 | if (m_lambda != 0) 37 | { 38 | mMean = m_k / m_lambda; 39 | mStdDev = System.Math.Sqrt(m_k / (m_lambda * m_lambda)); 40 | } 41 | 42 | double theta = 1 / m_lambda; 43 | 44 | mLnConstant = -(m_k * System.Math.Log(theta) + Gamma.Log(m_k)); 45 | } 46 | 47 | /// 48 | /// Constructor 49 | /// 50 | public Erlang() 51 | { 52 | 53 | } 54 | 55 | public override double LogProbabilityFunction(double x) 56 | { 57 | double theta = 1 / m_lambda; 58 | return mLnConstant + (m_k - 1) * System.Math.Log(x) - x / theta; 59 | } 60 | 61 | public override double GetPDF(double x) 62 | { 63 | return System.Math.Exp(LogProbabilityFunction(x)); 64 | } 65 | 66 | public override double GetCDF(double x) 67 | { 68 | double sum = 0; 69 | for (int n = 0; n < m_k; ++n) 70 | { 71 | sum += System.Math.Exp(-m_lambda * x) * System.Math.Pow(m_lambda * x, n) / Factorial.GetFactorial(n); 72 | } 73 | return 1 - sum; 74 | } 75 | 76 | public override DistributionModel Clone() 77 | { 78 | return new Erlang(m_k, m_lambda); 79 | } 80 | 81 | /// 82 | /// Member variable representing the shape parameter of the Erlang distribution 83 | /// 84 | private int m_k; 85 | 86 | /// 87 | /// Property representing the shape parameter of the Erlang distribution 88 | /// 89 | public int k 90 | { 91 | get 92 | { 93 | return m_k; 94 | } 95 | set 96 | { 97 | m_k = value; 98 | } 99 | } 100 | 101 | /// 102 | /// Member variable representing the rate parameter of the Erlang distribution 103 | /// 104 | private double m_lambda; 105 | /// 106 | /// Member variable representing the rate parameter of the Erlang distribution 107 | /// 108 | public double lambda 109 | { 110 
| get
111 |             {
112 |                 return m_lambda;
113 |             }
114 |             set
115 |             {
116 |                 m_lambda = value;
117 |             }
118 |         }
119 |
120 |         /// <summary>
121 |         /// Method that returns a double value randomly generated from the Erlang distribution Erlang(\f$k\f$, \f$\lambda\f$)
122 |         /// </summary>
123 |         /// <returns>A double value randomly generated from the Erlang distribution</returns>
124 |         public override double Next()
125 |         {
126 |             double product = 1.0;
127 |             for (int i = 0; i < k; i++)
128 |             {
129 |                 product *= GetUniform();
130 |             }
131 |
132 |             // -ln(U1 * ... * Uk) / lambda is the sum of k independent Exponential(lambda) draws, i.e. one Erlang(k, lambda) draw.
133 |             double r = -1.0 / lambda * System.Math.Log(product);
134 |             return r;
135 |         }
136 |         // Fits the shape k and rate lambda to a sample: closed-form starting guess for the shape, refined by Newton's method.
137 |         public override void Process(double[] values)
138 |         {
139 |             double lnsum = 0;
140 |             int count = values.Length;
141 |             for (int i = 0; i < count; ++i)
142 |             {
143 |                 lnsum += System.Math.Log(values[i]);
144 |             }
145 |
146 |             double mean = values.Average();
147 |             // s = ln(mean) - mean(ln x); the shape estimate solves ln(k) - digamma(k) = s.
148 |             double s = System.Math.Log(mean) - lnsum / count;
149 |
150 |             double newK = (3 - s + System.Math.Sqrt((s - 3) * (s - 3) + 24 * s)) / (12 * s);
151 |
152 |             double oldK;
153 |             int iterations = 0; // cap on Newton steps in case the update never settles
154 |             do
155 |             {
156 |                 oldK = newK;
157 |                 newK = oldK - (System.Math.Log(newK) - Gamma.Digamma(newK) - s) / ((1 / newK) - Gamma.Trigamma(newK));
158 |             }
159 |             while (++iterations < 100 && System.Math.Abs(oldK - newK) / System.Math.Abs(oldK) > 1e-12); // keep iterating while the relative change is still significant (the original test was inverted)
160 |
161 |             double theta = mean / newK;
162 |
163 |             m_lambda = 1 / theta;
164 |             m_k = (int)newK; // NOTE(review): truncates the real-valued estimate; yields 0 when newK < 1 - confirm intended rounding
165 |
166 |             mLnConstant = -(m_k * System.Math.Log(theta) + Gamma.Log(m_k));
167 |
168 |             mMean = mean;
169 |             mStdDev = System.Math.Sqrt(m_k) / m_lambda;
170 |         }
171 |         // Weighted fit: the weights are currently ignored and the unweighted fit is used.
172 |         public override void Process(double[] values, double[] weights)
173 |         {
174 |             Process(values);
175 |         }
176 |     }
177 | }
178 |
--------------------------------------------------------------------------------
/cs-hidden-markov-models/MathUtils/Distribution/Exponential.cs:
--------------------------------------------------------------------------------
1 | /// \file Exponential.cs
2 | ///
3 | /// Contains the class 
representing a random number generator Exponential(\f$\lambda\f$). 4 | /// 5 | using System; 6 | using System.Collections.Generic; 7 | using System.Linq; 8 | using System.Text; 9 | 10 | namespace HiddenMarkovModels.MathUtils.Distribution 11 | { 12 | /// 13 | /// Class representing a random number generator Exponential(\f$\lambda\f$). 14 | /// The \f$\lambda\f$ is the rate parameter or inverse scale of the Exponential distribution 15 | ///
    16 | ///
  1. The mean is \f$\mu = \frac{1}{\lambda}\f$
  2. 17 | ///
  3. The variance is \f$\sigma^2 = \frac{1}{\lambda^2}\f$
  4. 18 | ///
  5. The skewness is 2
  6. 19 | ///
20 | ///
21 |     public class Exponential : DistributionModel
22 |     {
23 |         protected double mLnlambda;
24 |         protected double mLambda;
25 |
26 |         /// <summary>
27 |         /// Constructor
28 |         /// </summary>
29 |         /// <param name="seed">The seed for the random number generator</param>
30 |         public Exponential(uint seed)
31 |             : base(seed)
32 |         {
33 |
34 |         }
35 |
36 |         public Exponential()
37 |         {
38 |
39 |         }
40 |
41 |         /// <summary>
42 |         /// Return the log of the PDF(x)
43 |         /// </summary>
44 |         /// <param name="x">Point at which the log density is evaluated</param>
45 |         /// <returns>ln(lambda) - lambda * x</returns>
46 |         public override double LogProbabilityFunction(double x)
47 |         {
48 |             return mLnlambda - mLambda * x;
49 |         }
50 |
51 |         public override double GetCDF(double x)
52 |         {
53 |             return 1 - System.Math.Exp(-mLambda * x);
54 |         }
55 |
56 |         public override double GetPDF(double x)
57 |         {
58 |             return mLambda * System.Math.Exp(-mLambda * x);
59 |         }
60 |
61 |         /// <summary>
62 |         /// Constructor taking the rate \f$\lambda\f$; mean and standard deviation are both \f$\frac{1}{\lambda}\f$
63 |         /// </summary>
64 |         public Exponential(double rate)
65 |         {
66 |             mLambda = rate;
67 |             mMean = mStdDev = 1 / mLambda;
68 |             mLnlambda = System.Math.Log(mLambda);
69 |         }
70 |
71 |         public override DistributionModel Clone()
72 |         {
73 |             return new Exponential(mLambda);
74 |         }
75 |
76 |         /// <summary>
77 |         /// Method that returns a random number generated from the Exponential distribution with mean \f$\mu = 1\f$
78 |         /// </summary>
79 |         /// <returns>-ln(u) for a uniform draw u</returns>
80 |         private double GetExponential()
81 |         {
82 |             return -System.Math.Log(GetUniform());
83 |         }
84 |
85 |         /// <summary>
86 |         /// Method that returns a random number generated from the Exponential distribution with \f$\lambda=\frac{1}{\mu}\f$ (\f$\mu\f$ is the mean of the distribution)
87 |         /// </summary>
88 |         /// <returns>A unit-mean exponential draw scaled by the current mean</returns>
89 |         public override double Next()
90 |         {
91 |             if (mMean <= 0.0)
92 |             {
93 |                 string msg = string.Format("Mean must be positive. Received {0}.", mMean);
94 |                 throw new ArgumentOutOfRangeException("mMean", mMean, msg); // the single-string overload would treat msg as the parameter name
95 |             }
96 |             return mMean * GetExponential();
97 |         }
98 |         // Fits the distribution to a sample: mean = sample average, lambda = 1 / mean.
99 |         public override void Process(double[] values)
100 |         {
101 |             int count = values.Length;
102 |             if (count == 0)
103 |             {
104 |                 mMean = 0;
105 |                 mStdDev = 0;
106 |                 return;
107 |             }
108 |
109 |             mMean = mStdDev = values.Average(); // an exponential's standard deviation equals its mean
110 |             mLambda = 1 / mMean;
111 |             mLnlambda = System.Math.Log(mLambda);
112 |         }
113 |         // Weighted fit: mean = sum(w_i * x_i) / sum(w_i), lambda = 1 / mean.
114 |         public override void Process(double[] values, double[] weights)
115 |         {
116 |             double sum = 0;
117 |             int count = values.Length;
118 |             for (int i = 0; i < count; ++i)
119 |             {
120 |                 sum += (values[i] * weights[i]);
121 |             }
122 |
123 |             double weight_sum = 0;
124 |             for (int i = 0; i < count; ++i)
125 |             {
126 |                 weight_sum += weights[i];
127 |             }
128 |
129 |             mMean = mStdDev = sum / weight_sum; // an exponential's standard deviation equals its mean
130 |             mLambda = 1 / mMean;
131 |             mLnlambda = System.Math.Log(mLambda);
132 |         }
133 |     }
134 | }
135 |
--------------------------------------------------------------------------------
/cs-hidden-markov-models/MathUtils/Distribution/LogNormal.cs:
--------------------------------------------------------------------------------
1 | /// \file LogNormal.cs
2 | ///
3 | /// Contains the class representing a random number generator based on LogNormal distribution
4 | ///
5 | using System;
6 | using System.Collections.Generic;
7 | using System.Linq;
8 | using System.Text;
9 | using HiddenMarkovModels.MathUtils.SpecialFunctions;
10 |
11 | namespace HiddenMarkovModels.MathUtils.Distribution
12 | {
13 |     ///
14 |     /// Class representing a random number generator based on LogNormal distribution
15 |     /// x is log-normally distributed if its natural logarithm log(x) is normally distributed. 
That is, log(x) ~ N(mu, sigma)
16 |     ///
17 |     public class LogNormal : Gaussian
18 |     {
19 |         private double mu = 0;
20 |         private double sigma = 0;
21 |         // mu and sigma are the mean and standard deviation of log(x).
22 |         public LogNormal(double mu, double sigma)
23 |             : base(mu, sigma)
24 |         {
25 |             this.mu = mu; this.sigma = sigma; // the parameters shadow the fields; without this they stay 0 and Next()/GetPDF()/GetCDF() are degenerate
26 |         }
27 |
28 |         public double GeometricMean
29 |         {
30 |             get { return System.Math.Exp(mMean); }
31 |         }
32 |
33 |         public double GeometricStdDev
34 |         {
35 |             get { return System.Math.Exp(mStdDev); }
36 |         }
37 |         // Builds a Gaussian whose mean and standard deviation equal this log-normal's arithmetic-scale moments, reading mMean/mStdDev as log-scale parameters.
38 |         public Gaussian ToNormal()
39 |         {
40 |
41 |             double mu = mMean;
42 |             double sigma = mStdDev;
43 |
44 |             double normal_mu = System.Math.Exp(mu + 0.5 * sigma * sigma);
45 |             double normal_sigma = normal_mu * System.Math.Sqrt(System.Math.Exp(sigma * sigma) - 1);
46 |
47 |             Gaussian normalDistribution = new Gaussian(normal_mu, normal_sigma);
48 |             return normalDistribution;
49 |         }
50 |
51 |         /// <summary>
52 |         /// Method that returns a randomly generated number from a LogNormal distribution
53 |         /// </summary>
54 |         /// <returns>exp(mu + sigma * z) for a normal draw z from GetNormal()</returns>
55 |         public override double Next()
56 |         {
57 |             return System.Math.Exp(GetNormal() * sigma + mu);
58 |         }
59 |
60 |         public override DistributionModel Clone()
61 |         {
62 |             LogNormal clone = new LogNormal(mMean, mStdDev);
63 |             clone.mu = mu;
64 |             clone.sigma = sigma;
65 |             return clone;
66 |         }
67 |
68 |         /// <summary>
69 |         /// Return the log of the PDF(x)
70 |         /// </summary>
71 |         /// <param name="x">Point at which the log density is evaluated</param>
72 |         /// <returns>The natural logarithm of GetPDF(x)</returns>
73 |         public override double LogProbabilityFunction(double x)
74 |         {
75 |             double z = (System.Math.Log(x) - mu) / sigma;
76 |             return -System.Math.Log(Constants.Sqrt2PI * sigma) + (-z * z) * 0.5 - System.Math.Log(x);
77 |         }
78 |
79 |         public override double GetPDF(double x)
80 |         {
81 |             double lnx = System.Math.Log(x);
82 |             return System.Math.Exp(-(lnx - mu) * (lnx - mu) / (2 * sigma * sigma)) / (x * sigma * Constants.Sqrt2PI);
83 |         }
84 |
85 |         public override double GetCDF(double x)
86 |         {
87 |             return 0.5 + 0.5 * ErrorFunction.GetErf((System.Math.Log(x) - mu) / (Constants.Sqrt2 * sigma));
88 |         }
89 |
90 |         ///
91 |         /// Method that computes the mean \f$\mu\f$ and standard deviation 
\f$\sigma\f$ for the random number generator from a sample of sample
92 |         ///
93 |         /// <param name="values">The sample of sample</param>
94 |         public override void Process(double[] values)
95 |         {
96 |             int count = values.Length;
97 |             if (count == 0)
98 |             {
99 |                 mMean = 0;
100 |                 mStdDev = 0;
101 |                 return;
102 |             }
103 |
104 |             double[] logValues = new double[count];
105 |             for (int i = 0; i < count; ++i)
106 |             {
107 |                 logValues[i] = System.Math.Log(values[i]);
108 |             }
109 |
110 |             mu = logValues.Average();
111 |
112 |             double c = 0;
113 |             double sqr_sum_log = 0;
114 |             for (int i = 0; i < count; ++i)
115 |             {
116 |                 c = (logValues[i] - mu);
117 |                 sqr_sum_log += (c * c);
118 |             }
119 |             // Population (divide-by-n) standard deviation of the logs.
120 |             sigma = System.Math.Sqrt(sqr_sum_log / count);
121 |             // Arithmetic-scale moments: mean = exp(mu + sigma^2/2), variance = exp(2mu + sigma^2)(exp(sigma^2) - 1).
122 |             mMean = System.Math.Exp(mu + sigma * sigma / 2);
123 |             mStdDev = System.Math.Sqrt(System.Math.Exp(2 * mu + sigma * sigma) * (System.Math.Exp(sigma * sigma) - 1)); // square root of the variance; the original stored the variance itself
124 |         }
125 |         // Weighted fit: weighted mean of the logs, and a weighted variance estimate scaled by b / (b^2 - a) with b = sum(w), a = sum(w^2).
126 |         public override void Process(double[] values, double[] weights)
127 |         {
128 |             int count = values.Length;
129 |             double[] logValues = new double[count];
130 |             for (int i = 0; i < count; ++i)
131 |             {
132 |                 logValues[i] = System.Math.Log(values[i]);
133 |             }
134 |
135 |             double sum = 0;
136 |
137 |             for (int i = 0; i < count; ++i)
138 |             {
139 |                 sum += (logValues[i] * weights[i]);
140 |             }
141 |
142 |             double weight_sum = 0;
143 |             for (int i = 0; i < count; ++i)
144 |             {
145 |                 weight_sum += weights[i];
146 |             }
147 |
148 |             mu = sum / weight_sum;
149 |
150 |             double sqr_sum = 0;
151 |             double c = 0;
152 |             double w = 0;
153 |             double a = 0;
154 |             double b = 0;
155 |             for (int i = 0; i < count; ++i)
156 |             {
157 |                 c = (logValues[i] - mu);
158 |                 w = weights[i];
159 |
160 |                 sqr_sum += (w * c * c);
161 |
162 |                 b += w;
163 |                 a += w * w;
164 |             }
165 |
166 |             sigma = System.Math.Sqrt(sqr_sum * (b / (b * b - a)));
167 |             // Arithmetic-scale moments, as in the unweighted overload.
168 |             mMean = System.Math.Exp(mu + sigma * sigma / 2);
169 |             mStdDev = System.Math.Sqrt(System.Math.Exp(2 * mu + sigma * sigma) * (System.Math.Exp(sigma * sigma) - 1)); // square root of the variance; the original stored the variance itself
170 |         }
171 |
172 |     }
173 | }
174 |
-------------------------------------------------------------------------------- /cs-hidden-markov-models/MathUtils/Distribution/MultivariateDistributionModel.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace HiddenMarkovModels.MathUtils.Distribution 7 | { 8 | public abstract class MultivariateDistributionModel : DistributionModel 9 | { 10 | protected int mDimension = 1; 11 | public int Dimension 12 | { 13 | get { return mDimension; } 14 | set { mDimension = value; } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/MathUtils/Distribution/Poisson.cs: -------------------------------------------------------------------------------- 1 | /// \file Poisson.cs 2 | /// 3 | /// Contains the class representing a random number generator based on the Poisson distribution. 4 | /// 5 | using System; 6 | using System.Collections.Generic; 7 | using System.Linq; 8 | using System.Text; 9 | using HiddenMarkovModels.MathHelpers; 10 | 11 | namespace HiddenMarkovModels.MathUtils.Distribution 12 | { 13 | /// 14 | /// Class representing a random number generator based on the Poisson distribution. 15 | ///
    16 | ///
  1. The mean is \f$\mu=\lambda\f$
  2. 17 | ///
  3. The variance is \f$\sigma^2=\lambda\f$
  4. 18 | ///
19 | ///
20 |     public class Poisson : DistributionModel
21 |     {
22 |         /// <summary>
23 |         /// Constructor
24 |         /// </summary>
25 |         public Poisson()
26 |         {
27 |
28 |         }
29 |
30 |         /// <summary>
31 |         /// Return the log of PMF(x)
32 |         /// </summary>
33 |         /// <param name="x">Count to evaluate; floored to an integer k</param>
34 |         /// <returns>k * ln(lambda) - ln(k!) - lambda, with lambda taken from mMean</returns>
35 |         public override double LogProbabilityFunction(double x)
36 |         {
37 |             int k = (int)(System.Math.Floor(x));
38 |             double lambda = mMean;
39 |
40 |             return (k * System.Math.Log(lambda) - LogHelper.LogFactorial(k)) - lambda;
41 |         }
42 |         // Cumulative probability P(X at most floor(x)) = exp(-lambda) * sum over i = 0..k of lambda^i / i!; returns 0 when floor(x) is negative.
43 |         public override double GetCDF(double x)
44 |         {
45 |             int k = (int)(System.Math.Floor(x));
46 |             double sum = 0;
47 |             double lambda = mMean;
48 |             for (int i = 0; i <= k; ++i) // advance the index i; the original incremented the bound k, so the loop only ended on integer overflow
49 |             {
50 |                 sum += (System.Math.Pow(lambda, i) / Factorial.GetFactorial(i));
51 |             }
52 |             return System.Math.Exp(-lambda) * sum;
53 |         }
54 |
55 |         public override double GetPDF(double x)
56 |         {
57 |             throw new NotImplementedException("Poisson distribution does not have PDF");
58 |         }
59 |
60 |         public override double GetPMF(int k)
61 |         {
62 |             double lambda = mMean;
63 |             return GetPMF(k, lambda);
64 |         }
65 |         // PMF for an explicit rate: lambda^k * exp(-lambda) / k!.
66 |         public static double GetPMF(int k, double lambda)
67 |         {
68 |             return System.Math.Pow(lambda, k) * System.Math.Exp(-lambda) / Factorial.GetFactorial(k);
69 |         }
70 |
71 |         /// <summary>
72 |         /// Method that returns a randomly generated number from the Poisson(\f$\lambda\f$) distribution
73 |         /// </summary>
74 |         /// <returns>A draw from GetPoisson using the current mean as \f$\lambda\f$</returns>
75 |         public override double Next()
76 |         {
77 |             return GetPoisson(mMean);
78 |         }
79 |
80 |         ///
81 |         /// Method that returns a randomly generated number from the Poisson(\f$\lambda\f$) distribution.
82 |         ///
    83 | ///
  1. When the value of \f$\lambda\f$ is small (i.e. \f$\lambda < 30.0\f$), the method returns PoissonSmall()
  2. 84 | ///
  3. When the value of \f$\lambda\f$ is large (i.e. \f$\lambda >= 30.0\f$), the method returns PoissonLarge()
  4. 85 | ///
86 | ///
87 |         /// <param name="lambda">The mean \f$\lambda\f$ of the distribution</param>
88 |         /// <returns>A randomly generated number</returns>
89 |         private static double GetPoisson(double lambda)
90 |         {
91 |             return (lambda < 30.0) ? PoissonSmall(lambda) : PoissonLarge(lambda);
92 |         }
93 |         // Returns a copy that carries the fitted mean (and standard deviation) instead of an unparameterised instance.
94 |         public override DistributionModel Clone()
95 |         {
96 |             return new Poisson { mMean = mMean, mStdDev = mStdDev };
97 |         }
98 |
99 |         /// <summary>
100 |         /// Method that returns a randomly generated number when \f$\lambda\f$ is small
101 |         /// </summary>
102 |         /// <param name="lambda">The mean and variance \f$\lambda\f$</param>
103 |         /// <returns>A randomly generated number</returns>
104 |         private static double PoissonSmall(double lambda)
105 |         {
106 |             // Algorithm due to Donald Knuth, 1969.
107 |             double p = 1.0, L = System.Math.Exp(-lambda);
108 |             int k = 0;
109 |             do
110 |             {
111 |                 k++;
112 |                 p *= GetUniform();
113 |             }
114 |             while (p > L);
115 |             return k - 1;
116 |         }
117 |
118 |
119 |
120 |         /// <summary>
121 |         /// Method that returns a randomly generated number when \f$\lambda\f$ is large
122 |         /// </summary>
123 |         /// <param name="lambda">The mean and variance \f$\lambda\f$</param>
124 |         /// <returns>A randomly generated number</returns>
125 |         private static double PoissonLarge(double lambda)
126 |         {
127 |             // "Rejection method PA" from "The Computer Generation of
128 |             // Poisson Random Variables" by A. C. Atkinson,
129 |             // Journal of the Royal Statistical Society Series C
130 |             // (Applied Statistics) Vol. 28, No. 1. (1979)
131 |             // The article is on pages 29-35.
132 |             // The algorithm given here is on page 32. 
133 | 134 | double c = 0.767 - 3.36 / lambda; 135 | double beta = System.Math.PI / System.Math.Sqrt(3.0 * lambda); 136 | double alpha = beta * lambda; 137 | double k = System.Math.Log(c) - lambda - System.Math.Log(beta); 138 | 139 | for (;;) 140 | { 141 | double u = GetUniform(); 142 | double x = (alpha - System.Math.Log((1.0 - u) / u)) / beta; 143 | double r = System.Math.Floor(x + 0.5); 144 | int n = (int)r; 145 | if (n < 0) 146 | continue; 147 | double v = GetUniform(); 148 | double y = alpha - beta * x; 149 | double temp = 1.0 + System.Math.Exp(y); 150 | double lhs = y + System.Math.Log(v / (temp * temp)); 151 | double rhs = k + n * System.Math.Log(lambda) - Factorial.LogFactorial(n); 152 | if (lhs <= rhs) 153 | return r; 154 | } 155 | } 156 | 157 | public override void Process(double[] values) 158 | { 159 | int count = values.Length; 160 | if (count == 0) 161 | { 162 | mMean = 0; 163 | mStdDev = 0; 164 | return; 165 | } 166 | 167 | mMean = values.Average(); 168 | } 169 | 170 | public override void Process(double[] values, double[] weights) 171 | { 172 | double sum = 0; 173 | int count = values.Length; 174 | for (int i = 0; i < count; ++i) 175 | { 176 | sum += (values[i] * weights[i]); 177 | } 178 | 179 | double weight_sum = 0; 180 | for (int i = 0; i < count; ++i) 181 | { 182 | weight_sum += weights[i]; 183 | } 184 | 185 | mMean = sum / weight_sum; 186 | } 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/MathUtils/SpecialFunctions/ClampFunction.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace HiddenMarkovModels.MathUtils.SpecialFunctions 7 | { 8 | public class ClampFunction 9 | { 10 | public static double Clamp(double value, double lower, double upper) 11 | { 12 | return System.Math.Min(upper, System.Math.Max(value, lower)); 13 | 
} 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/MathUtils/SpecialFunctions/ErrorFunction.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace HiddenMarkovModels.MathUtils.SpecialFunctions 7 | { 8 | public class ErrorFunction 9 | { 10 | // fractional error in math formula less than 1.2 * 10 ^ -7. 11 | // although subject to catastrophic cancellation when test_statistic in very close to 0 12 | // from Chebyshev fitting formula for GetErf(test_statistic) from Numerical Recipes, 6.2 13 | public static double GetErf(double z) 14 | { 15 | double t = 1.0 / (1.0 + 0.5 * System.Math.Abs(z)); 16 | 17 | // use Horner's method 18 | double ans = 1 - t * System.Math.Exp(-z * z - 1.26551223 + 19 | t * (1.00002368 + 20 | t * (0.37409196 + 21 | t * (0.09678418 + 22 | t * (-0.18628806 + 23 | t * (0.27886807 + 24 | t * (-1.13520398 + 25 | t * (1.48851587 + 26 | t * (-0.82215223 + 27 | t * (0.17087277)))))))))); 28 | if (z >= 0) return ans; 29 | else return -ans; 30 | } 31 | 32 | // fractional error less than x.xx * 10 ^ -4. 33 | // Algorithm 26.2.17 in Abromowitz and Stegun, Handbook of Mathematical. 
34 | public static double GetErf2(double z) 35 | { 36 | double t = 1.0 / (1.0 + 0.47047 * System.Math.Abs(z)); 37 | double poly = t * (0.3480242 + t * (-0.0958798 + t * (0.7478556))); 38 | double ans = 1.0 - poly * System.Math.Exp(-z * z); 39 | if (z >= 0) return ans; 40 | else return -ans; 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/MathUtils/SpecialFunctions/GammaFunction.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathHelpers; 6 | 7 | namespace HiddenMarkovModels.MathUtils.SpecialFunctions 8 | { 9 | public class GammaFunction 10 | { 11 | public static double GetGamma(double x) 12 | { 13 | return System.Math.Exp(Gamma.Log(x)); 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/MathUtils/SpecialFunctions/InverseErrorFunction.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace HiddenMarkovModels.MathUtils.SpecialFunctions 7 | { 8 | public class InverseErrorFunction 9 | { 10 | /// 11 | /// \operatorname{GetErf}^{-1}(x)\approx \sgn(x) \sqrt{\sqrt{\left(\frac{2}{\pi a}+\frac{\ln(1-x^2)}{2}\right)^2 - \frac{\ln(1-x^2)}{a}}-\left(\frac{2}{\pi a}+\frac{\ln(1-x^2)}{2}\right)}. 
12 | /// 13 | /// 14 | /// 15 | public static double GetInvErf(double x) 16 | { 17 | double z; 18 | double a = 0.147; 19 | double the_sign_of_x; 20 | if (0 == x) 21 | { 22 | the_sign_of_x = 0; 23 | } 24 | else if (x > 0) 25 | { 26 | the_sign_of_x = 1; 27 | } 28 | else 29 | { 30 | the_sign_of_x = -1; 31 | } 32 | 33 | if (0 != x) 34 | { 35 | var ln_1minus_x_sqrd = System.Math.Log(1 - x * x); 36 | var ln_1minusxx_by_a = ln_1minus_x_sqrd / a; 37 | var ln_1minusxx_by_2 = ln_1minus_x_sqrd / 2; 38 | var ln_etc_by2_plus2 = ln_1minusxx_by_2 + (2 / (System.Math.PI * a)); 39 | var first_sqrt = System.Math.Sqrt((ln_etc_by2_plus2 * ln_etc_by2_plus2) - ln_1minusxx_by_a); 40 | var second_sqrt = System.Math.Sqrt(first_sqrt - ln_etc_by2_plus2); 41 | z = second_sqrt * the_sign_of_x; 42 | } 43 | else 44 | { // x is zero 45 | z = 0; 46 | } 47 | return z; 48 | } 49 | 50 | 51 | 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/MathUtils/SpecialFunctions/InverseLogitFunction.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace HiddenMarkovModels.MathUtils.SpecialFunctions 7 | { 8 | public class InverseLogitFunction 9 | { 10 | public static double GetInvLogit(double alpha) 11 | { 12 | return 1.0 / (1.0 + System.Math.Exp(-alpha)); 13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/MathUtils/SpecialFunctions/LogitFunction.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace HiddenMarkovModels.MathUtils.SpecialFunctions 7 | { 8 | public class LogitFunction 9 | { 10 | public static double GetLogit(double p) 11 | { 12 | return 
System.Math.Log(p / (1 - p)); 13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/MathUtils/Statistics/CLT.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathUtils.Distribution; 6 | 7 | namespace HiddenMarkovModels.MathUtils.Statistics 8 | { 9 | /// 10 | /// This class represents the Central Limit Theorem. 11 | /// 12 | /// The Central Limit Theorem (CLT) states that: 13 | /// The distribution of sample statistics (e.g., sample mean) is nearly normal, centered at the population mean, and with a standard deviation equal to the population standard deviation 14 | /// divided by square root of the sample size. 15 | /// 16 | public class CLT 17 | { 18 | /// 19 | /// Estimate the normal distribution of a sample mean (for a continuous variable) 20 | /// 21 | /// The Central Limit Theorem (CLT) states that: 22 | /// The distribution of sample statistics (e.g., sample mean) is nearly normal, centered at the population mean, and with a standard deviation equal to the population standard deviation 23 | /// divided by square root of the sample size. 24 | /// 25 | /// With CTL, we can estimate the the normal distribution of a sample, given its estimated mean and stddev as well as the sample size. 26 | /// 27 | /// For the CTL to hold true for a sample, the following conditions must be met: 28 | /// 1. Independence: Sample observations must be independent. 29 | /// > random sample/assignment 30 | /// > if sampling without replacement, the sample size < 10% of the population 31 | /// 2. 
Sample size/skew: Either the population distribution is normal, or if the population distribution is skewed, the sample size is large (rule of thumb: sample size > 30) 32 | /// 33 | /// point estimate of sample mean 34 | /// standard deviation of a random sample 35 | /// the size of the random sample 36 | /// The normal distribution of the sample means for a random sample drawn from the population 37 | public static Gaussian EstimateSampleMeanDistribution(double sampleMean, double sampleStdDev, int sampleSize) 38 | { 39 | double SE = StandardError.GetStandardError(sampleStdDev, sampleSize); 40 | return new Gaussian(sampleMean, SE); 41 | } 42 | 43 | /// 44 | /// Estimate the normal distribution of a sample proportion (for a categorical variable with two values { "SUCCESS", "FAILURE" }) 45 | /// 46 | /// The Centrl Limit Theorem (CLT) for proportions: 47 | /// The distribution of sample proportions is nearly normal, centered at the population proportion, and with a standard error inversely proportional to the sample size. 48 | /// 49 | /// Conditions for the CLT for proportions: 50 | /// 1. Independence: Sampled observations must be independent. 51 | /// > random sample/assignment 52 | /// > if sampling without replacement, n < 10% population 53 | /// 2. 
Sample size / skew: There should be at least 10 successes and 10 failures in the sample: np >= 10 and n(1-p) >= 10 54 | /// 55 | /// 56 | /// 57 | /// 58 | public static Gaussian EstimateSampleProportionDistribution(double p, int sampleSize) 59 | { 60 | double SE = StandardError.GetStandardErrorForProportion(p, sampleSize); 61 | return new Gaussian(p, SE); 62 | } 63 | 64 | 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/MathUtils/Statistics/ChiSquareGOFTest.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using HiddenMarkovModels.MathUtils.Distribution; 6 | 7 | namespace HiddenMarkovModels.MathUtils.Statistics 8 | { 9 | /// 10 | /// "Goodness of fit" test based on chi-square test. 11 | /// 12 | /// In this case, we are dealing with one categorical variable, which has more than 2 levels, (e.g., the categorical variable "animal" has many levels such as "dog", "cat", "fish", ...) 13 | /// 14 | /// We are given: 15 | /// 1. the expected distribution / percentage of each level for the categorical variable in the population 16 | /// 2. the actual count of each level for the categorical variable within the sample data 17 | /// 3. the sample data size 18 | /// The objective is to test whether the actual distribution of each level for the categorical variable in the population matches with the expected distribution of each level 19 | /// 20 | /// Hypotheses are: 21 | /// H_0 : actual distribution of each level = expected distribution of each level 22 | /// H_A : actual distribution of each level != expected distribution of each level 23 | /// 24 | /// Conditions for the test: 25 | /// 1. 
Independence: Sampled observations must be independent
26 |     /// > random sample/assignment
27 |     /// > if sampling without replacement, n < 10% of population
28 |     /// > each case only contributes to one level
29 |     /// 2. Sample size: each particular scenario/level in the sample data must have at least 5 counts.
30 |     ///
31 |     /// p-value = P(observed or more extreme mismatch of expected and actual level distribution | H_0 is true)
32 |     ///
33 |     /// Reject H_0 if p-value < alpha (i.e. the significance level)
34 |     ///
35 |     public class ChiSquareGOFTest
36 |     {
37 |         /// <summary>
38 |         /// GOF test for one categorical variable with more than two levels.
39 |         ///
40 |         /// Hypotheses are:
41 |         /// H_0 : actual distribution of each level = expected distribution of each level
42 |         /// H_1 : actual distribution of each level != expected distribution of each level
43 |         ///
44 |         /// p-value = P(observed or more mismatch of expected and actual level distribution | H_0 is true)
45 |         ///
46 |         /// Reject H_0 if p-value &lt; alpha
47 |         /// </summary>
48 |         /// <param name="observedCountInEachLevel">The count of each level in the sample data for the categorical variable</param>
49 |         /// <param name="expectedPercentageOfEachLevel">The expected distribution / percentage of each level in the population for the categorical variable, given as a fraction (it is multiplied directly by the sample size)</param>
50 |         /// <param name="pValue">p-value which is P(observed or more extreme mismatch of expected and actual level distribution | H_0 is true)</param>
51 |         /// <param name="significance_level">alpha</param>
52 |         /// <returns>True if H_0 is rejected; False if H_0 is failed to be rejected</returns>
53 |         public bool RejectH0(int[] observedCountInEachLevel, double[] expectedPercentageOfEachLevel, out double pValue, double significance_level = 0.05)
54 |         {
55 |             int sampleSize = 0;
56 |             int countOfLevels = observedCountInEachLevel.Length;
57 |             for (int i = 0; i < countOfLevels; ++i)
58 |             {
59 |                 sampleSize += observedCountInEachLevel[i];
60 |             }
61 |             double[] expectedCountInEachLevel = new double[countOfLevels];
62 |             for (int i = 0; i < countOfLevels; ++i)
63 |             {
64 |                 // E_i = n * p_i stays real-valued: truncating to int and adding the remainder to level 0 distorts the statistic.
65 |                 expectedCountInEachLevel[i] = expectedPercentageOfEachLevel[i] * sampleSize;
66 |             }
67 |
68 |             double ChiSq = 0;
69 |             for (int i = 0; i < countOfLevels; ++i)
70 |             {
71 |                 double deviation = observedCountInEachLevel[i] - expectedCountInEachLevel[i];
72 |                 ChiSq += deviation * deviation / expectedCountInEachLevel[i];
73 |             }
74 |             // Upper-tail probability of the statistic with (levels - 1) degrees of freedom.
75 |             pValue = 1 - ChiSquare.GetPercentile(ChiSq, countOfLevels - 1);
76 |             return pValue < significance_level;
77 |         }
78 |     }
79 | }
80 |
--------------------------------------------------------------------------------
/cs-hidden-markov-models/MathUtils/Statistics/ChiSquareIndependenceTest.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using HiddenMarkovModels.MathUtils.Distribution;
6 |
7 | namespace HiddenMarkovModels.MathUtils.Statistics
8 | {
9 |     ///
10 |     /// Chi^2 independence test for two categorical variables
11 |     ///
12 |     /// In this case, we are dealing with two categorical variables, one of this having at least more than two levels
13 |     ///
14 |     /// The hypotheses are:
15 |     /// H_0 : variable 1 is independent of variable 2
16 |     /// H_A : variable 1 and variable 2 are dependent
17 |     ///
18 |     /// Evaluating the hypotheses:
19 |     /// > quantify how different observed counts are from the expected counts
20 |     /// > large deviation from what whould expected based on sampling variation (chance) alone provide strong evidence for the alternative hypothesis
21 |     /// > called an independence test since we are evaluating the relationship between two categorical variables
22 |     ///
23 |     /// As input, we are given a contingency table formed from the sample data:
24 |     /// 1. Each table row represent a level in categorical variable 1
25 |     /// 2. Each table col represent a level in categorical variable 2
26 |     /// 3. 
/// <summary>
/// Chi^2 independence test for two categorical variables, at least one of which has more than two levels.
///
/// The hypotheses are:
/// H_0 : variable 1 is independent of variable 2
/// H_A : variable 1 and variable 2 are dependent
///
/// The input is a contingency table formed from the sample data:
/// 1. Each table row represents a level in categorical variable 1
/// 2. Each table column represents a level in categorical variable 2
/// 3. Each table cell, cell[r, c], is the number of cases in the sample data having
///    variable1 = variable1.Levels[r] and variable2 = variable2.Levels[c]
///
/// Conditions for the test:
/// 1. Independence: sampled observations must be independent
///    > random sample/assignment
///    > if sampling without replacement, n &lt; 10% of population
///    > each case only contributes to one cell in the contingency table
/// 2. Sample size: each particular scenario/cell in the contingency table must have at least 5 counts.
/// </summary>
public class ChiSquareIndependenceTest
{
    /// <summary>
    /// Chi^2 independence test for categorical variables var1 (rows) and var2 (columns).
    ///
    /// Assuming H_0 (independence), the share of each var1 level is the same inside every
    /// var2 level, so expected[r][c] = columnTotal[c] * (rowTotal[r] / sampleSize).
    /// The statistic sum((observed - expected)^2 / expected) is compared against a chi-square
    /// distribution with (rows - 1) * (cols - 1) degrees of freedom.
    ///
    /// Reject H_0 if p-value &lt; alpha.
    /// </summary>
    /// <param name="contingency_table">Rectangular table in which cell [row][col] holds the number of records matching that var1 level (row) and var2 level (col)</param>
    /// <param name="pValue">p-value = P(observed or more extreme events that favor H_A | H_0)</param>
    /// <param name="signficance_level">alpha</param>
    /// <returns>True if H_0 is rejected; False if H_0 is failed to be rejected</returns>
    /// <exception cref="ArgumentNullException">The table or one of its rows is null.</exception>
    /// <exception cref="ArgumentException">The table is empty, ragged, or contains no observations.</exception>
    public bool RejectH0(int[][] contingency_table, out double pValue, double signficance_level = 0.05)
    {
        if (contingency_table == null) throw new ArgumentNullException("contingency_table");
        if (contingency_table.Length == 0) throw new ArgumentException("The contingency table must have at least one row.", "contingency_table");
        if (contingency_table[0] == null) throw new ArgumentNullException("contingency_table", "Rows of the contingency table must not be null.");

        int countOfLevels_var1 = contingency_table.Length;
        int countOfLevels_var2 = contingency_table[0].Length;

        // Row totals (var1 marginals), column totals (var2 marginals) and grand total in one pass.
        int sampleSize = 0;
        int[] countInEachLevel_var1 = new int[countOfLevels_var1];
        int[] countInEachLevel_var2 = new int[countOfLevels_var2];
        for (int row = 0; row < countOfLevels_var1; ++row)
        {
            int[] cells = contingency_table[row];
            if (cells == null) throw new ArgumentNullException("contingency_table", "Rows of the contingency table must not be null.");
            if (cells.Length != countOfLevels_var2)
            {
                // A ragged table would otherwise be read out of range or have cells silently ignored.
                throw new ArgumentException("All rows of the contingency table must have the same number of columns.", "contingency_table");
            }
            for (int col = 0; col < countOfLevels_var2; ++col)
            {
                countInEachLevel_var1[row] += cells[col];
                countInEachLevel_var2[col] += cells[col];
            }
            sampleSize += countInEachLevel_var1[row];
        }
        if (sampleSize == 0) throw new ArgumentException("The contingency table contains no observations.", "contingency_table");

        double ChiSq = 0;
        for (int row = 0; row < countOfLevels_var1; ++row)
        {
            // Under H_0 every column splits across the var1 levels in this same proportion.
            double distributionInLevel_var1 = (double)countInEachLevel_var1[row] / sampleSize;
            for (int col = 0; col < countOfLevels_var2; ++col)
            {
                double expected = countInEachLevel_var2[col] * distributionInLevel_var1;
                // NOTE: an all-zero row or column makes expected == 0 and the term NaN,
                // which violates the sample-size condition of the test.
                ChiSq += System.Math.Pow(contingency_table[row][col] - expected, 2) / expected;
            }
        }

        int df = (countOfLevels_var1 - 1) * (countOfLevels_var2 - 1);
        pValue = 1 - ChiSquare.GetPercentile(ChiSq, df);
        return pValue < signficance_level;
    }
}
/// <summary>
/// Extension helpers for numeric collections.
/// </summary>
public static class CollectionExtensionMethods
{
    /// <summary>
    /// Average of a sequence rounded to a fixed number of decimal places.
    /// </summary>
    /// <param name="values">The values to average (element type assumed to be double; the generic argument is not visible in this listing — confirm)</param>
    /// <param name="precision_point">Number of fractional digits passed to System.Math.Round</param>
    /// <returns>The rounded average, or null when the sequence is empty</returns>
    public static double? Average(this IEnumerable<double> values, int precision_point)
    {
        // Materialize once: calling Count() and then Average() on a lazy sequence
        // would enumerate (and possibly recompute) it twice.
        ICollection<double> items = values as ICollection<double> ?? values.ToList();
        if (items.Count == 0) return null;
        return System.Math.Round(Enumerable.Average(items), precision_point);
    }
}
/// <summary>
/// Correlation: strength of a linear relationship.
///
/// Correlation, which always takes values between -1 and 1, describes the strength of the
/// linear relationship between two variables. We denote the correlation by R.
///
/// Only when the relationship is perfectly linear is the correlation either -1 or +1.
/// If the relationship is strong and positive, the correlation will be near +1.
/// If the relationship is strong and negative, the correlation will be near -1.
/// If there is no apparent linear relationship between the variables, the correlation will be near zero.
/// </summary>
public class Correlation
{
    /// <summary>
    /// Return the correlation for observations (x_1, y_1), (x_2, y_2), ... (x_n, y_n), where n is the sample size.
    /// It is computed as (1 / n) * sum_i(((x_i - mu_x) / sigma_x) * ((y_i - mu_y) / sigma_y)),
    /// which equals sum_i((x_i - mu_x) * (y_i - mu_y)) / sqrt(sum_i((x_i - mu_x)^2) * sum_i((y_i - mu_y)^2))
    /// when sigma is the population standard deviation sqrt(sum_i((x_i - mu)^2) / n).
    /// NOTE(review): assumes StdDev.GetStdDev divides by n (not n - 1) — confirm against StdDev.cs.
    /// </summary>
    /// <param name="observations">The observations (x_1, y_1), (x_2, y_2), ... (x_n, y_n); element type assumed Tuple&lt;double, double&gt;</param>
    /// <returns>The correlation value for variable x and y</returns>
    public double GetCorrelation(Tuple<double, double>[] observations)
    {
        int sampleSize = observations.Length;

        // Split the pairs into one series per variable.
        double[] xs = new double[sampleSize];
        double[] ys = new double[sampleSize];
        int k = 0;
        foreach (Tuple<double, double> pair in observations)
        {
            xs[k] = pair.Item1;
            ys[k] = pair.Item2;
            k++;
        }

        double meanX = Mean.GetMean(xs);
        double meanY = Mean.GetMean(ys);
        double sdX = StdDev.GetStdDev(xs, meanX);
        double sdY = StdDev.GetStdDev(ys, meanY);

        // Accumulate the product of the standardized deviations.
        double total = 0;
        for (k = 0; k < sampleSize; ++k)
        {
            double zx = (xs[k] - meanX) / sdX;
            double zy = (ys[k] - meanY) / sdY;
            total += zx * zy;
        }
        return total / sampleSize;
    }
}
/// <summary>
/// Summary statistics (count, min, max, average, median) computed once from a set of values.
/// </summary>
public class DescriptiveStatistics
{
    protected int mCount = 0;
    protected double[] mSortedData = null;
    protected double mMinValue = double.MinValue;
    protected double mMaxValue = double.MaxValue;
    protected double mAverage;
    protected double mMedian;
    // NOTE(review): mStdDev is declared but never assigned by the visible code.
    protected double mStdDev;

    /// <summary>
    /// Sort the values and derive count, minimum, maximum, average and median.
    /// </summary>
    /// <param name="values">The values to summarize; must contain at least one element</param>
    /// <exception cref="ArgumentNullException">values is null.</exception>
    /// <exception cref="ArgumentException">values is empty.</exception>
    public DescriptiveStatistics(IEnumerable<double> values)
    {
        if (values == null) throw new ArgumentNullException("values");

        // Enumerate the source exactly once; the count comes from the sorted copy.
        mSortedData = values.OrderBy(x => x).ToArray();
        mCount = mSortedData.Length;
        if (mCount == 0) throw new ArgumentException("At least one value is required.", "values");

        mMinValue = mSortedData[0];
        mMaxValue = mSortedData[mCount - 1];

        mAverage = mSortedData.Average();

        int mid_index = mCount / 2;
        if (mCount % 2 == 0)
        {
            // Even count: mean of the two central elements.
            mMedian = (mSortedData[mid_index - 1] + mSortedData[mid_index]) / 2;
        }
        else
        {
            // Odd count: the central element sits at zero-based index mCount / 2
            // ((mCount + 1) / 2 is one past it and out of range for a single value).
            mMedian = mSortedData[mid_index];
        }
    }
}
/// <summary>
/// Statistics related to the linear combination of two variables.
/// </summary>
public class LinearCombination
{
    /// <summary>
    /// Return the distribution of a*x + b*y for correlated random variables x and y.
    /// The result is a clone of x whose Mean is a*mu_x + b*mu_y and whose StdDev is
    /// sqrt((a*sigma_x)^2 + (b*sigma_y)^2 + 2*correlation*a*sigma_x*b*sigma_y).
    /// </summary>
    /// <param name="x">random variable x</param>
    /// <param name="y">random variable y</param>
    /// <param name="x_coefficient">a, the coefficient of x</param>
    /// <param name="y_coefficient">b, the coefficient of y</param>
    /// <param name="correlation">correlation between x and y</param>
    /// <returns>The distribution model of a*x + b*y</returns>
    public static DistributionModel Sum(DistributionModel x, DistributionModel y, int x_coefficient, double y_coefficient, double correlation)
    {
        DistributionModel sum = x.Clone();
        sum.Mean = x_coefficient * x.Mean + y_coefficient * y.Mean;
        sum.StdDev = System.Math.Sqrt(System.Math.Pow(x_coefficient * x.StdDev, 2) + System.Math.Pow(y_coefficient * y.StdDev, 2) + 2 * correlation * x_coefficient * x.StdDev * y_coefficient * y.StdDev);
        return sum;
    }

    /// <summary>
    /// Return the mean and standard error of the sample statistic (a*x + b*y) for correlated random variables x and y.
    /// </summary>
    /// <param name="x">random sample for random variable x</param>
    /// <param name="y">random sample for random variable y</param>
    /// <param name="x_coefficient">a, the coefficient of x</param>
    /// <param name="y_coefficient">b, the coefficient of y</param>
    /// <param name="correlation">correlation between x and y</param>
    /// <param name="result_mean">output mean for a*x + b*y</param>
    /// <param name="result_SE">output standard error for a*x + b*y</param>
    public static void Sum(double[] x, double[] y, int x_coefficient, double y_coefficient, double correlation, out double result_mean, out double result_SE)
    {
        double mean_x = Mean.GetMean(x);
        double mean_y = Mean.GetMean(y);

        double stddev_x = StdDev.GetStdDev(x, mean_x);
        double stddev_y = StdDev.GetStdDev(y, mean_y);

        result_mean = x_coefficient * mean_x + y_coefficient * mean_y;

        // The product of the two lengths is formed in double precision: as an int
        // product it overflows once x.Length * y.Length exceeds int.MaxValue.
        double sqrt_of_size_product = System.Math.Sqrt((double)x.Length * y.Length);
        result_SE = System.Math.Sqrt(System.Math.Pow(x_coefficient * stddev_x, 2) / x.Length + System.Math.Pow(y_coefficient * stddev_y, 2) / y.Length + 2 * correlation * x_coefficient * stddev_x * y_coefficient * stddev_y / sqrt_of_size_product);
    }

    /// <summary>
    /// Return the distribution of x + y for correlated random variables x and y.
    /// </summary>
    /// <param name="x">random variable x</param>
    /// <param name="y">random variable y</param>
    /// <param name="correlation">correlation between x and y</param>
    /// <returns>The distribution model of x + y</returns>
    public static DistributionModel Sum(DistributionModel x, DistributionModel y, double correlation)
    {
        return Sum(x, y, 1, 1, correlation);
    }

    /// <summary>
    /// Return the distribution of x - y for correlated random variables x and y.
    /// </summary>
    /// <param name="x">random variable x</param>
    /// <param name="y">random variable y</param>
    /// <param name="correlation">correlation between x and y</param>
    /// <returns>The distribution model of x - y</returns>
    public static DistributionModel Diff(DistributionModel x, DistributionModel y, double correlation)
    {
        return Sum(x, y, 1, -1, correlation);
    }

    /// <summary>
    /// Return the mean and standard error of the sample statistic (x - y) for correlated random variables x and y.
    /// </summary>
    /// <param name="x">random sample for random variable x</param>
    /// <param name="y">random sample for random variable y</param>
    /// <param name="correlation">correlation between x and y</param>
    /// <param name="result_mean">output mean for x - y</param>
    /// <param name="result_stddev">output standard error for x - y (computed by the array-based Sum overload)</param>
    public static void Diff(double[] x, double[] y, double correlation, out double result_mean, out double result_stddev)
    {
        Sum(x, y, 1, -1, correlation, out result_mean, out result_stddev);
    }
}
/// <summary>
/// Arithmetic-mean helpers for double and float samples.
/// </summary>
public class Mean
{
    /// <summary>
    /// Return the sample mean averaged from multiple samples, each weighted by its size.
    /// </summary>
    /// <param name="sampleMeans">List of mean for each sample</param>
    /// <param name="sampleSizes">List of size for each sample</param>
    /// <returns>sum_i(size_i * mean_i / totalSize)</returns>
    public static double GetMeanForWeightedAverage(double[] sampleMeans, int[] sampleSizes)
    {
        int grandTotal = 0;
        foreach (int size in sampleSizes)
        {
            grandTotal += size;
        }

        double weighted = 0;
        int idx = 0;
        while (idx < sampleSizes.Length)
        {
            weighted += (sampleSizes[idx] * sampleMeans[idx] / grandTotal);
            idx++;
        }
        return weighted;
    }

    /// <summary>
    /// Arithmetic mean of an array; 0 for an empty array.
    /// </summary>
    public static double GetMean(double[] sample)
    {
        double total = 0;
        foreach (double value in sample)
        {
            total += value;
        }
        if (sample.Length > 0)
        {
            return total / sample.Length;
        }
        return 0;
    }

    /// <summary>
    /// Arithmetic mean of a list; 0 for an empty list.
    /// </summary>
    public static double GetMean(IList<double> sample)
    {
        double total = 0;
        int idx = 0;
        while (idx < sample.Count)
        {
            total += sample[idx];
            idx++;
        }
        if (sample.Count > 0)
        {
            return total / sample.Count;
        }
        return 0;
    }

    /// <summary>
    /// Return the sample mean averaged from multiple samples, each weighted by its size (single precision).
    /// </summary>
    /// <param name="sampleMeans">List of mean for each sample</param>
    /// <param name="sampleSizes">List of size for each sample</param>
    /// <returns>sum_i(size_i * mean_i / totalSize)</returns>
    public static float GetMeanForWeightedAverage(float[] sampleMeans, int[] sampleSizes)
    {
        int grandTotal = 0;
        foreach (int size in sampleSizes)
        {
            grandTotal += size;
        }

        float weighted = 0;
        int idx = 0;
        while (idx < sampleSizes.Length)
        {
            weighted += (sampleSizes[idx] * sampleMeans[idx] / grandTotal);
            idx++;
        }
        return weighted;
    }

    /// <summary>
    /// Arithmetic mean of a float array; 0 for an empty array.
    /// </summary>
    public static float GetMean(float[] sample)
    {
        float total = 0;
        foreach (float value in sample)
        {
            total += value;
        }
        if (sample.Length > 0)
        {
            return total / sample.Length;
        }
        return 0;
    }

    /// <summary>
    /// Arithmetic mean of a float list; 0 for an empty list.
    /// </summary>
    public static float GetMean(IList<float> sample)
    {
        int size = sample.Count;
        float total = 0;
        int idx = 0;
        while (idx < size)
        {
            total += sample[idx];
            idx++;
        }
        if (size > 0)
        {
            return total / size;
        }
        return 0;
    }

}
/// <summary>
/// Median helpers for double and float samples.
/// The input collection is never modified: each call sorts a private copy.
/// </summary>
public class Median
{
    /// <summary>
    /// Median of an array of doubles.
    /// </summary>
    /// <param name="a">The sample; left untouched</param>
    /// <returns>The middle element, or the mean of the two middle elements for an even count</returns>
    /// <exception cref="ArgumentNullException">a is null.</exception>
    /// <exception cref="ArgumentException">a is empty.</exception>
    public static double GetMedian(double[] a)
    {
        if (a == null) throw new ArgumentNullException("a");
        return MedianOfScratch((double[])a.Clone());
    }

    /// <summary>
    /// Median of a list of doubles.
    /// </summary>
    /// <param name="a">The sample; left untouched</param>
    /// <returns>The middle element, or the mean of the two middle elements for an even count</returns>
    /// <exception cref="ArgumentNullException">a is null.</exception>
    /// <exception cref="ArgumentException">a is empty.</exception>
    public static double GetMedian(IList<double> a)
    {
        if (a == null) throw new ArgumentNullException("a");
        double[] scratch = new double[a.Count];
        a.CopyTo(scratch, 0);
        return MedianOfScratch(scratch);
    }

    /// <summary>
    /// Median of an array of floats.
    /// </summary>
    /// <param name="a">The sample; left untouched</param>
    /// <returns>The middle element, or the mean of the two middle elements for an even count</returns>
    /// <exception cref="ArgumentNullException">a is null.</exception>
    /// <exception cref="ArgumentException">a is empty.</exception>
    public static float GetMedian(float[] a)
    {
        if (a == null) throw new ArgumentNullException("a");
        return MedianOfScratch((float[])a.Clone());
    }

    /// <summary>
    /// Median of a list of floats.
    /// </summary>
    /// <param name="a">The sample; left untouched</param>
    /// <returns>The middle element, or the mean of the two middle elements for an even count</returns>
    /// <exception cref="ArgumentNullException">a is null.</exception>
    /// <exception cref="ArgumentException">a is empty.</exception>
    public static float GetMedian(IList<float> a)
    {
        if (a == null) throw new ArgumentNullException("a");
        float[] scratch = new float[a.Count];
        a.CopyTo(scratch, 0);
        return MedianOfScratch(scratch);
    }

    // Sorts the private copy in place with the framework sort and picks the central value(s).
    private static double MedianOfScratch(double[] scratch)
    {
        if (scratch.Length == 0) throw new ArgumentException("Cannot compute the median of an empty sample.");
        Array.Sort(scratch);
        int mid = scratch.Length / 2;
        if (scratch.Length % 2 == 0)
        {
            return (scratch[mid - 1] + scratch[mid]) / 2;
        }
        return scratch[mid];
    }

    // Single-precision counterpart of the helper above.
    private static float MedianOfScratch(float[] scratch)
    {
        if (scratch.Length == 0) throw new ArgumentException("Cannot compute the median of an empty sample.");
        Array.Sort(scratch);
        int mid = scratch.Length / 2;
        if (scratch.Length % 2 == 0)
        {
            return (scratch[mid - 1] + scratch[mid]) / 2;
        }
        return scratch[mid];
    }
}
/// <summary>
/// Testing many pairs of groups is called multiple comparisons.
///
/// In multiple comparisons, each pair of groups is tested to check whether the population means
/// of their classes are the same (H_0) or different (H_A).
/// The t statistic is used to model the sample mean difference distribution of pairwise groups.
/// </summary>
public class MultipleComparisons
{
    /// <summary>
    /// The Bonferroni correction: a more stringent significance level for multiple comparison tests,
    /// obtained by dividing alpha by the number of pairwise comparisons K = k * (k - 1) / 2.
    /// </summary>
    /// <param name="signifiance_level">unadjusted significance level (alpha)</param>
    /// <param name="k">number of groups</param>
    /// <returns>alpha / K, or alpha unchanged when there are fewer than two groups (no comparisons)</returns>
    public static double BonferroniCorrection(double signifiance_level, int k)
    {
        int K = k * (k - 1) / 2; // number of comparisons
        if (K <= 0) return signifiance_level; // nothing to compare: avoid dividing by zero
        return signifiance_level / K;
    }

    /// <summary>
    /// Return a matrix rejectH0Matrix for which rejectH0Matrix[i][j] = true if the pairwise comparison
    /// provides enough evidence that group[i] and group[j] do not have the same mean.
    ///
    /// Hypotheses for a pair of groups, group[i] and group[j]:
    /// H_0 : mu_i = mu_j
    /// H_A : mu_i != mu_j
    ///
    /// Groups are indexed in the order their ids are enumerated from the grouping dictionary.
    /// </summary>
    /// <param name="sample">sample values</param>
    /// <param name="grpCat">group id of each sample value (same length as sample)</param>
    /// <param name="significance_level">significance level for the test (before Bonferroni adjustment)</param>
    /// <returns>Symmetric k-by-k matrix of rejections; the diagonal stays false</returns>
    /// <exception cref="ArgumentException">sample and grpCat differ in length.</exception>
    public static bool[][] RejectH0(double[] sample, int[] grpCat, double significance_level = 0.05)
    {
        if (sample.Length != grpCat.Length)
        {
            throw new ArgumentException("Every sample value needs exactly one group id.");
        }

        ANOVA anova_output;
        ANOVA.RunANOVA(sample, grpCat, out anova_output, significance_level);

        // Bucket the sample values by group id.
        Dictionary<int, List<double>> groupedSample = new Dictionary<int, List<double>>();
        for (int i = 0; i < sample.Length; ++i)
        {
            List<double> grp;
            if (!groupedSample.TryGetValue(grpCat[i], out grp))
            {
                grp = new List<double>();
                groupedSample[grpCat[i]] = grp;
            }
            grp.Add(sample[i]);
        }

        int k = groupedSample.Count; // number of groups
        double alpha_adj = BonferroniCorrection(significance_level, k);

        bool[][] rejectH0Matrix = new bool[k][];
        for (int i = 0; i < k; ++i) // advance the row index (incrementing k here never terminates correctly)
        {
            rejectH0Matrix[i] = new bool[k];
        }

        List<int> groupIdList = groupedSample.Keys.ToList();
        for (int i = 0; i < k - 1; ++i)
        {
            List<double> group1 = groupedSample[groupIdList[i]];
            for (int j = i + 1; j < k; ++j)
            {
                List<double> group2 = groupedSample[groupIdList[j]];
                double pValue = PairwiseCompare(group1, group2, anova_output);
                bool reject_H0 = pValue < alpha_adj;
                rejectH0Matrix[i][j] = reject_H0;
                rejectH0Matrix[j][i] = reject_H0;
            }
        }
        return rejectH0Matrix;
    }

    /// <summary>
    /// Pairwise comparison of group1 and group2 using the pooled MSE and error degrees of freedom from ANOVA.
    /// </summary>
    /// <param name="group1">random sample from class 1</param>
    /// <param name="group2">random sample from class 2</param>
    /// <param name="anova">parameters obtained after ANOVA (MSE and dfE are read)</param>
    /// <returns>two-sided p-value = P(observed or more extreme values | H_0 is true)</returns>
    public static double PairwiseCompare(List<double> group1, List<double> group2, ANOVA anova)
    {
        double x_bar1 = Mean.GetMean(group1);
        double x_bar2 = Mean.GetMean(group2);
        int n1 = group1.Count;
        int n2 = group2.Count;

        int null_value = 0;
        double t = GetTStatistic(x_bar1, x_bar2, n1, n2, null_value, anova.MSE);
        double pValue = GetPValue(t, anova.dfE);
        return pValue;
    }

    /// <summary>
    /// Return the t statistic ((x_bar1 - x_bar2) - null_value) / sqrt(MSE / n1 + MSE / n2).
    /// </summary>
    /// <param name="x_bar1">point estimate of sample mean in class 1</param>
    /// <param name="x_bar2">point estimate of sample mean in class 2</param>
    /// <param name="n1">size of random sample from class 1</param>
    /// <param name="n2">size of random sample from class 2</param>
    /// <param name="null_value">null value from H_0</param>
    /// <param name="MSE">mean squares error obtained after ANOVA</param>
    /// <returns>t statistic</returns>
    private static double GetTStatistic(double x_bar1, double x_bar2, double n1, double n2, double null_value, double MSE)
    {
        return ((x_bar1 - x_bar2) - null_value) / System.Math.Sqrt(MSE / n1 + MSE / n2);
    }

    /// <summary>
    /// Return the two-sided p-value from the Student's t distribution.
    /// </summary>
    /// <param name="t">t statistic</param>
    /// <param name="dfE">degrees of freedom error obtained after ANOVA</param>
    /// <returns>p-value = P(|T| at least as extreme as |t| | H_0 is true)</returns>
    private static double GetPValue(double t, int dfE)
    {
        // NOTE(review): assumes StudentT.GetPercentile is the cumulative probability P(T <= t),
        // matching how ChiSquare.GetPercentile is used as "1 - GetPercentile" in this project's
        // chi-square tests — confirm against StudentT.cs. Returning the percentile itself would
        // give a value >= 0.5 for any |t| and H_0 could never be rejected.
        double upperTail = 1 - StudentT.GetPercentile(System.Math.Abs(t), dfE);
        return 2 * upperTail;
    }
}
/// <summary>
/// Random sampling helpers.
/// NOTE(review): both methods assume DistributionModel.NextInt(n) returns an integer in [0, n) — confirm against DistributionModel.cs.
/// </summary>
public class Sample
{
    /// <summary>
    /// Draw sampleCount items from data such that no position of data is picked twice.
    /// </summary>
    /// <param name="data">The population to draw from; not modified</param>
    /// <param name="sampleCount">Number of items to draw; must not exceed data.Count</param>
    /// <returns>The drawn items in draw order</returns>
    /// <exception cref="ArgumentOutOfRangeException">sampleCount is larger than the population.</exception>
    public static List<T> SampleWithoutReplacement<T>(IList<T> data, int sampleCount)
    {
        if (sampleCount > data.Count)
        {
            throw new ArgumentOutOfRangeException("sampleCount", "Cannot draw more items than the population holds without replacement.");
        }

        List<T> sample = new List<T>();
        // Work on a copy so that removing drawn items does not touch the caller's collection.
        List<T> remaining = data.ToList();
        for (int i = 0; i < sampleCount; ++i)
        {
            int sampleIndex = DistributionModel.NextInt(remaining.Count);
            sample.Add(remaining[sampleIndex]);
            remaining.RemoveAt(sampleIndex); // a drawn item cannot be drawn again
        }
        return sample;
    }

    /// <summary>
    /// Draw sampleCount items from data, each draw made from the full population (items may repeat).
    /// </summary>
    /// <param name="data">The population to draw from; not modified</param>
    /// <param name="sampleCount">Number of items to draw</param>
    /// <returns>The drawn items in draw order</returns>
    /// <exception cref="ArgumentException">data is empty while sampleCount is positive.</exception>
    public static List<T> SampleWithReplacement<T>(IList<T> data, int sampleCount)
    {
        if (sampleCount > 0 && data.Count == 0)
        {
            throw new ArgumentException("Cannot sample from an empty population.", "data");
        }

        List<T> sample = new List<T>();
        for (int i = 0; i < sampleCount; ++i)
        {
            // The index ranges over the whole population, not over the requested sample size.
            T sampleVal = data[DistributionModel.NextInt(data.Count)];
            sample.Add(sampleVal);
        }
        return sample;
    }
}
/// <summary>
/// Standard error of a sampling distribution: the standard deviation of the normal distribution
/// formed by the sample statistic (as follows from the Central Limit Theorem, CLT).
/// </summary>
public class StandardError
{
    /// <summary>
    /// Standard error for a continuous-valued random variable: sampleStddev / sqrt(sampleSize).
    /// </summary>
    /// <param name="sampleStddev">The sample standard deviation from the random sample</param>
    /// <param name="sampleSize">The size of the random sample</param>
    /// <returns>The standard error of the sample statistic (e.g. the sample mean)</returns>
    public static double GetStandardError(double sampleStddev, int sampleSize)
    {
        double rootOfSize = System.Math.Sqrt(sampleSize);
        return sampleStddev / rootOfSize;
    }

    /// <summary>
    /// Standard error for a binary variable v = {"YES", "NO"}: sqrt(p * (1 - p) / sampleSize),
    /// where p is the proportion of individuals responding "YES".
    /// </summary>
    /// <param name="p">Value between 0 and 1: the proportion of the random sample responding "YES"</param>
    /// <param name="sampleSize">The size of the random sample</param>
    /// <returns>Standard error of the sample proportion</returns>
    public static double GetStandardErrorForProportion(double p, int sampleSize)
    {
        double variance = p * (1 - p) / sampleSize;
        return System.Math.Sqrt(variance);
    }

    /// <summary>
    /// Standard error of the sampling distribution estimated from one random sample.
    /// </summary>
    /// <param name="sample">The random sample given</param>
    /// <returns>Standard deviation of the sample (via StdDev.GetStdDev) divided by sqrt(sample.Length)</returns>
    public static double GetStandardError(double[] sample)
    {
        double center = Mean.GetMean(sample);
        double spread = StdDev.GetStdDev(sample, center);
        return GetStandardError(spread, sample.Length);
    }

    /// <summary>
    /// Standard error of the difference between two sample statistics var1 and var2,
    /// assuming var1 and var2 are independent: sqrt(sigma1^2 / n1 + sigma2^2 / n2).
    /// </summary>
    /// <param name="sample_for_var1">random sample for var1</param>
    /// <param name="sample_for_var2">random sample for var2</param>
    /// <returns>Standard error of the difference</returns>
    public static double GetStandardError(double[] sample_for_var1, double[] sample_for_var2)
    {
        double center1 = Mean.GetMean(sample_for_var1);
        double center2 = Mean.GetMean(sample_for_var2);

        double spread1 = StdDev.GetStdDev(sample_for_var1, center1);
        double spread2 = StdDev.GetStdDev(sample_for_var2, center2);

        double combined = spread1 * spread1 / sample_for_var1.Length + spread2 * spread2 / sample_for_var2.Length;
        return System.Math.Sqrt(combined);
    }

    /// <summary>
    /// Standard error of a size-weighted average of several random samples whose individual
    /// standard errors are already known: sqrt(sum_i((n_i * SE_i / N)^2)), N = sum_i(n_i).
    /// </summary>
    /// <param name="sampleSizes">List of size for each random sample</param>
    /// <param name="standardErrors">List of standard error for the sample mean of each random sample</param>
    /// <returns>Standard error of the weighted average</returns>
    public static double GetStandardErrorForWeightAverages(int[] sampleSizes, double[] standardErrors)
    {
        int grandTotal = 0;
        foreach (int size in sampleSizes)
        {
            grandTotal += size;
        }

        double accumulated = 0;
        int idx = 0;
        while (idx < sampleSizes.Length)
        {
            accumulated += System.Math.Pow(sampleSizes[idx] * standardErrors[idx] / grandTotal, 2);
            idx++;
        }
        return System.Math.Sqrt(accumulated);
    }
}
{ 15 | sum += System.Math.Pow(sample[i] - sampleMean, 2); 16 | } 17 | return System.Math.Sqrt(sum / sample.Length); 18 | } 19 | 20 | public static double GetStdDev(IList sample, double sampleMean) 21 | { 22 | double sum = 0; 23 | for (int i = 0; i < sample.Count; ++i) 24 | { 25 | sum += System.Math.Pow(sample[i] - sampleMean, 2); 26 | } 27 | return System.Math.Sqrt(sum / sample.Count); 28 | } 29 | 30 | public static float GetStdDev(float[] sample, float sampleMean) 31 | { 32 | float sum = 0; 33 | for (int i = 0; i < sample.Length; ++i) 34 | { 35 | sum += (float)System.Math.Pow(sample[i] - sampleMean, 2); 36 | } 37 | return (float)System.Math.Sqrt(sum / sample.Length); 38 | } 39 | 40 | public static float GetStdDev(IList sample, float sampleMean) 41 | { 42 | float sum = 0; 43 | for (int i = 0; i < sample.Count; ++i) 44 | { 45 | sum += (float)System.Math.Pow(sample[i] - sampleMean, 2); 46 | } 47 | return (float)System.Math.Sqrt(sum / sample.Count); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("cs-hidden-markov-models")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("cs-hidden-markov-models")] 13 | [assembly: AssemblyCopyright("Copyright © 2017")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. 
// If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("812a4c20-2223-4093-a482-186cdde79470")]

// Version information for an assembly consists of the following four values:
//
//      Major Version
//      Minor Version
//      Build Number
//      Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

--------------------------------------------------------------------------------
/cs-hidden-markov-models/Topology/Ergodic.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HiddenMarkovModels.Helpers;

namespace HiddenMarkovModels.Topology
{
    /// <summary>
    /// Topology in which every state may transition to every state (itself included).
    /// </summary>
    public class Ergodic : ITopology
    {
        protected int mStateCount = 0;
        protected bool mRandom = false;

        /// <summary>
        /// Creates an ergodic topology.
        /// </summary>
        /// <param name="state_count">Number of states</param>
        /// <param name="random">When true, transition probabilities are drawn at random and normalized per row; otherwise every transition gets 1 / state_count</param>
        public Ergodic(int state_count, bool random=false)
        {
            mStateCount = state_count;
            mRandom = random;
        }

        /// <summary>
        /// Builds the transition matrix and the initial state vector, both in log space.
        /// </summary>
        /// <param name="logTransitionMatrix">Receives a state_count x state_count matrix of log transition probabilities</param>
        /// <param name="logInitialState">Receives the log initial probabilities: 0 (probability 1) for state 0, negative infinity for all others</param>
        /// <returns>The number of states</returns>
        public int Create(out double[,] logTransitionMatrix, out double[] logInitialState)
        {
            logTransitionMatrix = new double[mStateCount, mStateCount];
            logInitialState = new double[mStateCount];

            // The model always starts in state 0.
            for (int i = 0; i < mStateCount; ++i)
            {
                logInitialState[i] = double.NegativeInfinity;
            }
            logInitialState[0] = 0.0;

            if (mRandom)
            {
                for (int i = 0; i < mStateCount; ++i)
                {
                    // Draw raw weights for the row, then normalize so the row sums to 1 before taking logs.
                    double sum = 0.0;
                    for (int j = 0; j < mStateCount; ++j)
                    {
                        sum += logTransitionMatrix[i, j] = MathHelper.NextDouble();
                    }
                    for (int j = 0; j < mStateCount; ++j)
                    {
                        double transition_value = logTransitionMatrix[i, j];
                        logTransitionMatrix[i, j] = System.Math.Log(transition_value / sum);
                    }
                }
            }
            else
            {
                for (int i = 0; i < mStateCount; ++i)
                {
                    for (int j = 0; j < mStateCount; ++j)
                    {
                        logTransitionMatrix[i, j] = System.Math.Log(1.0 / mStateCount);
                    }
                }
            }

            return mStateCount;
        }
    }
}

--------------------------------------------------------------------------------
/cs-hidden-markov-models/Topology/Forward.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HiddenMarkovModels.Helpers;

namespace HiddenMarkovModels.Topology
{
    /// <summary>
    /// Topology in which a state may only transition to itself or to a later state.
    /// From state i the reachable states are i, i + 1, ..., limited to at most "deepness" states
    /// and to the last state of the model.
    /// </summary>
    public class Forward : ITopology
    {
        protected int mStateCount;
        protected int mDeepness;
        protected bool mRandom;

        /// <summary>
        /// Creates a forward topology.
        /// </summary>
        /// <param name="state_count">Number of states</param>
        /// <param name="deepness">Maximum number of states (the current one included) reachable from any state</param>
        /// <param name="random">When true, the allowed transitions get random probabilities normalized per row; otherwise they are uniform</param>
        public Forward(int state_count, int deepness, bool random = false)
        {
            mStateCount = state_count;
            mDeepness = deepness;
            mRandom = random;
        }

        /// <summary>
        /// Creates a forward topology whose deepness equals the number of states, i.e. every later state is reachable.
        /// </summary>
        /// <param name="state_count">Number of states</param>
        /// <param name="random">When true, the allowed transitions get random probabilities normalized per row; otherwise they are uniform</param>
        public Forward(int state_count, bool random = false)
            : this(state_count, state_count, random)
        {

        }

        /// <summary>
        /// Builds the transition matrix and the initial state vector, both in log space.
        /// Transitions outside the allowed window keep probability 0 and therefore become negative infinity.
        /// </summary>
        /// <param name="logTransitionMatrix">Receives a state_count x state_count matrix of log transition probabilities</param>
        /// <param name="logInitialState">Receives the log initial probabilities: 0 (probability 1) for state 0, negative infinity for all others</param>
        /// <returns>The number of states</returns>
        public int Create(out double[,] logTransitionMatrix, out double[] logInitialState)
        {
            logTransitionMatrix = new double[mStateCount, mStateCount];
            logInitialState = new double[mStateCount];

            // The model always starts in state 0.
            for (int i = 0; i < mStateCount; ++i)
            {
                logInitialState[i] = double.NegativeInfinity;
            }
            logInitialState[0] = 0.0;

            if (mRandom)
            {
                for (int i = 0; i < mStateCount; ++i)
                {
                    // Same window as the uniform branch below: columns i .. i + deepness - 1, clipped to
                    // the matrix. Bounding j by mDeepness alone left rows i >= deepness empty and ran
                    // past the matrix when deepness > state count.
                    double sum = 0.0;
                    for (int j = i; j < mStateCount && (j - i) < mDeepness; ++j)
                    {
                        sum += logTransitionMatrix[i, j] = MathHelper.NextDouble();
                    }
                    for (int j = i; j < mStateCount && (j - i) < mDeepness; ++j)
                    {
                        double transition_value = logTransitionMatrix[i, j];
                        logTransitionMatrix[i, j] = transition_value / sum;
                    }
                }
            }
            else
            {
                for (int i = 0; i < mStateCount; ++i)
                {
                    // Number of states reachable from i; each gets the same probability.
                    double sum = System.Math.Min(mDeepness, mStateCount - i);
                    for (int j = i; j < mStateCount && (j-i) < mDeepness; ++j)
                    {
                        logTransitionMatrix[i, j] = 1.0 / sum;
                    }
                }
            }

            // Up to here the matrix holds plain probabilities; convert every entry to log space.
            for (int i = 0; i < mStateCount; ++i)
            {
                for (int j = 0; j < mStateCount; ++j)
                {
                    logTransitionMatrix[i, j] = System.Math.Log(logTransitionMatrix[i, j]);
                }
            }

            return mStateCount;
        }
    }
}

--------------------------------------------------------------------------------
/cs-hidden-markov-models/Topology/ITopology.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace HiddenMarkovModels.Topology
{
    /// <summary>
    /// Describes which state transitions a model allows and produces its initial parameters.
    /// </summary>
    public interface ITopology
    {
        /// <summary>
        /// Builds the transition matrix and the initial state vector in log space.
        /// </summary>
        /// <param name="logTransitionMatrix">Receives the matrix of log transition probabilities</param>
        /// <param name="logInitialState">Receives the vector of log initial state probabilities</param>
        /// <returns>The number of states</returns>
        int Create(out double[,] logTransitionMatrix, out double[] logInitialState);
    }
}

--------------------------------------------------------------------------------
/cs-hidden-markov-models/Viterbi.Log.Continuous.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HiddenMarkovModels.Helpers;
using HiddenMarkovModels.MathUtils.Distribution;

namespace HiddenMarkovModels
{
    public partial class Viterbi
    {
        /// <summary>
        /// Instance overload that returns the most likely state path and discards its log-likelihood.
        /// </summary>
        /// <param name="logA">Log transition matrix, indexed [from, to]</param>
        /// <param name="logB">One emission distribution per state</param>
        /// <param name="logPi">Log initial state probabilities</param>
        /// <param name="observations">The observed values, one per time step</param>
        /// <returns>The state index chosen for each time step</returns>
        public int[] LogForward(double[,] logA, DistributionModel[] logB, double[] logPi, double[] observations)
        {
            double logLikelihood = 0;
            return LogForward(logA, logB, logPi, observations, out logLikelihood);
        }

        /// <summary>
        /// Viterbi decoding in log space for continuous observations.
        /// </summary>
        /// <param name="logA">Log transition matrix, indexed [from, to]; must be N x N where N = logPi.Length</param>
        /// <param name="probB">One emission distribution per state (length N); scored through MathHelper.LogProbabilityFunction</param>
        /// <param name="logPi">Log initial state probabilities</param>
        /// <param name="observations">The observed values, one per time step; must not be empty</param>
        /// <param name="logLikelihood">Receives the score of the returned path (the maximum accumulated log weight at the last step)</param>
        /// <returns>The state index chosen for each time step</returns>
        public static int[] LogForward(double[,] logA, DistributionModel[] probB, double[] logPi, double[] observations, out double logLikelihood)
        {
            int T = observations.Length;
            int N = logPi.Length;

            DiagnosticsHelper.Assert(logA.GetLength(0) == N);
            DiagnosticsHelper.Assert(logA.GetLength(1) == N);
            DiagnosticsHelper.Assert(probB.Length == N);

            // V[t, i]: best predecessor of state i at time t (back-pointers).
            int[,] V = new int[T, N];

            // fwd[t, i]: best accumulated log weight of any path ending in state i at time t.
            double[,] fwd = new double[T, N];

            for (int i = 0; i < N; ++i)
            {
                fwd[0, i] = logPi[i] + MathHelper.LogProbabilityFunction(probB[i], observations[0]);
            }

            double maxWeight = 0;
            int maxState = 0;

            for (int t = 1; t < T; ++t)
            {
                double x = observations[t];
                for (int i = 0; i < N; ++i)
                {
                    // Start with predecessor 0; a later predecessor wins only when strictly better.
                    maxWeight = fwd[t - 1, 0] + logA[0, i];
                    maxState = 0;

                    double weight = 0;
                    for (int j = 1; j < N; ++j)
                    {
                        weight = fwd[t - 1, j] + logA[j, i];
                        if (maxWeight < weight)
                        {
                            maxWeight = weight;
                            maxState = j;
                        }
                    }

                    fwd[t, i] = maxWeight + MathHelper.LogProbabilityFunction(probB[i], x);
                    V[t, i] = maxState;
                }
            }

            // Pick the best final state.
            maxState = 0;
            maxWeight = fwd[T - 1, 0];
            for (int i = 0; i < N; ++i)
            {
                if (fwd[T - 1, i] > maxWeight)
                {
                    maxWeight = fwd[T - 1, i];
                    maxState = i;
                }
            }

            // Follow the back-pointers from the last step to the first.
            int[] path = new int[T];
            path[T - 1] = maxState;
            for (int t = T - 2; t >= 0; --t)
            {
                path[t] = V[t + 1, path[t + 1]];
            }

            logLikelihood = maxWeight;

            return path;

        }
    }
}

--------------------------------------------------------------------------------
/cs-hidden-markov-models/Viterbi.Log.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HiddenMarkovModels.Helpers;

namespace HiddenMarkovModels
{
    public partial class Viterbi
    {
        /// <summary>
        /// Instance overload that returns the most likely state path and discards its log-likelihood.
        /// </summary>
        /// <param name="logA">Log transition matrix, indexed [from, to]</param>
        /// <param name="logB">Log emission matrix, indexed [state, symbol]</param>
        /// <param name="logPi">Log initial state probabilities</param>
        /// <param name="observations">The observed symbol indices, one per time step</param>
        /// <returns>The state index chosen for each time step</returns>
        public int[] LogForward(double[,] logA, double[,] logB, double[] logPi, int[] observations)
        {
            double logLikelihood = 0;
            return LogForward(logA, logB, logPi, observations, out logLikelihood);
        }

        /// <summary>
        /// Viterbi decoding in log space for discrete observations.
        /// </summary>
        /// <param name="logA">Log transition matrix, indexed [from, to]; must be N x N where N = logPi.Length</param>
        /// <param name="logB">Log emission matrix, indexed [state, symbol]; must have N rows</param>
        /// <param name="logPi">Log initial state probabilities</param>
        /// <param name="observations">The observed symbol indices, one per time step; must not be empty</param>
        /// <param name="logLikelihood">Receives the score of the returned path (the maximum accumulated log weight at the last step)</param>
        /// <returns>The state index chosen for each time step</returns>
        public static int[] LogForward(double[,] logA, double[,] logB, double[] logPi, int[] observations, out double logLikelihood)
        {
            int T = observations.Length;
            int N = logPi.Length;

            DiagnosticsHelper.Assert(logA.GetLength(0) == N);
            DiagnosticsHelper.Assert(logA.GetLength(1) == N);
            DiagnosticsHelper.Assert(logB.GetLength(0) == N);

            // V[t, i]: best predecessor of state i at time t (back-pointers).
            int[,] V = new int[T, N];

            // fwd[t, i]: best accumulated log weight of any path ending in state i at time t.
            double[,] fwd = new double[T, N];

            for (int i = 0; i < N; ++i)
            {
                fwd[0, i] = logPi[i] + logB[i, observations[0]];
            }

            double maxWeight = 0;
            int maxState = 0;

            for (int t = 1; t < T; ++t)
            {
                for (int i = 0; i < N; ++i)
                {
                    // Start with predecessor 0; a later predecessor wins only when strictly better.
                    maxWeight = fwd[t - 1, 0] + logA[0, i];
                    maxState = 0;

                    double weight = 0;
                    for (int j = 1; j < N; ++j)
                    {
                        weight = fwd[t - 1, j] + logA[j, i];
                        if (maxWeight < weight)
                        {
                            maxWeight = weight;
                            maxState = j;
                        }
                    }

                    fwd[t, i] = maxWeight + logB[i, observations[t]];
                    V[t, i] = maxState;
                }
            }

            // Pick the best final state.
            maxState = 0;
            maxWeight = fwd[T - 1, 0];
            for (int i = 0; i < N; ++i)
            {
                if (fwd[T - 1, i] > maxWeight)
                {
                    maxWeight = fwd[T - 1, i];
                    maxState = i;
                }
            }

            // Follow the back-pointers from the last step to the first.
            int[] path = new int[T];
            path[T - 1] = maxState;
            for (int t = T - 2; t >= 0; --t)
            {
                path[t] = V[t + 1, path[t + 1]];
            }

            logLikelihood = maxWeight;

            return path;

        }
    }
}

--------------------------------------------------------------------------------
/cs-hidden-markov-models/Viterbi.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HiddenMarkovModels.Helpers;

namespace HiddenMarkovModels
{
public partial class Viterbi
{
    /// <summary>
    /// Viterbi decoding with plain (non-log) probabilities for discrete observations.
    /// </summary>
    /// <param name="A">Transition matrix, indexed [from, to]; must be N x N where N = pi.Length</param>
    /// <param name="B">Emission matrix, indexed [state, symbol]; must have N rows</param>
    /// <param name="pi">Initial state probabilities</param>
    /// <param name="observations">The observed symbol indices, one per time step</param>
    /// <param name="logLikelihood">Receives the natural log of the best path's accumulated weight</param>
    /// <returns>The state index chosen for each time step</returns>
    public static int[] Forward(double[,] A, double[,] B, double[] pi, int[] observations, out double logLikelihood)
    {
        int stepCount = observations.Length;
        int stateCount = pi.Length;

        DiagnosticsHelper.Assert(A.GetLength(0) == stateCount);
        DiagnosticsHelper.Assert(A.GetLength(1) == stateCount);
        DiagnosticsHelper.Assert(B.GetLength(0) == stateCount);

        // backPointer[t, s]: best predecessor of state s at step t.
        int[,] backPointer = new int[stepCount, stateCount];
        // best[t, s]: largest accumulated weight of any path ending in state s at step t.
        double[,] best = new double[stepCount, stateCount];

        for (int s = 0; s < stateCount; ++s)
        {
            best[0, s] = pi[s] * B[s, observations[0]];
        }

        for (int t = 1; t < stepCount; ++t)
        {
            int previous = t - 1;
            for (int s = 0; s < stateCount; ++s)
            {
                // Predecessor 0 is the incumbent; a later one replaces it only when strictly better.
                double topWeight = best[previous, 0] * A[0, s];
                int topState = 0;

                for (int from = 1; from < stateCount; ++from)
                {
                    double candidate = best[previous, from] * A[from, s];
                    if (topWeight < candidate)
                    {
                        topWeight = candidate;
                        topState = from;
                    }
                }

                best[t, s] = topWeight * B[s, observations[t]];
                backPointer[t, s] = topState;
            }
        }

        // Best final state; state 0 is the incumbent, so the scan can start at 1.
        int last = stepCount - 1;
        int finalState = 0;
        double finalWeight = best[last, 0];
        for (int s = 1; s < stateCount; ++s)
        {
            if (best[last, s] > finalWeight)
            {
                finalWeight = best[last, s];
                finalState = s;
            }
        }

        // Walk the back-pointers from the last step to the first.
        int[] path = new int[stepCount];
        path[last] = finalState;
        for (int t = last - 1; t >= 0; --t)
        {
            path[t] = backPointer[t + 1, path[t + 1]];
        }

        logLikelihood = System.Math.Log(finalWeight);

        return path;
    }

    /// <summary>
    /// Overload that returns the most likely state path and discards its log-likelihood.
    /// </summary>
    /// <param name="A">Transition matrix, indexed [from, to]</param>
    /// <param name="B">Emission matrix, indexed [state, symbol]</param>
    /// <param name="pi">Initial state probabilities</param>
    /// <param name="observation">The observed symbol indices, one per time step</param>
    /// <returns>The state index chosen for each time step</returns>
    public static int[] Forward(double[,] A, double[,] B, double[] pi, int[] observation)
    {
        double ignoredLogLikelihood = 0;
        return Forward(A, B, pi, observation, out ignoredLogLikelihood);
    }



}
}

--------------------------------------------------------------------------------
/cs-hidden-markov-models/cs-hidden-markov-models.csproj:
--------------------------------------------------------------------------------
1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 |
{812A4C20-2223-4093-A482-186CDDE79470} 8 | Library 9 | Properties 10 | cs_hidden_markov_models 11 | cs-hidden-markov-models 12 | v4.5.2 13 | 512 14 | 15 | 16 | true 17 | full 18 | false 19 | bin\Debug\ 20 | DEBUG;TRACE 21 | prompt 22 | 4 23 | 24 | 25 | pdbonly 26 | true 27 | bin\Release\ 28 | TRACE 29 | prompt 30 | 4 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 129 | -------------------------------------------------------------------------------- /cs-hidden-markov-models/cs-hidden-markov-models.csproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | ProjectFiles 5 | 6 | -------------------------------------------------------------------------------- /icon.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/cs-hidden-markov-models/8a0887e5294148cac60664d68f57a0594889854f/icon.jpg -------------------------------------------------------------------------------- /notes/nuget-packaging.md: -------------------------------------------------------------------------------- 1 | Run the following command line: 2 | 3 | ```bash 4 | cd nuget/ 5 | nuget spec 6 | nuget pack cs-hidden-markov-models.nuspec 7 | ``` -------------------------------------------------------------------------------- /nuget/cs-hidden-markov-models.1.0.1.nupkg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chen0040/cs-hidden-markov-models/8a0887e5294148cac60664d68f57a0594889854f/nuget/cs-hidden-markov-models.1.0.1.nupkg -------------------------------------------------------------------------------- /nuget/cs-hidden-markov-models.nuspec: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | cs-hidden-markov-models 5 | 1.0.1 6 | chen0040 7 | chen0040 8 | https://github.com/chen0040/cs-hidden-markov-models/blob/master/LICENSE 9 | https://github.com/chen0040/cs-hidden-markov-models 10 | https://rawgit.com/chen0040/cs-hidden-markov-models/master/icon.jpg 11 | false 12 | cs-hidden-markov-models 13 | Hidden Markov Models 14 | Copyright 2017 15 | hidden-markov-models, c-sharp 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /nuget/lib/net452/Debug/cs-hidden-markov-models.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/cs-hidden-markov-models/8a0887e5294148cac60664d68f57a0594889854f/nuget/lib/net452/Debug/cs-hidden-markov-models.dll -------------------------------------------------------------------------------- /nuget/lib/net452/Release/cs-hidden-markov-models.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/cs-hidden-markov-models/8a0887e5294148cac60664d68f57a0594889854f/nuget/lib/net452/Release/cs-hidden-markov-models.dll --------------------------------------------------------------------------------