├── .github
└── FUNDING.yml
├── .gitignore
├── .travis.yml
├── JSAT
├── .gitignore
├── nb-configuration.xml
├── nbactions.xml
├── pom.xml
├── src
│ └── jsat
│ │ ├── ColumnMajorStore.java
│ │ ├── DataSet.java
│ │ ├── DataStore.java
│ │ ├── RowMajorStore.java
│ │ ├── SimpleDataSet.java
│ │ ├── SimpleWeightVectorModel.java
│ │ ├── SingleWeightVectorModel.java
│ │ ├── classifiers
│ │ ├── BaseUpdateableClassifier.java
│ │ ├── CategoricalData.java
│ │ ├── CategoricalResults.java
│ │ ├── ClassificationDataSet.java
│ │ ├── ClassificationModelEvaluation.java
│ │ ├── Classifier.java
│ │ ├── DDAG.java
│ │ ├── DataPoint.java
│ │ ├── DataPointPair.java
│ │ ├── MajorityVote.java
│ │ ├── OneVSAll.java
│ │ ├── OneVSOne.java
│ │ ├── PriorClassifier.java
│ │ ├── RegressorToClassifier.java
│ │ ├── Rocchio.java
│ │ ├── UpdateableClassifier.java
│ │ ├── WarmClassifier.java
│ │ ├── bayesian
│ │ │ ├── AODE.java
│ │ │ ├── BestClassDistribution.java
│ │ │ ├── ConditionalProbabilityTable.java
│ │ │ ├── MultinomialNaiveBayes.java
│ │ │ ├── MultivariateNormals.java
│ │ │ ├── NaiveBayes.java
│ │ │ ├── NaiveBayesUpdateable.java
│ │ │ ├── ODE.java
│ │ │ └── graphicalmodel
│ │ │ │ ├── DirectedGraph.java
│ │ │ │ ├── DiscreteBayesNetwork.java
│ │ │ │ └── K2NetworkLearner.java
│ │ ├── boosting
│ │ │ ├── AdaBoostM1.java
│ │ │ ├── ArcX4.java
│ │ │ ├── Bagging.java
│ │ │ ├── EmphasisBoost.java
│ │ │ ├── LogitBoost.java
│ │ │ ├── LogitBoostPL.java
│ │ │ ├── ModestAdaBoost.java
│ │ │ ├── SAMME.java
│ │ │ ├── Stacking.java
│ │ │ ├── UpdatableStacking.java
│ │ │ ├── Wagging.java
│ │ │ └── WaggingNormal.java
│ │ ├── calibration
│ │ │ ├── BinaryCalibration.java
│ │ │ ├── BinaryScoreClassifier.java
│ │ │ ├── IsotonicCalibration.java
│ │ │ └── PlattCalibration.java
│ │ ├── evaluation
│ │ │ ├── AUC.java
│ │ │ ├── Accuracy.java
│ │ │ ├── BalancedAccuracy.java
│ │ │ ├── ClassificationScore.java
│ │ │ ├── F1Score.java
│ │ │ ├── FbetaScore.java
│ │ │ ├── Kappa.java
│ │ │ ├── LogLoss.java
│ │ │ ├── MatthewsCorrelationCoefficient.java
│ │ │ ├── Precision.java
│ │ │ ├── Recall.java
│ │ │ └── SimpleBinaryClassMetric.java
│ │ ├── imbalance
│ │ │ ├── BorderlineSMOTE.java
│ │ │ └── SMOTE.java
│ │ ├── knn
│ │ │ ├── DANN.java
│ │ │ ├── LWL.java
│ │ │ └── NearestNeighbour.java
│ │ ├── linear
│ │ │ ├── ALMA2.java
│ │ │ ├── AROW.java
│ │ │ ├── BBR.java
│ │ │ ├── LinearBatch.java
│ │ │ ├── LinearL1SCD.java
│ │ │ ├── LinearSGD.java
│ │ │ ├── LinearTools.java
│ │ │ ├── LogisticRegressionDCD.java
│ │ │ ├── NHERD.java
│ │ │ ├── NewGLMNET.java
│ │ │ ├── OWA.java
│ │ │ ├── PassiveAggressive.java
│ │ │ ├── ROMMA.java
│ │ │ ├── SCD.java
│ │ │ ├── SCW.java
│ │ │ ├── SDCA.java
│ │ │ ├── SMIDAS.java
│ │ │ ├── SPA.java
│ │ │ ├── STGD.java
│ │ │ ├── StochasticMultinomialLogisticRegression.java
│ │ │ ├── StochasticSTLinearL1.java
│ │ │ └── kernelized
│ │ │ │ ├── ALMA2K.java
│ │ │ │ ├── BOGD.java
│ │ │ │ ├── CSKLR.java
│ │ │ │ ├── CSKLRBatch.java
│ │ │ │ ├── DUOL.java
│ │ │ │ ├── Forgetron.java
│ │ │ │ ├── KernelSGD.java
│ │ │ │ ├── OSKL.java
│ │ │ │ └── Projectron.java
│ │ ├── neuralnetwork
│ │ │ ├── BackPropagationNet.java
│ │ │ ├── DReDNetSimple.java
│ │ │ ├── LVQ.java
│ │ │ ├── LVQLLC.java
│ │ │ ├── Perceptron.java
│ │ │ ├── RBFNet.java
│ │ │ ├── SGDNetworkTrainer.java
│ │ │ ├── SOM.java
│ │ │ ├── activations
│ │ │ │ ├── ActivationLayer.java
│ │ │ │ ├── LinearLayer.java
│ │ │ │ ├── ReLU.java
│ │ │ │ ├── SigmoidLayer.java
│ │ │ │ ├── SoftSignLayer.java
│ │ │ │ ├── SoftmaxLayer.java
│ │ │ │ └── TanhLayer.java
│ │ │ ├── initializers
│ │ │ │ ├── BiastInitializer.java
│ │ │ │ ├── ConstantInit.java
│ │ │ │ ├── GaussianNormalInit.java
│ │ │ │ ├── TanhInitializer.java
│ │ │ │ └── WeightInitializer.java
│ │ │ └── regularizers
│ │ │ │ ├── Max2NormRegularizer.java
│ │ │ │ └── WeightRegularizer.java
│ │ ├── svm
│ │ │ ├── DCD.java
│ │ │ ├── DCDs.java
│ │ │ ├── DCSVM.java
│ │ │ ├── LSSVM.java
│ │ │ ├── Pegasos.java
│ │ │ ├── PegasosK.java
│ │ │ ├── PlattSMO.java
│ │ │ ├── SBP.java
│ │ │ ├── SVMnoBias.java
│ │ │ ├── SupportVectorLearner.java
│ │ │ └── extended
│ │ │ │ ├── AMM.java
│ │ │ │ ├── CPM.java
│ │ │ │ └── OnlineAMM.java
│ │ └── trees
│ │ │ ├── DecisionStump.java
│ │ │ ├── DecisionTree.java
│ │ │ ├── ERTrees.java
│ │ │ ├── ExtraTree.java
│ │ │ ├── ID3.java
│ │ │ ├── ImportanceByUses.java
│ │ │ ├── ImpurityScore.java
│ │ │ ├── MDA.java
│ │ │ ├── MDI.java
│ │ │ ├── RandomDecisionTree.java
│ │ │ ├── RandomForest.java
│ │ │ ├── TreeFeatureImportanceInference.java
│ │ │ ├── TreeLearner.java
│ │ │ ├── TreeNodeVisitor.java
│ │ │ └── TreePruner.java
│ │ ├── clustering
│ │ ├── BayesianHAC.java
│ │ ├── CLARA.java
│ │ ├── ClusterFailureException.java
│ │ ├── Clusterer.java
│ │ ├── ClustererBase.java
│ │ ├── DBSCAN.java
│ │ ├── EMGaussianMixture.java
│ │ ├── FLAME.java
│ │ ├── GapStatistic.java
│ │ ├── HDBSCAN.java
│ │ ├── KClusterer.java
│ │ ├── KClustererBase.java
│ │ ├── LSDBC.java
│ │ ├── MEDDIT.java
│ │ ├── MeanShift.java
│ │ ├── OPTICS.java
│ │ ├── PAM.java
│ │ ├── SeedSelectionMethods.java
│ │ ├── TRIKMEDS.java
│ │ ├── VBGMM.java
│ │ ├── biclustering
│ │ │ ├── Bicluster.java
│ │ │ ├── ConsensusScore.java
│ │ │ └── SpectralCoClustering.java
│ │ ├── dissimilarity
│ │ │ ├── AbstractClusterDissimilarity.java
│ │ │ ├── AverageLinkDissimilarity.java
│ │ │ ├── CentroidDissimilarity.java
│ │ │ ├── ClusterDissimilarity.java
│ │ │ ├── CompleteLinkDissimilarity.java
│ │ │ ├── DistanceMetricDissimilarity.java
│ │ │ ├── LanceWilliamsDissimilarity.java
│ │ │ ├── MedianDissimilarity.java
│ │ │ ├── SingleLinkDissimilarity.java
│ │ │ ├── UpdatableClusterDissimilarity.java
│ │ │ └── WardsDissimilarity.java
│ │ ├── evaluation
│ │ │ ├── AdjustedRandIndex.java
│ │ │ ├── ClusterEvaluation.java
│ │ │ ├── ClusterEvaluationBase.java
│ │ │ ├── Completeness.java
│ │ │ ├── DaviesBouldinIndex.java
│ │ │ ├── DunnIndex.java
│ │ │ ├── Homogeneity.java
│ │ │ ├── IntraClusterSumEvaluation.java
│ │ │ ├── NormalizedMutualInformation.java
│ │ │ ├── VMeasure.java
│ │ │ └── intra
│ │ │ │ ├── IntraClusterEvaluation.java
│ │ │ │ ├── MaxDistance.java
│ │ │ │ ├── MeanCentroidDistance.java
│ │ │ │ ├── MeanDistance.java
│ │ │ │ ├── SoSCentroidDistance.java
│ │ │ │ └── SumOfSqrdPairwiseDistances.java
│ │ ├── hierarchical
│ │ │ ├── DivisiveGlobalClusterer.java
│ │ │ ├── DivisiveLocalClusterer.java
│ │ │ ├── NNChainHAC.java
│ │ │ ├── PriorityHAC.java
│ │ │ └── SimpleHAC.java
│ │ └── kmeans
│ │ │ ├── ElkanKMeans.java
│ │ │ ├── ElkanKernelKMeans.java
│ │ │ ├── GMeans.java
│ │ │ ├── HamerlyKMeans.java
│ │ │ ├── KMeans.java
│ │ │ ├── KMeansPDN.java
│ │ │ ├── KernelKMeans.java
│ │ │ ├── LloydKernelKMeans.java
│ │ │ ├── MiniBatchKMeans.java
│ │ │ ├── NaiveKMeans.java
│ │ │ └── XMeans.java
│ │ ├── datatransform
│ │ ├── AutoDeskewTransform.java
│ │ ├── DataModelPipeline.java
│ │ ├── DataTransform.java
│ │ ├── DataTransformBase.java
│ │ ├── DataTransformProcess.java
│ │ ├── DenseSparceTransform.java
│ │ ├── FastICA.java
│ │ ├── FixedDataTransform.java
│ │ ├── Imputer.java
│ │ ├── InPlaceInvertibleTransform.java
│ │ ├── InPlaceTransform.java
│ │ ├── InsertMissingValuesTransform.java
│ │ ├── InverseOfTransform.java
│ │ ├── InvertibleTransform.java
│ │ ├── JLTransform.java
│ │ ├── LinearTransform.java
│ │ ├── NominalToNumeric.java
│ │ ├── NumericalToHistogram.java
│ │ ├── PCA.java
│ │ ├── PNormNormalization.java
│ │ ├── PolynomialTransform.java
│ │ ├── ProjectionTransform.java
│ │ ├── RemoveAttributeTransform.java
│ │ ├── StandardizeTransform.java
│ │ ├── UnitVarianceTransform.java
│ │ ├── WhitenedPCA.java
│ │ ├── WhitenedZCA.java
│ │ ├── ZeroMeanTransform.java
│ │ ├── featureselection
│ │ │ ├── BDS.java
│ │ │ ├── LRS.java
│ │ │ ├── MutualInfoFS.java
│ │ │ ├── ReliefF.java
│ │ │ ├── SBS.java
│ │ │ └── SFS.java
│ │ ├── kernel
│ │ │ ├── KernelPCA.java
│ │ │ ├── Nystrom.java
│ │ │ └── RFF_RBF.java
│ │ └── visualization
│ │ │ ├── Isomap.java
│ │ │ ├── LargeViz.java
│ │ │ ├── MDS.java
│ │ │ ├── TSNE.java
│ │ │ └── VisualizationTransform.java
│ │ ├── distributions
│ │ ├── Beta.java
│ │ ├── Cauchy.java
│ │ ├── ChiSquared.java
│ │ ├── ContinuousDistribution.java
│ │ ├── Distribution.java
│ │ ├── DistributionSearch.java
│ │ ├── Exponential.java
│ │ ├── FisherSendor.java
│ │ ├── Gamma.java
│ │ ├── Kolmogorov.java
│ │ ├── Kumaraswamy.java
│ │ ├── Laplace.java
│ │ ├── Levy.java
│ │ ├── LogNormal.java
│ │ ├── LogUniform.java
│ │ ├── Logistic.java
│ │ ├── MaxwellBoltzmann.java
│ │ ├── Normal.java
│ │ ├── Pareto.java
│ │ ├── Rayleigh.java
│ │ ├── StudentT.java
│ │ ├── TruncatedDistribution.java
│ │ ├── Uniform.java
│ │ ├── Weibull.java
│ │ ├── discrete
│ │ │ ├── Binomial.java
│ │ │ ├── DiscreteDistribution.java
│ │ │ ├── Poisson.java
│ │ │ ├── UniformDiscrete.java
│ │ │ └── Zipf.java
│ │ ├── empirical
│ │ │ ├── KernelDensityEstimator.java
│ │ │ └── kernelfunc
│ │ │ │ ├── BiweightKF.java
│ │ │ │ ├── EpanechnikovKF.java
│ │ │ │ ├── GaussKF.java
│ │ │ │ ├── KernelFunction.java
│ │ │ │ ├── TriweightKF.java
│ │ │ │ └── UniformKF.java
│ │ ├── kernels
│ │ │ ├── BaseKernelTrick.java
│ │ │ ├── BaseL2Kernel.java
│ │ │ ├── DistanceMetricBasedKernel.java
│ │ │ ├── GeneralRBFKernel.java
│ │ │ ├── KernelPoint.java
│ │ │ ├── KernelPoints.java
│ │ │ ├── KernelTrick.java
│ │ │ ├── LinearKernel.java
│ │ │ ├── NormalizedKernel.java
│ │ │ ├── PolynomialKernel.java
│ │ │ ├── PukKernel.java
│ │ │ ├── RBFKernel.java
│ │ │ ├── RationalQuadraticKernel.java
│ │ │ └── SigmoidKernel.java
│ │ └── multivariate
│ │ │ ├── Dirichlet.java
│ │ │ ├── IndependentDistribution.java
│ │ │ ├── MetricKDE.java
│ │ │ ├── MultivariateDistribution.java
│ │ │ ├── MultivariateDistributionSkeleton.java
│ │ │ ├── MultivariateKDE.java
│ │ │ ├── NormalM.java
│ │ │ ├── NormalMR.java
│ │ │ ├── ProductKDE.java
│ │ │ └── SymmetricDirichlet.java
│ │ ├── driftdetectors
│ │ ├── ADWIN.java
│ │ ├── BaseDriftDetector.java
│ │ ├── DDM.java
│ │ └── UnhandledDriftException.java
│ │ ├── exceptions
│ │ ├── FailedToFitException.java
│ │ ├── ModelMismatchException.java
│ │ └── UntrainedModelException.java
│ │ ├── io
│ │ ├── ARFFLoader.java
│ │ ├── CSV.java
│ │ ├── DataWriter.java
│ │ ├── JSATData.java
│ │ └── LIBSVMLoader.java
│ │ ├── linear
│ │ ├── CholeskyDecomposition.java
│ │ ├── ConcatenatedVec.java
│ │ ├── ConstantVector.java
│ │ ├── DenseMatrix.java
│ │ ├── DenseVector.java
│ │ ├── EigenValueDecomposition.java
│ │ ├── GenericMatrix.java
│ │ ├── HessenbergForm.java
│ │ ├── IndexValue.java
│ │ ├── LUPDecomposition.java
│ │ ├── Lanczos.java
│ │ ├── Matrix.java
│ │ ├── MatrixOfVecs.java
│ │ ├── MatrixStatistics.java
│ │ ├── Poly2Vec.java
│ │ ├── QRDecomposition.java
│ │ ├── RandomMatrix.java
│ │ ├── RandomVector.java
│ │ ├── RowColumnOps.java
│ │ ├── ScaledVector.java
│ │ ├── ShiftedVec.java
│ │ ├── SingularValueDecomposition.java
│ │ ├── SparseMatrix.java
│ │ ├── SparseVector.java
│ │ ├── SubMatrix.java
│ │ ├── SubVector.java
│ │ ├── TransposeView.java
│ │ ├── TruncatedSVD.java
│ │ ├── Vec.java
│ │ ├── VecOps.java
│ │ ├── VecPaired.java
│ │ ├── VecPairedComparable.java
│ │ ├── VecWithNorm.java
│ │ ├── distancemetrics
│ │ │ ├── ChebyshevDistance.java
│ │ │ ├── CosineDistance.java
│ │ │ ├── CosineDistanceNormalized.java
│ │ │ ├── DenseSparseMetric.java
│ │ │ ├── DistanceCounter.java
│ │ │ ├── DistanceMetric.java
│ │ │ ├── EuclideanDistance.java
│ │ │ ├── JaccardDistance.java
│ │ │ ├── KernelDistance.java
│ │ │ ├── MahalanobisDistance.java
│ │ │ ├── ManhattanDistance.java
│ │ │ ├── MinkowskiDistance.java
│ │ │ ├── NormalizedEuclideanDistance.java
│ │ │ ├── PearsonDistance.java
│ │ │ ├── SquaredEuclideanDistance.java
│ │ │ ├── TrainableDistanceMetric.java
│ │ │ └── WeightedEuclideanDistance.java
│ │ ├── solvers
│ │ │ └── ConjugateGradient.java
│ │ └── vectorcollection
│ │ │ ├── BallTree.java
│ │ │ ├── BaseCaseDT.java
│ │ │ ├── CoverTree.java
│ │ │ ├── DCI.java
│ │ │ ├── DefaultVectorCollection.java
│ │ │ ├── DualTree.java
│ │ │ ├── IncrementalCollection.java
│ │ │ ├── IndexDistPair.java
│ │ │ ├── IndexNode.java
│ │ │ ├── IndexTuple.java
│ │ │ ├── KDTree.java
│ │ │ ├── RTree.java
│ │ │ ├── RandomBallCover.java
│ │ │ ├── RandomBallCoverOneShot.java
│ │ │ ├── SVPTree.java
│ │ │ ├── ScoreDT.java
│ │ │ ├── ScoreDTLazy.java
│ │ │ ├── VPTree.java
│ │ │ ├── VPTreeMV.java
│ │ │ ├── VectorArray.java
│ │ │ ├── VectorCollection.java
│ │ │ ├── VectorCollectionUtils.java
│ │ │ └── lsh
│ │ │ ├── E2LSH.java
│ │ │ └── RandomProjectionLSH.java
│ │ ├── lossfunctions
│ │ ├── AbsoluteLoss.java
│ │ ├── EpsilonInsensitiveLoss.java
│ │ ├── HingeLoss.java
│ │ ├── HuberLoss.java
│ │ ├── LogisticLoss.java
│ │ ├── LossC.java
│ │ ├── LossFunc.java
│ │ ├── LossMC.java
│ │ ├── LossR.java
│ │ ├── SoftmaxLoss.java
│ │ └── SquaredLoss.java
│ │ ├── math
│ │ ├── Complex.java
│ │ ├── ContinuedFraction.java
│ │ ├── DescriptiveStatistics.java
│ │ ├── ExponentialMovingStatistics.java
│ │ ├── FastMath.java
│ │ ├── Function.java
│ │ ├── Function1D.java
│ │ ├── FunctionMat.java
│ │ ├── FunctionVec.java
│ │ ├── IndexFunction.java
│ │ ├── MathTricks.java
│ │ ├── OnLineStatistics.java
│ │ ├── SimpleLinearRegression.java
│ │ ├── SpecialMath.java
│ │ ├── TrigMath.java
│ │ ├── decayrates
│ │ │ ├── DecayRate.java
│ │ │ ├── ExponetialDecay.java
│ │ │ ├── InverseDecay.java
│ │ │ ├── LinearDecay.java
│ │ │ ├── NoDecay.java
│ │ │ └── PowerDecay.java
│ │ ├── integration
│ │ │ ├── AdaptiveSimpson.java
│ │ │ ├── Romberg.java
│ │ │ └── Trapezoidal.java
│ │ ├── optimization
│ │ │ ├── BFGS.java
│ │ │ ├── BacktrackingArmijoLineSearch.java
│ │ │ ├── GoldenSearch.java
│ │ │ ├── LBFGS.java
│ │ │ ├── LineSearch.java
│ │ │ ├── ModifiedOWLQN.java
│ │ │ ├── NelderMead.java
│ │ │ ├── Optimizer.java
│ │ │ ├── RosenbrockFunction.java
│ │ │ ├── WolfeNWLineSearch.java
│ │ │ ├── oned
│ │ │ │ └── GoldenSearch.java
│ │ │ └── stochastic
│ │ │ │ ├── AdaDelta.java
│ │ │ │ ├── AdaGrad.java
│ │ │ │ ├── Adam.java
│ │ │ │ ├── GradientUpdater.java
│ │ │ │ ├── NAdaGrad.java
│ │ │ │ ├── RMSProp.java
│ │ │ │ ├── Rprop.java
│ │ │ │ ├── SGDMomentum.java
│ │ │ │ └── SimpleSGD.java
│ │ └── rootfinding
│ │ │ ├── Bisection.java
│ │ │ ├── RiddersMethod.java
│ │ │ ├── RootFinder.java
│ │ │ ├── Secant.java
│ │ │ └── Zeroin.java
│ │ ├── outlier
│ │ ├── DensityOutlier.java
│ │ ├── IsolationForest.java
│ │ ├── LOF.java
│ │ ├── LinearOCSVM.java
│ │ ├── LoOP.java
│ │ └── Outlier.java
│ │ ├── parameters
│ │ ├── BooleanParameter.java
│ │ ├── DecayRateParameter.java
│ │ ├── DoubleParameter.java
│ │ ├── GridSearch.java
│ │ ├── IntParameter.java
│ │ ├── KernelFunctionParameter.java
│ │ ├── MetricParameter.java
│ │ ├── ModelSearch.java
│ │ ├── ObjectParameter.java
│ │ ├── Parameter.java
│ │ ├── Parameterized.java
│ │ └── RandomSearch.java
│ │ ├── regression
│ │ ├── AveragedRegressor.java
│ │ ├── BaseUpdateableRegressor.java
│ │ ├── KernelRLS.java
│ │ ├── KernelRidgeRegression.java
│ │ ├── MultipleLinearRegression.java
│ │ ├── NadarayaWatson.java
│ │ ├── OrdinaryKriging.java
│ │ ├── RANSAC.java
│ │ ├── RegressionDataSet.java
│ │ ├── RegressionModelEvaluation.java
│ │ ├── Regressor.java
│ │ ├── RidgeRegression.java
│ │ ├── StochasticGradientBoosting.java
│ │ ├── StochasticRidgeRegression.java
│ │ ├── UpdateableRegressor.java
│ │ ├── WarmRegressor.java
│ │ └── evaluation
│ │ │ ├── CoefficientOfDetermination.java
│ │ │ ├── MeanAbsoluteError.java
│ │ │ ├── MeanSquaredError.java
│ │ │ ├── RegressionScore.java
│ │ │ ├── RelativeAbsoluteError.java
│ │ │ ├── RelativeSquaredError.java
│ │ │ └── TotalHistoryRegressionScore.java
│ │ ├── testing
│ │ ├── StatisticTest.java
│ │ ├── goodnessoffit
│ │ │ └── KSTest.java
│ │ └── onesample
│ │ │ ├── OneSampleTest.java
│ │ │ ├── TTest.java
│ │ │ └── ZTest.java
│ │ ├── text
│ │ ├── BasicTextVectorCreator.java
│ │ ├── ClassificationHashedTextDataLoader.java
│ │ ├── ClassificationTextDataLoader.java
│ │ ├── GreekLetters.java
│ │ ├── HashedTextDataLoader.java
│ │ ├── HashedTextVectorCreator.java
│ │ ├── TextDataLoader.java
│ │ ├── TextVectorCreator.java
│ │ ├── stemming
│ │ │ ├── LovinsStemmer.java
│ │ │ ├── PaiceHuskStemmer.java
│ │ │ ├── PorterStemmer.java
│ │ │ ├── Stemmer.java
│ │ │ └── VoidStemmer.java
│ │ ├── tokenizer
│ │ │ ├── NGramTokenizer.java
│ │ │ ├── NaiveTokenizer.java
│ │ │ ├── StemmingTokenizer.java
│ │ │ ├── StopWordTokenizer.java
│ │ │ └── Tokenizer.java
│ │ ├── topicmodel
│ │ │ └── OnlineLDAsvi.java
│ │ └── wordweighting
│ │ │ ├── BinaryWordPresent.java
│ │ │ ├── OkapiBM25.java
│ │ │ ├── TfIdf.java
│ │ │ ├── WordCount.java
│ │ │ └── WordWeighting.java
│ │ └── utils
│ │ ├── ArrayUtils.java
│ │ ├── BooleanList.java
│ │ ├── BoundedSortedList.java
│ │ ├── BoundedSortedSet.java
│ │ ├── ClosedHashingUtil.java
│ │ ├── DoubleList.java
│ │ ├── FakeExecutor.java
│ │ ├── FibHeap.java
│ │ ├── GridDataGenerator.java
│ │ ├── IndexTable.java
│ │ ├── IntDoubleMap.java
│ │ ├── IntDoubleMapArray.java
│ │ ├── IntList.java
│ │ ├── IntPriorityQueue.java
│ │ ├── IntSet.java
│ │ ├── IntSetFixedSize.java
│ │ ├── IntSortedSet.java
│ │ ├── IterableIterator.java
│ │ ├── ListUtils.java
│ │ ├── LongDoubleMap.java
│ │ ├── LongList.java
│ │ ├── ModifiableCountDownLatch.java
│ │ ├── Pair.java
│ │ ├── PairedReturn.java
│ │ ├── PoisonRunnable.java
│ │ ├── ProbailityMatch.java
│ │ ├── QuickSort.java
│ │ ├── RunnableConsumer.java
│ │ ├── SimpleList.java
│ │ ├── SortedArrayList.java
│ │ ├── StringUtils.java
│ │ ├── SystemInfo.java
│ │ ├── Tuple3.java
│ │ ├── UnionFind.java
│ │ ├── concurrent
│ │ ├── AtomicDouble.java
│ │ ├── AtomicDoubleArray.java
│ │ ├── ConcurrentCacheLRU.java
│ │ ├── IndexReducer.java
│ │ ├── IndexRunnable.java
│ │ ├── LoopChunkReducer.java
│ │ ├── LoopChunkRunner.java
│ │ ├── ParallelUtils.java
│ │ └── TreeBarrier.java
│ │ └── random
│ │ ├── CMWC4096.java
│ │ ├── RandomUtil.java
│ │ ├── XOR128.java
│ │ ├── XOR96.java
│ │ └── XORWOW.java
└── test
│ └── jsat
│ ├── FixedProblems.java
│ ├── NormalClampedSample.java
│ ├── TestTools.java
│ ├── classifiers
│ ├── DDAGTest.java
│ ├── OneVSAllTest.java
│ ├── OneVSOneTest.java
│ ├── RocchioTest.java
│ ├── bayesian
│ │ ├── AODETest.java
│ │ ├── MultinomialNaiveBayesTest.java
│ │ ├── MultivariateNormalsTest.java
│ │ ├── NaiveBayesTest.java
│ │ └── NaiveBayesUpdateableTest.java
│ ├── boosting
│ │ ├── AdaBoostM1Test.java
│ │ ├── ArcX4Test.java
│ │ ├── BaggingTest.java
│ │ ├── EmphasisBoostTest.java
│ │ ├── LogitBoostPLTest.java
│ │ ├── LogitBoostTest.java
│ │ ├── ModestAdaBoostTest.java
│ │ ├── SAMMETest.java
│ │ ├── StackingTest.java
│ │ ├── UpdatableStackingTest.java
│ │ └── WaggingNormalTest.java
│ ├── calibration
│ │ ├── IsotonicCalibrationTest.java
│ │ └── PlattCalibrationTest.java
│ ├── evaluation
│ │ ├── AUCTest.java
│ │ ├── AccuracyTest.java
│ │ ├── F1ScoreTest.java
│ │ ├── FbetaScoreTest.java
│ │ ├── KappaTest.java
│ │ ├── LogLossTest.java
│ │ ├── MatthewsCorrelationCoefficientTest.java
│ │ ├── PrecisionTest.java
│ │ └── RecallTest.java
│ ├── imbalance
│ │ ├── BorderlineSMOTETest.java
│ │ └── SMOTETest.java
│ ├── knn
│ │ ├── DANNTest.java
│ │ ├── LWLTest.java
│ │ └── NearestNeighbourTest.java
│ ├── linear
│ │ ├── ALMA2Test.java
│ │ ├── AROWTest.java
│ │ ├── BBRTest.java
│ │ ├── LinearBatchTest.java
│ │ ├── LinearL1SCDTest.java
│ │ ├── LinearSGDTest.java
│ │ ├── LogisticRegressionDCDTest.java
│ │ ├── NHERDTest.java
│ │ ├── NewGLMNETTest.java
│ │ ├── PassiveAggressiveTest.java
│ │ ├── ROMMATest.java
│ │ ├── SCDTest.java
│ │ ├── SCWTest.java
│ │ ├── SDCATest.java
│ │ ├── SMIDASTest.java
│ │ ├── STGDTest.java
│ │ ├── StochasticMultinomialLogisticRegressionTest.java
│ │ └── kernelized
│ │ │ ├── ALMA2KTest.java
│ │ │ ├── BOGDTest.java
│ │ │ ├── CSKLRBatchTest.java
│ │ │ ├── CSKLRTest.java
│ │ │ ├── DUOLTest.java
│ │ │ ├── ForgetronTest.java
│ │ │ ├── KernelPointTest.java
│ │ │ ├── KernelSGDTest.java
│ │ │ ├── OSKLTest.java
│ │ │ └── ProjectronTest.java
│ ├── neuralnetwork
│ │ ├── BackPropagationNetTest.java
│ │ ├── DReDNetSimpleTest.java
│ │ ├── LVQLLCTest.java
│ │ ├── LVQTest.java
│ │ ├── PerceptronTest.java
│ │ ├── RBFNetTest.java
│ │ └── SOMTest.java
│ ├── svm
│ │ ├── DCDTest.java
│ │ ├── DCDsTest.java
│ │ ├── DCSVMTest.java
│ │ ├── LSSVMTest.java
│ │ ├── PegasosKTest.java
│ │ ├── PegasosTest.java
│ │ ├── PlattSMOTest.java
│ │ ├── SBPTest.java
│ │ ├── SVMnoBiasTest.java
│ │ └── extended
│ │ │ ├── AMMTest.java
│ │ │ └── CPMTest.java
│ └── trees
│ │ ├── DecisionStumpTest.java
│ │ ├── DecisionTreeTest.java
│ │ ├── ERTreesTest.java
│ │ ├── ImportanceByUsesTest.java
│ │ ├── MDATest.java
│ │ ├── MDITest.java
│ │ └── RandomForestTest.java
│ ├── clustering
│ ├── BayesianHACTest.java
│ ├── CLARATest.java
│ ├── DBSCANTest.java
│ ├── EMGaussianMixtureTest.java
│ ├── FLAMETest.java
│ ├── GapStatisticTest.java
│ ├── HDBSCANTest.java
│ ├── LSDBCTest.java
│ ├── MEDDITTest.java
│ ├── MeanShiftTest.java
│ ├── OPTICSTest.java
│ ├── PAMTest.java
│ ├── TRIKMEDSTest.java
│ ├── VBGMMTest.java
│ ├── biclustering
│ │ └── SpectralCoClusteringTest.java
│ ├── evaluation
│ │ ├── AdjustedRandIndexTest.java
│ │ ├── CompletenessTest.java
│ │ ├── HomogeneityTest.java
│ │ ├── NormalizedMutualInformationTest.java
│ │ ├── VMeasureTest.java
│ │ └── intra
│ │ │ └── SumOfSqrdPairwiseDistancesTest.java
│ ├── hierarchical
│ │ ├── DivisiveGlobalClustererTest.java
│ │ ├── DivisiveLocalClustererTest.java
│ │ ├── NNChainHACTest.java
│ │ ├── PriorityHACTest.java
│ │ └── SimpleHACTest.java
│ └── kmeans
│ │ ├── ElkanKMeansTest.java
│ │ ├── ElkanKernelKMeansTest.java
│ │ ├── GMeansTest.java
│ │ ├── HamerlyKMeansTest.java
│ │ ├── KMeansPDNTest.java
│ │ ├── LloydKernelKMeansTest.java
│ │ ├── MiniBatchKMeansTest.java
│ │ ├── NaiveKMeansTest.java
│ │ └── XMeansTest.java
│ ├── datatransform
│ ├── FastICATest.java
│ ├── ImputerTest.java
│ ├── JLTransformTest.java
│ ├── PCATest.java
│ ├── RemoveAttributeTransformTest.java
│ ├── WhitenedPCATest.java
│ ├── WhitenedZCATest.java
│ ├── featureselection
│ │ ├── BDSTest.java
│ │ ├── LRSTest.java
│ │ ├── MutualInfoFSTest.java
│ │ ├── ReliefFTest.java
│ │ ├── SBSTest.java
│ │ └── SFSTest.java
│ ├── kernel
│ │ ├── KernelPCATest.java
│ │ ├── NystromTest.java
│ │ └── RFF_RBFTest.java
│ └── visualization
│ │ ├── IsomapTest.java
│ │ ├── LargeVizTest.java
│ │ ├── MDSTest.java
│ │ └── TSNETest.java
│ ├── distributions
│ ├── BetaTest.java
│ ├── CauchyTest.java
│ ├── ChiSquaredTest.java
│ ├── ContinuousDistributionTest.java
│ ├── ExponentialTest.java
│ ├── FisherSendorTest.java
│ ├── GammaTest.java
│ ├── KolmogorovTest.java
│ ├── KumaraswamyTest.java
│ ├── LaplaceTest.java
│ ├── LevyTest.java
│ ├── LogNormalTest.java
│ ├── LogUniformTest.java
│ ├── LogisticTest.java
│ ├── MaxwellBoltzmannTest.java
│ ├── NormalTest.java
│ ├── ParetoTest.java
│ ├── RayleighTest.java
│ ├── StudentTTest.java
│ ├── TruncatedDistributionTest.java
│ ├── UniformTest.java
│ ├── WeibullTest.java
│ ├── discrete
│ │ ├── BinomialTest.java
│ │ ├── PoissonTest.java
│ │ ├── UniformDiscreteTest.java
│ │ └── ZipfTest.java
│ ├── empirical
│ │ └── KernelDensityEstimatorTest.java
│ └── multivariate
│ │ ├── DirichletTest.java
│ │ ├── NormalMTest.java
│ │ └── SymmetricDirichletTest.java
│ ├── driftdetectors
│ ├── ADWINTest.java
│ └── DDMTest.java
│ ├── io
│ ├── CSVTest.java
│ ├── JSATDataTest.java
│ └── LIBSVMLoaderTest.java
│ ├── linear
│ ├── CholeskyDecompositionTest.java
│ ├── ConcatenatedVecTest.java
│ ├── DenseMatrixTest.java
│ ├── EigenvalueDecompositionTest.java
│ ├── GenericMatrixTest.java
│ ├── HessenbergFormTest.java
│ ├── LUPDecompositionTest.java
│ ├── LanczosTest.java
│ ├── MatrixOfVecsTest.java
│ ├── MatrixStatisticsTest.java
│ ├── MatrixTest.java
│ ├── Poly2VecTest.java
│ ├── QRDecompositionTest.java
│ ├── ShiftedVecTest.java
│ ├── SingularValueDecompositionTest.java
│ ├── SparseMatrixTest.java
│ ├── SparseVectorTest.java
│ ├── SubVectorTest.java
│ ├── TruncatedSVDTest.java
│ ├── VecWithNormTest.java
│ ├── distancemetrics
│ │ ├── ChebyshevDistanceTest.java
│ │ ├── CosineDistanceTest.java
│ │ ├── CosineDistanceTestNormalized.java
│ │ ├── EuclideanDistanceTest.java
│ │ ├── JaccardDistanceTest.java
│ │ ├── MahalanobisDistanceTest.java
│ │ ├── ManhattanDistanceTest.java
│ │ ├── MinkowskiDistanceTest.java
│ │ ├── NormalizedEuclideanDistanceTest.java
│ │ ├── PearsonDistanceTest.java
│ │ ├── SquaredEuclideanDistanceTest.java
│ │ └── WeightedEuclideanDistanceTest.java
│ ├── solvers
│ │ └── ConjugateGradientTest.java
│ └── vectorcollection
│ │ ├── BallTreeTest.java
│ │ ├── CoverTreeTest.java
│ │ ├── DCITest.java
│ │ ├── DualTreeTest.java
│ │ ├── KDTreeTest.java
│ │ ├── RTreeTest.java
│ │ ├── RandomBallCoverTest.java
│ │ ├── VPTreeMVTest.java
│ │ ├── VPTreeTest.java
│ │ ├── VectorArrayTest.java
│ │ └── lsh
│ │ ├── E2LSHTest.java
│ │ └── RandomProjectionLSHTest.java
│ ├── math
│ ├── ComplexTest.java
│ ├── ExponentialMovingStatisticsTest.java
│ ├── FastMathTest.java
│ ├── FunctionTest.java
│ ├── MathTricksTest.java
│ ├── OnLineStatisticsTest.java
│ ├── SpecialMathTest.java
│ ├── integration
│ │ └── AdaptiveSimpsonTest.java
│ ├── optimization
│ │ ├── BFGSTest.java
│ │ ├── LBFGSTest.java
│ │ ├── ModifiedOWLQNTest.java
│ │ ├── NelderMeadTest.java
│ │ ├── oned
│ │ │ └── GoldenSearchTest.java
│ │ └── stochastic
│ │ │ ├── AdaDeltaTest.java
│ │ │ ├── AdaGradTest.java
│ │ │ ├── AdamTest.java
│ │ │ ├── NAdaGradTest.java
│ │ │ ├── RMSPropTest.java
│ │ │ ├── RpropTest.java
│ │ │ ├── SGDMomentumTest.java
│ │ │ └── SimpleSGDTest.java
│ └── rootfinding
│ │ ├── BisectionTest.java
│ │ ├── RiddersMethodTest.java
│ │ ├── SecantTest.java
│ │ └── ZeroinTest.java
│ ├── outlier
│ ├── DensityOutlierTest.java
│ ├── IsolationForestTest.java
│ ├── LOFTest.java
│ ├── LinearOCSVMTest.java
│ └── LoOPTest.java
│ ├── parameters
│ ├── GridSearchTest.java
│ └── RandomSearchTest.java
│ ├── regression
│ ├── AveragedRegressorTest.java
│ ├── KernelRLSTest.java
│ ├── KernelRidgeRegressionTest.java
│ ├── NadarayaWatsonTest.java
│ ├── OrdinaryKrigingTest.java
│ ├── RANSACTest.java
│ ├── RidgeRegressionTest.java
│ ├── StochasticGradientBoostingTest.java
│ ├── StochasticRidgeRegressionTest.java
│ └── evaluation
│ │ ├── CoefficientOfDeterminationTest.java
│ │ ├── MeanAbsoluteErrorTest.java
│ │ ├── MeanSquaredErrorTest.java
│ │ ├── RelativeAbsoluteErrorTest.java
│ │ └── RelativeSquaredErrorTest.java
│ ├── text
│ ├── stemming
│ │ ├── LovinsStemmerTest.java
│ │ ├── PaiceHuskStemmerTest.java
│ │ └── PorterStemmerTest.java
│ ├── tokenizer
│ │ └── NGramTokenizerTest.java
│ └── topicmodel
│ │ └── OnlineLDAsviTest.java
│ └── utils
│ ├── FibHeapTest.java
│ ├── IndexTableTest.java
│ ├── IntDoubleMapArrayTest.java
│ ├── IntDoubleMapTest.java
│ ├── IntListTest.java
│ ├── IntPriorityQueueTest.java
│ ├── IntSetFixedSizeTest.java
│ ├── IntSetTest.java
│ ├── IntSortedSetTest.java
│ ├── ListUtilsTest.java
│ ├── LongDoubleMapTest.java
│ ├── LongListTest.java
│ ├── QuickSortTest.java
│ ├── SimpleListTest.java
│ └── StringUtilsTest.java
├── LICENSE.txt
└── README.md
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [EdwardRaff] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | # patreon: # Replace with a single Patreon username
5 | # open_collective: # Replace with a single Open Collective username
6 | # ko_fi: # Replace with a single Ko-fi username
7 | # tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | # community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | # liberapay: # Replace with a single Liberapay username
10 | # issuehunt: # Replace with a single IssueHunt username
11 | # otechie: # Replace with a single Otechie username
12 | # custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
13 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /JSAT/nbproject/private/
2 | /JSAT/target/
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 | language: java
3 |
4 | jdk:
5 | - oraclejdk8
6 |
7 | env:
8 | - JSAT_SOURCE_DIR=JSAT
9 |
10 | before_script:
11 | - echo "MAVEN_OPTS='-Xmx2g -XX:MaxPermSize=512m'" > ~/.mavenrc
12 |
13 | script:
14 | - cd $JSAT_SOURCE_DIR
15 | - mvn clean install
16 |
--------------------------------------------------------------------------------
/JSAT/.gitignore:
--------------------------------------------------------------------------------
1 | /bin/
2 |
--------------------------------------------------------------------------------
/JSAT/nb-configuration.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/JSAT/src/jsat/classifiers/DataPointPair.java:
--------------------------------------------------------------------------------
12 | public class DataPointPair<P> implements Serializable
13 | {
14 |
15 | private static final long serialVersionUID = 5091308998873225566L;
16 | DataPoint dataPoint;
17 | P pair;
18 |
19 | public DataPointPair(DataPoint dataPoint, P pair)
20 | {
21 | this.dataPoint = dataPoint;
22 | this.pair = pair;
23 | }
24 |
25 | public void setDataPoint(DataPoint dataPoint)
26 | {
27 | this.dataPoint = dataPoint;
28 | }
29 |
30 | public void setPair(P pair)
31 | {
32 | this.pair = pair;
33 | }
34 |
35 | public DataPoint getDataPoint()
36 | {
37 | return dataPoint;
38 | }
39 |
40 | public P getPair()
41 | {
42 | return pair;
43 | }
44 |
45 | /**
46 | * The same as calling {@link DataPoint#getNumericalValues() } on {@link #getDataPoint() }.
47 | * @return the Vec related to the data point in this pair.
48 | */
49 | public Vec getVector()
50 | {
51 | return dataPoint.getNumericalValues();
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
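
As a quick illustration of the pair container above, here is a minimal sketch pairing a point with a regression target. The vector values and the Double label are hypothetical, and the single-argument DataPoint(Vec) constructor is assumed:

    import jsat.classifiers.DataPoint;
    import jsat.classifiers.DataPointPair;
    import jsat.linear.DenseVector;

    public class DataPointPairExample
    {
        public static void main(String[] args)
        {
            // pair a data point with an arbitrary value, e.g. a regression target
            DataPoint dp = new DataPoint(new DenseVector(new double[]{1.0, 2.0, 3.0}));
            DataPointPair<Double> pair = new DataPointPair<>(dp, 5.0);
            // getVector() is shorthand for getDataPoint().getNumericalValues()
            System.out.println(pair.getVector() + " -> " + pair.getPair());
        }
    }
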
/JSAT/src/jsat/classifiers/PriorClassifier.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers;
2 |
3 | import java.util.concurrent.ExecutorService;
4 | import jsat.exceptions.UntrainedModelException;
5 |
6 | /**
7 | * A Naive classifier that simply returns the prior probabilities as the
8 | * classification decision.
9 | *
10 | * @author Edward Raff
11 | */
12 | public class PriorClassifier implements Classifier
13 | {
14 |
15 | private static final long serialVersionUID = 7763388716880766538L;
16 | private CategoricalResults cr;
17 |
18 | /**
19 |      * Creates a new PriorClassifier
20 | */
21 | public PriorClassifier()
22 | {
23 | }
24 |
25 | /**
26 | * Creates a new Prior Classifier that is given the results it should be
27 | * returning
28 | *
29 | * @param cr the prior probabilities for classification
30 | */
31 | public PriorClassifier(CategoricalResults cr)
32 | {
33 | this.cr = cr;
34 | }
35 |
36 | @Override
37 | public CategoricalResults classify(DataPoint data)
38 | {
39 | if(cr == null)
40 | throw new UntrainedModelException("PriorClassifier has not been trained");
41 | return cr;
42 | }
43 |
44 | @Override
45 | public void train(ClassificationDataSet dataSet, boolean parallel)
46 | {
47 | train(dataSet);
48 | }
49 |
50 | @Override
51 | public void train(ClassificationDataSet dataSet)
52 | {
53 | cr = new CategoricalResults(dataSet.getPredicting().getNumOfCategories());
54 | for(int i = 0; i < dataSet.size(); i++)
55 | cr.incProb(dataSet.getDataPointCategory(i), dataSet.getWeight(i));
56 | cr.normalize();
57 | }
58 |
59 | @Override
60 | public boolean supportsWeightedData()
61 | {
62 | return true;
63 | }
64 |
65 | @Override
66 | public Classifier clone()
67 | {
68 | PriorClassifier clone = new PriorClassifier();
69 | if(this.cr != null)
70 | clone.cr = this.cr.clone();
71 | return clone;
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
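
To make the training loop above concrete, the following minimal sketch mirrors what train() computes, using hypothetical class counts in place of a real dataset:

    import jsat.classifiers.CategoricalResults;

    public class PriorSketch
    {
        public static void main(String[] args)
        {
            // tally (weighted) class occurrences, as the train() loop does
            CategoricalResults cr = new CategoricalResults(2);
            cr.incProb(0, 30); // 30 examples of class 0
            cr.incProb(1, 70); // 70 examples of class 1
            cr.normalize();
            // every later classify() call simply returns P(0)=0.3, P(1)=0.7
            System.out.println(cr.getProb(0) + ", " + cr.getProb(1));
        }
    }
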
/JSAT/src/jsat/classifiers/bayesian/MultivariateNormals.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.bayesian;
2 |
3 | import jsat.distributions.multivariate.NormalM;
4 |
5 | /**
6 | * This classifier can be seen as an extension of {@link NaiveBayes}. Instead of treating the variables as independent,
7 | * each class uses all of its variables to fit a {@link NormalM Multivariate Normal} distribution. As such, it can only
8 | * handle numerical attributes. However, if the classes are normally distributed, it will produce optimal classification
9 | * results. The less normal the true distributions are, the less accurate the classifier will be.
10 | *
11 | * @author Edward Raff
12 | */
13 | public class MultivariateNormals extends BestClassDistribution
14 | {
15 |
16 | private static final long serialVersionUID = 5977979334930517655L;
17 |
18 | public MultivariateNormals(boolean usePriors)
19 | {
20 | super(new NormalM(), usePriors);
21 | }
22 |
23 | /**
24 |      * Creates a new classifier that fits each class to a {@link NormalM Multivariate Normal Distribution}.
25 | */
26 | public MultivariateNormals()
27 | {
28 | super(new NormalM());
29 | }
30 |
31 | /**
32 | * Copy constructor
33 | * @param toCopy the object to copy
34 | */
35 | public MultivariateNormals(MultivariateNormals toCopy)
36 | {
37 | super(toCopy);
38 | }
39 |
40 | @Override
41 | public boolean supportsWeightedData()
42 | {
43 | return true;
44 | }
45 |
46 | @Override
47 | public MultivariateNormals clone()
48 | {
49 | return new MultivariateNormals(this);
50 | }
51 |
52 |
53 | }
54 |
--------------------------------------------------------------------------------
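
A minimal usage sketch for the classifier above; the helper name and the assumption that `data` is an all-numeric classification dataset are illustrative:

    import jsat.classifiers.CategoricalResults;
    import jsat.classifiers.ClassificationDataSet;
    import jsat.classifiers.bayesian.MultivariateNormals;

    public class MultivariateNormalsSketch
    {
        static CategoricalResults classifyFirst(ClassificationDataSet data)
        {
            MultivariateNormals mvn = new MultivariateNormals(true); // true: weight densities by class priors
            mvn.train(data); // fits one NormalM per class
            return mvn.classify(data.getDataPoint(0));
        }
    }
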
/JSAT/src/jsat/classifiers/calibration/BinaryScoreClassifier.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.classifiers.calibration;
3 |
4 | import jsat.classifiers.Classifier;
5 | import jsat.classifiers.DataPoint;
6 |
7 | /**
8 |  * Many algorithms learn a binary separation between two classes A and
9 |  * B by representing the target labels with {@code -1} and {@code 1}. At
10 |  * prediction time, the output is a real valued number, where the sign indicates the
11 |  * class label. This interface indicates that an algorithm conforms to such
12 | * behavior, and that the "0" class corresponds to the {@code -1} label, and the
13 | * "1" class corresponds to the {@code 1} label.
14 | *
15 | * @author Edward Raff
16 | */
17 | public interface BinaryScoreClassifier extends Classifier
18 | {
19 | /**
20 | * Returns the numeric score for predicting a class of a given data point,
21 | * where the sign of the value indicates which class the data point is
22 | * predicted to belong to.
23 | *
24 | * @param dp the data point to predict the class label of
25 | * @return the score for the given data point
26 | */
27 | public double getScore(DataPoint dp);
28 |
29 | @Override
30 | public BinaryScoreClassifier clone();
31 | }
32 |
--------------------------------------------------------------------------------
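
The sign convention documented above maps directly to class indices; a one-line helper (hypothetical, for illustration) makes this explicit:

    import jsat.classifiers.DataPoint;
    import jsat.classifiers.calibration.BinaryScoreClassifier;

    public class ScoreToLabel
    {
        // score < 0 -> class "0" (the -1 label), score >= 0 -> class "1" (the +1 label)
        static int predictedClass(BinaryScoreClassifier model, DataPoint dp)
        {
            return model.getScore(dp) < 0 ? 0 : 1;
        }
    }
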
/JSAT/src/jsat/classifiers/evaluation/F1Score.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.evaluation;
2 |
3 | /**
4 | * The F1 score is the harmonic mean of {@link Precision} and
5 | * {@link Recall}. This score is only valid for binary
6 | * classification problems.
7 | *
8 | * @author Edward Raff
9 | */
10 | public class F1Score extends SimpleBinaryClassMetric
11 | {
12 |
13 |
14 | private static final long serialVersionUID = -6192302685766444921L;
15 |
16 | public F1Score()
17 | {
18 | super();
19 | }
20 |
21 | public F1Score(F1Score toClone)
22 | {
23 | super(toClone);
24 | }
25 |
26 | @Override
27 | public double getScore()
28 | {
29 | return 2*tp/(2*tp+fp+fn);
30 | }
31 |
32 | @Override
33 | public boolean equals(Object obj)
34 | {
35 | if(this.getClass().isAssignableFrom(obj.getClass()) && obj.getClass().isAssignableFrom(this.getClass()))
36 | {
37 | return true;
38 | }
39 | return false;
40 | }
41 |
42 | @Override
43 | public int hashCode()
44 | {
45 | return getName().hashCode();
46 | }
47 |
48 | @Override
49 | public F1Score clone()
50 | {
51 | return new F1Score(this);
52 | }
53 |
54 | @Override
55 | public String getName()
56 | {
57 | return "F1 Score";
58 | }
59 |
60 | }
61 |
--------------------------------------------------------------------------------
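
A worked check of the getScore() formula above with hypothetical confusion counts, confirming that 2*tp/(2*tp+fp+fn) equals the harmonic mean of precision and recall:

    public class F1Worked
    {
        public static void main(String[] args)
        {
            double tp = 8, fp = 2, fn = 4; // hypothetical confusion-matrix counts
            double precision = tp / (tp + fp);           // 0.800
            double recall = tp / (tp + fn);              // ~0.667
            double harmonic = 2 * precision * recall / (precision + recall);
            double direct = 2 * tp / (2 * tp + fp + fn); // as computed in getScore()
            System.out.println(harmonic + " == " + direct); // both ~0.727
        }
    }
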
/JSAT/src/jsat/classifiers/evaluation/MatthewsCorrelationCoefficient.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.evaluation;
2 |
3 | /**
4 |  * Evaluates a classifier based on the Matthews Correlation Coefficient
5 | *
6 | * @author Edward Raff
7 | */
8 | public class MatthewsCorrelationCoefficient extends SimpleBinaryClassMetric
9 | {
10 |
11 |
12 | private static final long serialVersionUID = 7102318546460007008L;
13 |
14 | public MatthewsCorrelationCoefficient()
15 | {
16 | super();
17 | }
18 |
19 | public MatthewsCorrelationCoefficient(MatthewsCorrelationCoefficient toClone)
20 | {
21 | super(toClone);
22 | }
23 |
24 | @Override
25 | public double getScore()
26 | {
27 | double denom = (tp+fp)*(tp+fn)*(tn+fp)*(tn+fn);
28 | if(denom <= 1e-16)
29 | return 0;
30 | return (tp*tn-fp*fn)/Math.sqrt(denom);
31 | }
32 |
33 | @Override
34 | public boolean equals(Object obj)
35 | {
36 | if(this.getClass().isAssignableFrom(obj.getClass()) && obj.getClass().isAssignableFrom(this.getClass()))
37 | {
38 | return true;
39 | }
40 | return false;
41 | }
42 |
43 | @Override
44 | public int hashCode()
45 | {
46 | return getName().hashCode();
47 | }
48 |
49 | @Override
50 | public MatthewsCorrelationCoefficient clone()
51 | {
52 | return new MatthewsCorrelationCoefficient(this);
53 | }
54 |
55 | @Override
56 | public String getName()
57 | {
58 | return "Matthews Correlation Coefficient";
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
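
A worked example of the score above with hypothetical counts; MCC ranges from -1 (total disagreement) through 0 (chance level) to +1 (perfect prediction):

    public class MCCWorked
    {
        public static void main(String[] args)
        {
            double tp = 90, tn = 80, fp = 20, fn = 10; // hypothetical confusion-matrix counts
            double denom = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn);
            double mcc = (tp * tn - fp * fn) / Math.sqrt(denom);
            System.out.println(mcc); // ~0.70
        }
    }
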
/JSAT/src/jsat/classifiers/evaluation/Precision.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.evaluation;
2 |
3 | /**
4 | * Evaluates a classifier based on the Precision, where the class of index 0
5 | * is considered the positive class. This score is only valid for binary
6 | * classification problems.
7 | *
8 | * @author Edward Raff
9 | */
10 | public class Precision extends SimpleBinaryClassMetric
11 | {
12 |
13 |
14 | private static final long serialVersionUID = 7046590252900909918L;
15 |
16 | public Precision()
17 | {
18 | super();
19 | }
20 |
21 | public Precision(Precision toClone)
22 | {
23 | super(toClone);
24 | }
25 |
26 | @Override
27 | public double getScore()
28 | {
29 | return tp/(tp+fp);
30 | }
31 |
32 | @Override
33 | public boolean equals(Object obj)
34 | {
35 | if(this.getClass().isAssignableFrom(obj.getClass()) && obj.getClass().isAssignableFrom(this.getClass()))
36 | {
37 | return true;
38 | }
39 | return false;
40 | }
41 |
42 | @Override
43 | public int hashCode()
44 | {
45 | return getName().hashCode();
46 | }
47 |
48 | @Override
49 | public Precision clone()
50 | {
51 | return new Precision(this);
52 | }
53 |
54 | @Override
55 | public String getName()
56 | {
57 | return "Precision";
58 | }
59 |
60 | }
61 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/classifiers/evaluation/Recall.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.evaluation;
2 |
3 | /**
4 | * Evaluates a classifier based on the Recall rate, where the class of index 0
5 | * is considered the positive class. This score is only valid for binary
6 | * classification problems.
7 | *
8 | * @author Edward Raff
9 | */
10 | public class Recall extends SimpleBinaryClassMetric
11 | {
12 |
13 |
14 | private static final long serialVersionUID = 4832185425203972017L;
15 |
16 | /**
17 | * Creates a new Recall evaluator
18 | */
19 | public Recall()
20 | {
21 | super();
22 | }
23 |
24 | /**
25 | * Copy constructor
26 | * @param toClone the object to copy
27 | */
28 | public Recall(Recall toClone)
29 | {
30 | super(toClone);
31 | }
32 |
33 | @Override
34 | public double getScore()
35 | {
36 | return tp/(tp+fn);
37 | }
38 |
39 | @Override
40 | public boolean equals(Object obj)
41 | {
42 | if(this.getClass().isAssignableFrom(obj.getClass()) && obj.getClass().isAssignableFrom(this.getClass()))
43 | {
44 | return true;
45 | }
46 | return false;
47 | }
48 |
49 | @Override
50 | public int hashCode()
51 | {
52 | return getName().hashCode();
53 | }
54 |
55 | @Override
56 | public Recall clone()
57 | {
58 | return new Recall(this);
59 | }
60 |
61 | @Override
62 | public String getName()
63 | {
64 | return "Recall";
65 | }
66 |
67 | }
68 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/classifiers/neuralnetwork/activations/LinearLayer.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.neuralnetwork.activations;
2 |
3 | import jsat.linear.Matrix;
4 | import jsat.linear.Vec;
5 |
6 | /**
7 | *
8 | * @author Edward Raff
9 | */
10 | public class LinearLayer implements ActivationLayer
11 | {
12 |
13 |
14 | private static final long serialVersionUID = -4040058095010471379L;
15 |
16 | @Override
17 | public void activate(Vec input, Vec output)
18 | {
19 | input.copyTo(output);
20 | }
21 |
22 | @Override
23 | public void activate(Matrix input, Matrix output, boolean rowMajor)
24 | {
25 | input.copyTo(output);
26 | }
27 |
28 | @Override
29 | public void backprop(Vec input, Vec output, Vec delta_partial, Vec errout)
30 | {
31 | delta_partial.copyTo(errout);
32 | }
33 |
34 | @Override
35 | public void backprop(Matrix input, Matrix output, Matrix delta_partial, Matrix errout, boolean rowMajor)
36 | {
37 | delta_partial.copyTo(errout);
38 | }
39 |
40 | @Override
41 | public LinearLayer clone()
42 | {
43 | return new LinearLayer();
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/classifiers/neuralnetwork/activations/SigmoidLayer.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.neuralnetwork.activations;
2 |
3 | import jsat.linear.Matrix;
4 | import jsat.linear.Vec;
5 |
6 | /**
7 | * This layer provides the standard Sigmoid activation f(x) =
8 | * 1/(1+exp(-x))
9 | *
10 | * @author Edward Raff
11 | */
12 | public class SigmoidLayer implements ActivationLayer
13 | {
14 |
15 |
16 | private static final long serialVersionUID = 160273287445169627L;
17 |
18 | @Override
19 | public void activate(Vec input, Vec output)
20 | {
21 | for(int i = 0; i < input.length(); i++)
22 | output.set(i, 1/(1+Math.exp(-input.get(i))));
23 | }
24 |
25 | @Override
26 | public void activate(Matrix input, Matrix output, boolean rowMajor)
27 | {
28 | for(int i = 0; i < input.rows(); i++)
29 | for(int j = 0; j < input.cols(); j++)
30 | output.set(i, j, 1.0/(1+Math.exp(-input.get(i, j))));
31 | }
32 |
33 | @Override
34 | public void backprop(Vec input, Vec output, Vec delta_partial, Vec errout)
35 | {
36 | for(int i = 0; i < input.length(); i++)
37 | {
38 | double out_i = output.get(i);
39 | double errin_i = delta_partial.get(i);
40 | errout.set(i, out_i*(1-out_i)*errin_i);
41 | }
42 | }
43 |
44 |
45 |
46 | @Override
47 | public void backprop(Matrix input, Matrix output, Matrix delta_partial, Matrix errout, boolean rowMajor)
48 | {
49 | for(int i = 0; i < input.rows(); i++)
50 | for(int j = 0; j < input.cols(); j++)
51 | {
52 | double out_ij = output.get(i, j);
53 | double errin_ij = delta_partial.get(i, j);
54 | errout.set(i, j, out_ij*(1-out_ij)*errin_ij);
55 | }
56 | }
57 |
58 | @Override
59 | public SigmoidLayer clone()
60 | {
61 | return new SigmoidLayer();
62 | }
63 |
64 | }
65 |
--------------------------------------------------------------------------------
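
The backprop methods above rely on the sigmoid identity f'(x) = f(x)(1 - f(x)), which is why only the stored output is needed to form the gradient; a quick numerical check of that identity:

    public class SigmoidGradCheck
    {
        public static void main(String[] args)
        {
            double x = 0.7, h = 1e-6;
            double fx = 1 / (1 + Math.exp(-x));
            double analytic = fx * (1 - fx); // the factor used in backprop()
            double numeric = (1 / (1 + Math.exp(-(x + h)))
                            - 1 / (1 + Math.exp(-(x - h)))) / (2 * h);
            System.out.println(analytic + " vs " + numeric); // agree to ~1e-10
        }
    }
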
/JSAT/src/jsat/classifiers/neuralnetwork/activations/SoftmaxLayer.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.neuralnetwork.activations;
2 |
3 | import jsat.linear.Matrix;
4 | import jsat.linear.Vec;
5 | import jsat.math.MathTricks;
6 |
7 | /**
8 | * This activation layer is meant to be used as the top-most layer for
9 |  * classification problems, and uses the softmax function (also known as the
10 |  * normalized exponential) to convert the inputs into probabilities.
11 | *
12 | * @author Edward Raff
13 | */
14 | public class SoftmaxLayer implements ActivationLayer
15 | {
16 |
17 |
18 | private static final long serialVersionUID = -6595701781466123463L;
19 |
20 | @Override
21 | public void activate(Vec input, Vec output)
22 | {
23 | input.copyTo(output);
24 | MathTricks.softmax(output, false);
25 | }
26 |
27 | @Override
28 | public void backprop(Vec input, Vec output, Vec delta_partial, Vec errout)
29 | {
30 | if(delta_partial != errout)//if the same object, nothing to do
31 | delta_partial.copyTo(errout);
32 | }
33 |
34 | @Override
35 | public void activate(Matrix input, Matrix output, boolean rowMajor)
36 | {
37 | if(rowMajor)//easy
38 | for(int i = 0; i < input.rows(); i++)
39 | activate(input.getRowView(i), output.getRowView(i));
40 |         else//TODO, do this more efficiently
41 | for(int j = 0; j < input.cols(); j++)
42 | activate(input.getColumnView(j), output.getColumnView(j));
43 | }
44 |
45 | @Override
46 | public void backprop(Matrix input, Matrix output, Matrix delta_partial, Matrix errout, boolean rowMajor)
47 | {
48 | if(delta_partial != errout)//if the same object, nothing to do
49 | delta_partial.copyTo(errout);
50 | }
51 |
52 | @Override
53 | public SoftmaxLayer clone()
54 | {
55 | return new SoftmaxLayer();
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
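
The forward pass above delegates to MathTricks.softmax; the computation is sketched below in plain Java (subtracting the max is the standard numerical-stability trick, assumed to match the library's implementation). The pass-through backprop() is consistent with pairing this layer with a cross-entropy loss, where the combined gradient simplifies to output minus target:

    public class SoftmaxSketch
    {
        public static void main(String[] args)
        {
            double[] z = {2.0, 1.0, 0.1};
            double max = Math.max(z[0], Math.max(z[1], z[2]));
            double sum = 0;
            double[] p = new double[z.length];
            for (int i = 0; i < z.length; i++)
                sum += (p[i] = Math.exp(z[i] - max)); // exponentiate shifted inputs
            for (int i = 0; i < z.length; i++)
                p[i] /= sum; // normalize to probabilities
            System.out.println(p[0] + " " + p[1] + " " + p[2]); // ~0.659 0.242 0.099
        }
    }
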
/JSAT/src/jsat/classifiers/neuralnetwork/activations/TanhLayer.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.neuralnetwork.activations;
2 |
3 | import jsat.linear.Matrix;
4 | import jsat.linear.Vec;
5 |
6 | /**
7 | * This layer provides the standard tanh activation f(x) =
8 | * tanh(x)
9 | *
10 | * @author Edward Raff
11 | */
12 | public class TanhLayer implements ActivationLayer
13 | {
14 |
15 |
16 | private static final long serialVersionUID = -8369008344962638121L;
17 |
18 | @Override
19 | public void activate(Vec input, Vec output)
20 | {
21 | for(int i = 0; i < input.length(); i++)
22 | output.set(i, Math.tanh(input.get(i)));
23 | }
24 |
25 | @Override
26 | public void activate(Matrix input, Matrix output, boolean rowMajor)
27 | {
28 | for(int i = 0; i < input.rows(); i++)
29 | for (int j = 0; j < input.cols(); j++)
30 | output.set(i, j, Math.tanh(input.get(i, j)));
31 | }
32 |
33 | @Override
34 | public void backprop(Vec input, Vec output, Vec delta_partial, Vec errout)
35 | {
36 | for(int i = 0; i < input.length(); i++)
37 | {
38 | double out_i = output.get(i);
39 | double errin_i = delta_partial.get(i);
40 | errout.set(i, (1-out_i*out_i)*errin_i);
41 | }
42 | }
43 |
44 | @Override
45 | public void backprop(Matrix input, Matrix output, Matrix delta_partial, Matrix errout, boolean rowMajor)
46 | {
47 | for(int i = 0; i < input.rows(); i++)
48 | for (int j = 0; j < input.cols(); j++)
49 | {
50 | double out_ij = output.get(i, j);
51 | double errin_ij = delta_partial.get(i, j);
52 | errout.set(i, j, (1-out_ij*out_ij)*errin_ij);
53 | }
54 | }
55 |
56 | @Override
57 | public TanhLayer clone()
58 | {
59 | return new TanhLayer();
60 | }
61 |
62 | }
63 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/classifiers/neuralnetwork/initializers/BiastInitializer.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.neuralnetwork.initializers;
2 |
3 | import java.io.Serializable;
4 | import java.util.Random;
5 | import jsat.linear.Vec;
6 |
7 | /**
8 | * This interface specifies the method of initializing the bias connections in a
9 | * neural network.
10 | * @author Edward Raff
11 | */
12 | public interface BiastInitializer extends Serializable
13 | {
14 | /**
15 | * Performs the initialization of the given vector of bias values
16 | * @param b the vector to store the biases in
17 | * @param fanIn the number of connections coming into the layer that these
18 | * biases are for.
19 | * @param rand the source of randomness for initialization
20 | */
21 | public void init(Vec b, int fanIn, Random rand);
22 |
23 | public BiastInitializer clone();
24 | }
25 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/classifiers/neuralnetwork/initializers/ConstantInit.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.neuralnetwork.initializers;
2 |
3 | import java.util.Random;
4 | import jsat.linear.ConstantVector;
5 | import jsat.linear.Vec;
6 |
7 | /**
8 | * This initializes all bias values to a single constant value
9 | * @author Edward Raff
10 | */
11 | public class ConstantInit implements BiastInitializer
12 | {
13 |
14 | private static final long serialVersionUID = 2638413936718283757L;
15 | private double c;
16 |
17 | /**
18 | *
19 | * @param c the constant to set all biases to
20 | */
21 | public ConstantInit(double c)
22 | {
23 | this.c = c;
24 | }
25 |
26 | /**
27 | *
28 | * @param c the constant value to use
29 | */
30 | public void setConstant(double c)
31 | {
32 | if(Double.isNaN(c) || Double.isInfinite(c))
33 | throw new IllegalArgumentException("Constant must be a real value, not " + c);
34 | this.c = c;
35 | }
36 |
37 | /**
38 | *
39 | * @return the constant value that will be used for initialization
40 | */
41 | public double getConstant()
42 | {
43 | return c;
44 | }
45 |
46 | @Override
47 | public void init(Vec b, int fanIn, Random rand)
48 | {
49 | new ConstantVector(c, b.length()).copyTo(b);
50 | }
51 |
52 | @Override
53 | public ConstantInit clone()
54 | {
55 | return new ConstantInit(c);
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/classifiers/neuralnetwork/initializers/GaussianNormalInit.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.neuralnetwork.initializers;
2 |
3 | import java.util.Random;
4 | import jsat.linear.Matrix;
5 | import jsat.linear.Vec;
6 |
7 | /**
8 | * This object initializes the values of weights by sampling from the zero mean
9 | * Gaussian
10 | * @author Edward Raff
11 | */
12 | public class GaussianNormalInit implements WeightInitializer, BiastInitializer
13 | {
14 |
15 | private static final long serialVersionUID = -882418891606717433L;
16 | private double stndDev;
17 |
18 | /**
19 |      * Creates a new GaussianNormalInit object for initializing weights
20 | * @param stndDev the standard deviation of the distribution to sample from
21 | */
22 | public GaussianNormalInit(double stndDev)
23 | {
24 | this.stndDev = stndDev;
25 | }
26 |
27 | /**
28 | * Sets the standard deviation of the distribution that will be sampled from
29 | * @param stndDev the standard deviation to use
30 | */
31 | public void setStndDev(double stndDev)
32 | {
33 | this.stndDev = stndDev;
34 | }
35 |
36 | /**
37 | *
38 | * @return the standard deviation of the Gaussian that is sampled from
39 | */
40 | public double getStndDev()
41 | {
42 | return stndDev;
43 | }
44 |
45 | @Override
46 | public void init(Matrix w, Random rand)
47 | {
48 | for(int i = 0; i < w.rows(); i++)
49 | for(int j = 0; j < w.cols(); j++)
50 | w.set(i, j, rand.nextGaussian()*stndDev);
51 |
52 | }
53 |
54 | @Override
55 | public void init(Vec b, int fanIn, Random rand)
56 | {
57 | for(int i = 0; i < b.length(); i++)
58 | b.set(i, rand.nextGaussian()*stndDev);
59 | }
60 |
61 | @Override
62 | public GaussianNormalInit clone()
63 | {
64 | return new GaussianNormalInit(stndDev);
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/classifiers/neuralnetwork/initializers/TanhInitializer.java:
--------------------------------------------------------------------------------
1 | package jsat.classifiers.neuralnetwork.initializers;
2 |
3 | import java.util.Random;
4 | import jsat.linear.Matrix;
5 | import jsat.linear.Vec;
6 |
7 | /**
8 | * This initializer samples the weights from an adjusted uniform distribution
9 |  * in order to provide better behavior of neuron activations and gradients.
10 | *
11 | * See: Glorot, X., & Bengio, Y. (2010). Understanding the difficulty of
12 | * training deep feedforward neural networks. Journal of Machine Learning
13 |  * Research - Proceedings Track, 9, 249–256. Retrieved from the
14 |  * JMLR Workshop and Conference Proceedings site.
15 |  *
16 | * @author Edward Raff
17 | */
18 | public class TanhInitializer implements WeightInitializer, BiastInitializer
19 | {
20 |
21 |
22 | private static final long serialVersionUID = -4770682311082616208L;
23 |
24 | @Override
25 | public void init(Matrix w, Random rand)
26 | {
27 | double cnt = Math.sqrt(6)/Math.sqrt(w.rows()+w.cols());
28 | for(int i = 0; i < w.rows(); i++)
29 | for(int j = 0; j < w.cols(); j++)
30 | w.set(i, j, rand.nextDouble()*cnt*2-cnt);
31 |
32 | }
33 |
34 | @Override
35 | public void init(Vec b, int fanIn, Random rand)
36 | {
37 | double cnt = Math.sqrt(6)/Math.sqrt(b.length()+fanIn);
38 | for(int i = 0; i < b.length(); i++)
39 | b.set(i, rand.nextDouble()*cnt*2-cnt);
40 | }
41 |
42 | @Override
43 | public TanhInitializer clone()
44 | {
45 | return new TanhInitializer();
46 | }
47 |
48 | }
49 |
--------------------------------------------------------------------------------
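
The init() methods above draw weights uniformly from [-cnt, cnt] with cnt = sqrt(6)/sqrt(fanIn + fanOut), the bound from Glorot & Bengio (2010); a small worked example with hypothetical layer sizes:

    public class GlorotBound
    {
        public static void main(String[] args)
        {
            int fanIn = 784, fanOut = 100; // hypothetical layer dimensions
            double cnt = Math.sqrt(6) / Math.sqrt(fanIn + fanOut);
            System.out.println(cnt); // ~0.0824, so weights ~ Uniform(-0.0824, 0.0824)
        }
    }
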
/JSAT/src/jsat/classifiers/neuralnetwork/initializers/WeightInitializer.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.classifiers.neuralnetwork.initializers;
3 |
4 | import java.io.Serializable;
5 | import java.util.Random;
6 | import jsat.linear.Matrix;
7 |
8 | /**
9 | * This interface specifies the method of initializing the weight connections in
10 | * a neural network.
11 | *
12 | * @author Edward Raff
13 | */
14 | public interface WeightInitializer extends Serializable
15 | {
16 | /**
17 | * Initializes the values of the given weight matrix
18 | * @param w the matrix to initialize
19 | * @param rand the source of randomness for the initialization
20 | */
21 | public void init(Matrix w, Random rand);
22 |
23 | public WeightInitializer clone();
24 | }
25 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/classifiers/trees/TreeFeatureImportanceInference.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (C) 2016 Edward Raff
--------------------------------------------------------------------------------
/JSAT/src/jsat/datatransform/DataTransform.java:
--------------------------------------------------------------------------------
14 | * A transform may or may not require training, it could be fully specified at
15 | * construction, or learned from the data set. Learning is done via the
16 | * {@link #fit(jsat.DataSet) fit method}. Many DataTransforms will include a
17 | * constructor that takes a dataset as a parameter. These transforms will fit
18 | * the data when constructed, and exist for convenience.
19 | *
20 | * @author Edward Raff
21 | */
22 | public interface DataTransform extends Cloneable, Serializable
23 | {
24 | /**
25 | * Returns a new data point that is a transformation of the original data
26 | * point. This new data point is a different object, but may contain the
27 | * same references as the original data point. It is not guaranteed that you
28 | * can mutate the transformed point without having a side effect on the
29 | * original point.
30 | *
31 | * @param dp the data point to apply a transformation to
32 | * @return a transformed data point
33 | */
34 | public DataPoint transform(DataPoint dp);
35 |
36 | /**
37 | * Fits this transform to the given dataset. Some transforms can only be
38 | * learned from classification or regression datasets. If an incompatible
39 | * dataset type is given, a {@link FailedToFitException} exception may be
40 | * thrown.
41 | *
42 |      * @param data the dataset to fit this transform to
43 | * @throws FailedToFitException if the dataset type is not compatible with
44 | * the transform
45 | */
46 | public void fit(DataSet data);
47 |
48 | public DataTransform clone();
49 | }
50 |
--------------------------------------------------------------------------------
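
A minimal sketch of the fit/transform contract described above; LinearTransform is used here as a representative transform, via the convenience constructor that fits at construction time (the helper name and the assumption that `data` has numeric features are illustrative):

    import jsat.DataSet;
    import jsat.classifiers.DataPoint;
    import jsat.datatransform.DataTransform;
    import jsat.datatransform.LinearTransform;

    public class TransformUsage
    {
        static DataPoint rescaleFirst(DataSet data)
        {
            // this constructor fits to the data when constructed, per the javadoc above
            DataTransform scale = new LinearTransform(data);
            return scale.transform(data.getDataPoint(0));
        }
    }
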
/JSAT/src/jsat/datatransform/DataTransformBase.java:
--------------------------------------------------------------------------------
1 | package jsat.datatransform;
2 |
3 | import java.util.List;
4 | import jsat.parameters.Parameter;
5 | import jsat.parameters.Parameterized;
6 |
7 | /**
8 | * This abstract class implements the Parameterized interface to ease the
9 | * development of simple Data Transforms. If a more complicated set of
10 |  * parameters is needed than what is obtained from
11 |  * {@link Parameter#getParamsFromMethods(java.lang.Object) }, then there is no
12 | * reason to use this class.
13 | *
14 | * @author Edward Raff
15 | */
16 | abstract public class DataTransformBase implements DataTransform, Parameterized
17 | {
18 |
19 | @Override
20 | abstract public DataTransform clone();
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/datatransform/FixedDataTransform.java:
--------------------------------------------------------------------------------
1 | /*
2 | * This code contributed in the public domain.
3 | */
4 | package jsat.datatransform;
5 |
6 | import jsat.classifiers.DataPoint;
7 |
8 | /**
9 | * This interface is meant to be used for convenience when you wish to apply a
10 | * transformation to a data set using the Java 8 lambda features. It is for
11 | * transformations that do not need to be trained on any data, or where all
12 | * training has been done in advance.
13 | *
14 | * @author Edward Raff
15 | */
16 | public interface FixedDataTransform
17 | {
18 |
19 | /**
20 | * Returns a new data point that is a transformation of the original data
21 | * point. This new data point is a different object, but may contain the
22 | * same references as the original data point. It is not guaranteed that you
23 | * can mutate the transformed point without having a side effect on the
24 | * original point.
25 | *
26 | * @param dp the data point to apply a transformation to
27 | * @return a transformed data point
28 | */
29 | public DataPoint transform(DataPoint dp);
30 | }
31 |
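Since the interface above has a single abstract method, a Java 8 lambda can implement it directly. A sketch (not part of the JSAT sources; Vec.mutableMultiply(double) is assumed from the Vec API):

import jsat.classifiers.DataPoint;
import jsat.datatransform.FixedDataTransform;
import jsat.linear.DenseVector;

public class LambdaTransformSketch
{
    public static void main(String[] args)
    {
        //a stateless transform that doubles every numeric feature
        FixedDataTransform doubler = dp ->
        {
            DataPoint copy = dp.clone();//keep the side-effect-free contract
            copy.getNumericalValues().mutableMultiply(2.0);//assumed Vec method
            return copy;
        };
        DataPoint dp = new DataPoint(new DenseVector(new double[]{1, 2, 3}));
        System.out.println(doubler.transform(dp).getNumericalValues());//[2, 4, 6]
    }
}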
--------------------------------------------------------------------------------
/JSAT/src/jsat/datatransform/InPlaceInvertibleTransform.java:
--------------------------------------------------------------------------------
1 | package jsat.datatransform;
2 |
3 | import jsat.classifiers.DataPoint;
4 |
5 | /**
6 | * This interface behaves exactly as {@link InPlaceTransform} specifies, with
7 | * the addition of an in-place "reverse" method that can be used to alter any
8 | * given transformed data point back into an approximation of the
9 | * original vector, without having to allocate a new vector object, instead altering the one
10 | * given.
11 | *
12 | * @author Edward Raff
13 | */
14 | public interface InPlaceInvertibleTransform extends InPlaceTransform, InvertibleTransform
15 | {
16 |
17 | /**
18 | * Mutates the given data point. This causes side effects, altering the data
19 | * point to have the same value as the output of
20 | * {@link #inverse(jsat.classifiers.DataPoint) }
21 | *
22 | * @param dp the data point to alter with an inverse transformation
23 | */
24 | public void mutableInverse(DataPoint dp);
25 |
26 | @Override
27 | public InPlaceInvertibleTransform clone();
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/datatransform/InPlaceTransform.java:
--------------------------------------------------------------------------------
1 | package jsat.datatransform;
2 |
3 | import jsat.classifiers.DataPoint;
4 |
5 | /**
6 | * An In Place Transform is one that has the same number of categorical and
7 | * numeric features as the input. This means it can mutableTransform the input data point
8 | * instead of allocating a new one, which can reduce overhead on memory
9 | * allocations. This can be useful when performing many data transforms in cross
10 | * validation or when processing new examples in an environment that is applying
11 | * an already learned model.
12 | *
 * It is assumed that this interface will be applied to numeric
13 | * features. In case this is not true, a {@link #mutatesNominal() } method is
14 | * provided for the implementation to indicate otherwise.
15 | *
16 | * @author Edward Raff
17 | */
18 | public interface InPlaceTransform extends DataTransform
19 | {
20 |
21 | /**
22 | * Mutates the given data point. This causes side effects, altering the data
23 | * point to have the same value as the output of
24 | * {@link #transform(jsat.classifiers.DataPoint) }.
25 | *
26 | * @param dp the data point to alter
27 | */
28 | public void mutableTransform(DataPoint dp);
29 |
30 | /**
31 | * By default returns {@code false}. Only returns true if this transform
32 | * will mutableTransform the nominal feature values of a data point.
33 | *
34 | * @return {@code true} if nominal feature values are mutated, {@code false}
35 | * otherwise.
36 | */
37 | public boolean mutatesNominal();
38 | }
39 |
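A sketch of the allocation-saving pattern this interface enables (not part of the JSAT sources; it uses PNormNormalization, an InPlaceTransform defined later in this dump):

import jsat.classifiers.DataPoint;
import jsat.datatransform.InPlaceTransform;
import jsat.datatransform.PNormNormalization;
import jsat.linear.DenseVector;

public class InPlaceSketch
{
    public static void main(String[] args)
    {
        InPlaceTransform norm = new PNormNormalization();//defaults to the 2-norm
        DataPoint dp = new DataPoint(new DenseVector(new double[]{3, 4}));
        norm.mutableTransform(dp);//no new DataPoint allocated; dp now holds [0.6, 0.8]
        System.out.println(dp.getNumericalValues());
    }
}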
--------------------------------------------------------------------------------
/JSAT/src/jsat/datatransform/InverseOfTransform.java:
--------------------------------------------------------------------------------
1 | package jsat.datatransform;
2 |
3 | import jsat.DataSet;
4 | import jsat.classifiers.DataPoint;
5 |
6 | /**
7 | * Creates a new Transform object that simply uses the inverse of an
8 | * {@link InvertibleTransform} as a regular transform. This allows one to apply
9 | * inverses after the fact in a simple manner like:
10 | * <pre>
11 | * DataSet x = //some data set;
12 | * InvertibleTransform transform = //some transform;
13 | * x.applyTransform(transform);//apply the original transform
14 | * //reverse the transform, getting back to where we started
15 | * x.applyTransform(new InverseOfTransform(transform));
16 | * </pre>
17 | * @author Edward Raff
18 | */
19 | public class InverseOfTransform implements DataTransform
20 | {
21 |
22 | private static final long serialVersionUID = 2565737661260748018L;
23 | private InvertibleTransform transform;
24 |
25 | /**
26 | * Creates a new transform that uses the
27 | * {@link InvertibleTransform#transform(jsat.classifiers.DataPoint)
28 | * transform} of the given transform
29 | * @param transform the transform to use the inverse function of
30 | */
31 | public InverseOfTransform(InvertibleTransform transform)
32 | {
33 | this.transform = transform;
34 | }
35 |
36 | @Override
37 | public void fit(DataSet data)
38 | {
39 | //no-op, nothing to do
40 | }
41 |
42 | /**
43 | * Copy constructor
44 | * @param toClone the object to copy
45 | */
46 | public InverseOfTransform(InverseOfTransform toClone)
47 | {
48 | this(toClone.transform.clone());
49 | }
50 |
51 | @Override
52 | public DataPoint transform(DataPoint dp)
53 | {
54 | return transform.inverse(dp);
55 | }
56 |
57 | @Override
58 | public InverseOfTransform clone()
59 | {
60 | return new InverseOfTransform(this);
61 | }
62 |
63 | }
64 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/datatransform/InvertibleTransform.java:
--------------------------------------------------------------------------------
1 | package jsat.datatransform;
2 |
3 | import jsat.classifiers.DataPoint;
4 |
5 | /**
6 | * An InvertibleTransform is one in which any given transformed vector can be
7 | * inverted to recover an approximation of the original vector when using
8 | * a transform that implements this interface. It may not be possible to
9 | * perfectly reproduce the original data point; i.e., this process may not be
10 | * lossless.
11 | *
12 | * @author Edward Raff
13 | */
14 | public interface InvertibleTransform extends DataTransform
15 | {
16 |
17 | /**
18 | * Applies the inverse or "reverse" transform to approximately undo the
19 | * effect of {@link #transform(jsat.classifiers.DataPoint) } to recover an
20 | * approximation of the original data point.
21 | *
22 | * @param dp the transformed data point
23 | * @return the original data point, or a reasonable approximation
24 | */
25 | public DataPoint inverse(DataPoint dp);
26 |
27 | @Override
28 | public InvertibleTransform clone();
29 | }
30 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/datatransform/PNormNormalization.java:
--------------------------------------------------------------------------------
1 | package jsat.datatransform;
2 |
3 | import jsat.DataSet;
4 | import jsat.classifiers.DataPoint;
5 | import jsat.linear.Vec;
6 |
7 | /**
8 | * The PNormNormalization transformation normalizes each vector x by
9 | * one of its p-norms, where p is in (0, Infinity).
10 | *
11 | * @author Edward Raff
12 | */
13 | public class PNormNormalization implements InPlaceTransform
14 | {
15 |
16 | private static final long serialVersionUID = 2934569881395909607L;
17 | private double p;
18 |
19 | /**
20 | * Creates a new object that normalizes based on the 2-norm
21 | */
22 | public PNormNormalization()
23 | {
24 | this(2.0);
25 | }
26 |
27 | /**
28 | * Creates a new p-norm normalization object
29 | * @param p the norm to use
30 | */
31 | public PNormNormalization(double p)
32 | {
33 | if(p <= 0 || Double.isNaN(p))
34 | throw new IllegalArgumentException("p must be greater than zero, not " + p);
35 | this.p = p;
36 | }
37 |
38 | @Override
39 | public void fit(DataSet data)
40 | {
41 | //no-op, nothing needs to be done
42 | }
43 |
44 | @Override
45 | public DataPoint transform(DataPoint dp)
46 | {
47 | DataPoint dpNew = dp.clone();
48 |
49 | mutableTransform(dpNew);
50 | return dpNew;
51 | }
52 |
53 | @Override
54 | public void mutableTransform(DataPoint dp)
55 | {
56 | Vec vec = dp.getNumericalValues();
57 | double norm = vec.pNorm(p);
58 | if(norm != 0)
59 | vec.mutableDivide(norm);
60 | }
61 |
62 | @Override
63 | public boolean mutatesNominal()
64 | {
65 | return false;
66 | }
67 |
68 | @Override
69 | public PNormNormalization clone()
70 | {
71 | return new PNormNormalization(p);
72 | }
73 | }
74 |
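A small sketch of a non-default norm (not part of the JSAT sources): with p = 1 a non-negative vector is scaled so its entries sum to 1, a common way to turn raw counts into a probability-like vector.

import jsat.classifiers.DataPoint;
import jsat.datatransform.PNormNormalization;
import jsat.linear.DenseVector;

public class L1NormalizationSketch
{
    public static void main(String[] args)
    {
        PNormNormalization l1 = new PNormNormalization(1.0);
        DataPoint counts = new DataPoint(new DenseVector(new double[]{2, 6, 2}));
        l1.mutableTransform(counts);//divides by the 1-norm, 10
        System.out.println(counts.getNumericalValues());//[0.2, 0.6, 0.2]
    }
}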
--------------------------------------------------------------------------------
/JSAT/src/jsat/datatransform/UnitVarianceTransform.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.datatransform;
3 |
4 | import jsat.DataSet;
5 | import jsat.classifiers.DataPoint;
6 | import jsat.linear.Vec;
7 |
8 | /**
9 | * Creates a transform to alter data points so that each attribute has a
10 | * standard deviation of 1, which means a variance of 1.
11 | *
12 | * @author Edward Raff
13 | */
14 | public class UnitVarianceTransform implements InPlaceTransform
15 | {
16 |
17 | private static final long serialVersionUID = 3645532503475641917L;
18 | private Vec stndDevs;
19 |
20 | /**
21 | * Creates a new object for transforming datasets
22 | */
23 | public UnitVarianceTransform()
24 | {
25 | }
26 |
27 | /**
28 | * Creates a new unit-variance transform that is fit to the given
29 | * dataset
30 | *
31 | * @param d the dataset to learn this transform from
32 | */
33 | public UnitVarianceTransform(DataSet d)
34 | {
35 | fit(d);
36 | }
37 |
38 | @Override
39 | public void fit(DataSet d)
40 | {
41 | stndDevs = d.getColumnMeanVariance()[1];
42 | }
43 |
44 | /**
45 | * Copy constructor
46 | * @param other the transform to make a copy of
47 | */
48 | private UnitVarianceTransform(UnitVarianceTransform other)
49 | {
50 | this.stndDevs = other.stndDevs.clone();
51 | }
52 |
53 | @Override
54 | public DataPoint transform(DataPoint dp)
55 | {
56 | DataPoint newDp = dp.clone();
57 | mutableTransform(newDp);
58 | return newDp;
59 | }
60 |
61 | @Override
62 | public void mutableTransform(DataPoint dp)
63 | {
64 | dp.getNumericalValues().mutablePairwiseDivide(stndDevs);
65 | }
66 |
67 | @Override
68 | public boolean mutatesNominal()
69 | {
70 | return false;
71 | }
72 |
73 | @Override
74 | public DataTransform clone()
75 | {
76 | return new UnitVarianceTransform(this);
77 | }
78 |
79 | }
80 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/distributions/empirical/kernelfunc/BiweightKF.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.distributions.empirical.kernelfunc;
3 |
4 | /**
5 | *
6 | * @author Edward Raff
7 | */
8 | public class BiweightKF implements KernelFunction
9 | {
10 |
11 |
12 | private static final long serialVersionUID = -7199542934997154186L;
13 |
14 | private BiweightKF()
15 | {
16 | }
17 |
18 | private static class SingletonHolder
19 | {
20 |
21 | public static final BiweightKF INSTANCE = new BiweightKF();
22 | }
23 |
24 | /**
25 | * Returns the singleton instance of this class
26 | * @return the instance of this class
27 | */
28 | public static BiweightKF getInstance()
29 | {
30 | return SingletonHolder.INSTANCE;
31 | }
32 |
33 | @Override
34 | public double k(double u)
35 | {
36 | if(Math.abs(u) > 1)
37 | return 0;
38 | return Math.pow(1-u*u, 2)*(15.0/16.0);
39 | }
40 |
41 | @Override
42 | public double intK(double u)
43 | {
44 | if(u < -1)
45 | return 0;
46 | if(u > 1)
47 | return 1;
48 | return Math.pow(u+1, 3)/16.0 * (3*u*u - 9*u + 8);
49 | }
50 |
51 | @Override
52 | public double k2()
53 | {
54 | return 1.0/7.0;
55 | }
56 |
57 | @Override
58 | public double cutOff()
59 | {
60 | return Math.ulp(1)+1;
61 | }
62 |
63 | @Override
64 | public double kPrime(double u)
65 | {
66 | if(Math.abs(u) > 1)
67 | return 0;
68 | return (15.0/4.0)*u*(u*u-1);
69 | }
70 |
71 | @Override
72 | public String toString()
73 | {
74 | return "Biweight Kernel";
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/distributions/empirical/kernelfunc/EpanechnikovKF.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.distributions.empirical.kernelfunc;
3 |
4 | /**
5 | *
6 | * @author Edward Raff
7 | */
8 | public class EpanechnikovKF implements KernelFunction
9 | {
10 |
11 | private static final long serialVersionUID = 8688942176576932932L;
12 |
13 | private EpanechnikovKF()
14 | {
15 | }
16 |
17 | private static class SingletonHolder
18 | {
19 |
20 | public static final EpanechnikovKF INSTANCE = new EpanechnikovKF();
21 | }
22 |
23 | /**
24 | * Returns the singleton instance of this class
25 | * @return the instance of this class
26 | */
27 | public static EpanechnikovKF getInstance()
28 | {
29 | return SingletonHolder.INSTANCE;
30 | }
31 |
32 | @Override
33 | public double k(double u)
34 | {
35 | if(Math.abs(u) > 1)
36 | return 0;
37 | return (1-u*u)*(3.0/4.0);
38 | }
39 |
40 | @Override
41 | public double intK(double u)
42 | {
43 | if(u < -1)
44 | return 0;
45 | if( u > 1)
46 | return 1;
47 | return (-u*u*u + 3 *u + 2)/4;
48 | }
49 |
50 | @Override
51 | public double k2()
52 | {
53 | return 1.0/5.0;
54 | }
55 |
56 | @Override
57 | public double cutOff()
58 | {
59 | return Math.ulp(1)+1;
60 | }
61 |
62 | @Override
63 | public double kPrime(double u)
64 | {
65 | if(Math.abs(u) > 1)
66 | return 0;
67 | return - u *(3.0/2.0);
68 | }
69 |
70 | @Override
71 | public String toString()
72 | {
73 | return "Epanechnikov Kernel";
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/distributions/empirical/kernelfunc/GaussKF.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.distributions.empirical.kernelfunc;
3 | import static java.lang.Math.*;
4 | import jsat.distributions.Normal;
5 |
6 | /**
7 | *
8 | * @author Edward Raff
9 | */
10 | public class GaussKF implements KernelFunction
11 | {
12 |
13 | private static final long serialVersionUID = -6765390012694573184L;
14 |
15 | private GaussKF()
16 | {
17 | }
18 |
19 | private static class SingletonHolder
20 | {
21 |
22 | public static final GaussKF INSTANCE = new GaussKF();
23 | }
24 |
25 | /**
26 | * Returns the singleton instance of this class
27 | * @return the instance of this class
28 | */
29 | public static GaussKF getInstance()
30 | {
31 | return SingletonHolder.INSTANCE;
32 | }
33 |
34 | @Override
35 | public double k(double u)
36 | {
37 | return Normal.pdf(u, 0, 1);
38 | }
39 |
40 | @Override
41 | public double intK(double u)
42 | {
43 | return Normal.cdf(u, 0, 1);
44 | }
45 |
46 | @Override
47 | public double k2()
48 | {
49 | return 1;
50 | }
51 |
52 | @Override
53 | public double cutOff()
54 | {
55 | /*
56 | * This is not technically correct, as this value of k(u) is still 7.998827757006813E-38.
57 | * However, this is very close to zero, and is so small that k(u)+x = x, for most values of x.
58 | * Unless this probability is going to be near zero, values past this point will have
59 | * no effect on the result
60 | */
61 | return 13;
62 | }
63 |
64 | @Override
65 | public double kPrime(double u)
66 | {
67 | return -exp(-pow(u, 2)/2)*u/sqrt(2 * PI);
68 | }
69 |
70 | @Override
71 | public String toString()
72 | {
73 | return "Gaussian Kernel";
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/distributions/empirical/kernelfunc/KernelFunction.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.distributions.empirical.kernelfunc;
3 |
4 | import java.io.Serializable;
5 |
6 | /**
7 | * Class for representing one dimensional kernel functions. Since they require
8 | * no parameters and have no need for duplication, it is advised to make
9 | * them singletons.
10 | *
11 | * See http://en.wikipedia.org/wiki/Kernel_(statistics)
12 | *
13 | * @author Edward Raff
14 | */
15 | public interface KernelFunction extends Serializable
16 | {
17 | /**
18 | * Returns the weight to be applied to a sample for the normalized distance of two data points.
19 | * @param u the distance of the data points
20 | * @return the value in [0, 1) of the amount of weight to give to the sample based on its distance
21 | */
22 | public double k(double u);
23 | /**
24 | * Computes the value of the finite integral from -Infinity up to the value u, of the function given by {@link #k(double) }
25 | * @param u the distance of the data points
26 | * @return the value of the integration
27 | */
28 | public double intK(double u);
29 |
30 | /**
31 | *
32 | * Returns the value of the derivative at a point, k'(u)
33 | * @param u the distance of the data points
34 | * @return the value of the derivative at u
35 | */
36 | public double kPrime(double u);
37 |
38 | /**
39 | * Returns the variance of the kernel function
40 | * @return the variance of the kernel function
41 | */
42 | public double k2();
43 |
44 | /**
45 | * As the value of |u| for the kernel function approaches infinity, the
46 | * value of k(u) approaches zero. This function returns the minimal
47 | * absolute value of u for which k(u) returns 0
48 | *
49 | * @return the first value for which k(u) = 0
50 | */
51 | public double cutOff();
52 |
53 | /**
54 | * Returns the name of this kernel function
55 | * @return the name of this kernel function
56 | */
57 | @Override
58 | public String toString();
59 | }
60 |
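A short sketch of how these one-dimensional kernels are typically used to weight a sample by its bandwidth-scaled distance (not part of the JSAT sources; the bandwidth value is illustrative):

import jsat.distributions.empirical.kernelfunc.EpanechnikovKF;
import jsat.distributions.empirical.kernelfunc.KernelFunction;

public class KernelWeightSketch
{
    public static void main(String[] args)
    {
        KernelFunction kf = EpanechnikovKF.getInstance();
        double bandwidth = 2.0;
        double x = 1.5, sample = 1.0;
        //weight of 'sample' for a query at 'x'; u = distance / bandwidth
        double w = kf.k((x - sample) / bandwidth);
        System.out.println(w);//0.75 * (1 - 0.25^2) = 0.703125
        System.out.println(kf.intK(0));//0.5, by symmetry of the kernel
    }
}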
--------------------------------------------------------------------------------
/JSAT/src/jsat/distributions/empirical/kernelfunc/TriweightKF.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.distributions.empirical.kernelfunc;
3 |
4 | /**
5 | *
6 | * @author Edward Raff
7 | */
8 | public class TriweightKF implements KernelFunction
9 | {
10 |
11 | private static final long serialVersionUID = -9156392658970318676L;
12 |
13 | private TriweightKF()
14 | {
15 | }
16 |
17 | private static class SingletonHolder
18 | {
19 |
20 | public static final TriweightKF INSTANCE = new TriweightKF();
21 | }
22 |
23 | /**
24 | * Returns the singleton instance of this class
25 | * @return the instance of this class
26 | */
27 | public static TriweightKF getInstance()
28 | {
29 | return SingletonHolder.INSTANCE;
30 | }
31 |
32 | @Override
33 | public double k(double u)
34 | {
35 | if(Math.abs(u) > 1)
36 | return 0;
37 | return Math.pow(1 - u*u, 3)*(35.0/32.0);
38 | }
39 |
40 | @Override
41 | public double intK(double u)
42 | {
43 | if(u < -1)
44 | return 0;
45 | if(u > 1)
46 | return 1;
47 | return (-5*Math.pow(u, 7) + 21*Math.pow(u, 5) - 35 * Math.pow(u, 3) + 35 *u + 16)/32;
48 | }
49 |
50 | @Override
51 | public double k2()
52 | {
53 | return 1.0/9.0;
54 | }
55 |
56 | @Override
57 | public double cutOff()
58 | {
59 | return Math.ulp(1)+1;
60 | }
61 |
62 | @Override
63 | public double kPrime(double u)
64 | {
65 | if(Math.abs(u) > 1)
66 | return 0;
67 | return -u*(1-u*u)*(1-u*u)*(105.0/16.0);//d/du of (35/32)(1-u^2)^3
68 | }
69 |
70 | @Override
71 | public String toString()
72 | {
73 | return "Triweight Kernel";
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/distributions/empirical/kernelfunc/UniformKF.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.distributions.empirical.kernelfunc;
3 |
4 | /**
5 | *
6 | * @author Edward Raff
7 | */
8 | public class UniformKF implements KernelFunction
9 | {
10 |
11 | private static final long serialVersionUID = -6413579643511350896L;
12 |
13 | private UniformKF()
14 | {
15 | }
16 |
17 | private static class SingletonHolder
18 | {
19 |
20 | public static final UniformKF INSTANCE = new UniformKF();
21 | }
22 |
23 | /**
24 | * Returns the singleton instance of this class
25 | * @return the instance of this class
26 | */
27 | public static UniformKF getInstance()
28 | {
29 | return SingletonHolder.INSTANCE;
30 | }
31 |
32 | @Override
33 | public double k(double u)
34 | {
35 | if(Math.abs(u) > 1)
36 | return 0;
37 | return 0.5;
38 | }
39 |
40 | @Override
41 | public double intK(double u)
42 | {
43 | if(u < -1)
44 | return 0;
45 | if (u > 1)
46 | return 1;
47 | return (u+1)/2;
48 | }
49 |
50 | @Override
51 | public double k2()
52 | {
53 | return 1.0/3.0;
54 | }
55 |
56 | @Override
57 | public double cutOff()
58 | {
59 | return Math.ulp(1)+1;
60 | }
61 |
62 | @Override
63 | public double kPrime(double u)
64 | {
65 | return 0;
66 | }
67 |
68 | @Override
69 | public String toString()
70 | {
71 | return "Uniform Kernel";
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/distributions/kernels/LinearKernel.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.distributions.kernels;
3 |
4 | import java.util.Arrays;
5 | import java.util.List;
6 | import jsat.linear.Vec;
7 | import jsat.parameters.DoubleParameter;
8 | import jsat.parameters.Parameter;
9 |
10 | /**
11 | * Provides a linear kernel function, which computes the normal dot product.
12 | * k(x,y) = x.y + c
13 | *
14 | * @author Edward Raff
15 | */
16 | public class LinearKernel extends BaseKernelTrick
17 | {
18 |
19 | private static final long serialVersionUID = -1870181048970135367L;
20 | private double c;
21 |
22 | /**
23 | * Creates a new Linear Kernel that computes the dot product and offsets it by a specified value
24 | * @param c the positive bias term for the dot product
25 | */
26 | public LinearKernel(double c)
27 | {
28 | this.c = c;
29 | }
30 |
31 | /**
32 | * Creates a new Linear Kernel with an added bias term of 1
33 | */
34 | public LinearKernel()
35 | {
36 | this(1);
37 | }
38 |
39 | /**
40 | * The positive bias term added to the result of the dot product
41 | * @param c the added product term
42 | */
43 | public void setC(double c)
44 | {
45 | if(c < 0 || Double.isInfinite(c) || Double.isNaN(c))
46 | throw new IllegalArgumentException("C must be a positive constant, not " + c);
47 | this.c = c;
48 | }
49 |
50 | /**
51 | * Returns the positive additive term
52 | * @return the positive additive term
53 | */
54 | public double getC()
55 | {
56 | return c;
57 | }
58 |
59 |
60 | @Override
61 | public double eval(Vec a, Vec b)
62 | {
63 | return a.dot(b) + c;
64 | }
65 |
66 | @Override
67 | public String toString()
68 | {
69 | return "Linear Kernel (c=" + c + ")";
70 | }
71 |
72 | @Override
73 | public LinearKernel clone()
74 | {
75 | return new LinearKernel(c);
76 | }
77 | }
78 |
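A minimal evaluation sketch (not part of the JSAT sources; the DenseVector(double[]) constructor is assumed):

import jsat.distributions.kernels.LinearKernel;
import jsat.linear.DenseVector;
import jsat.linear.Vec;

public class LinearKernelSketch
{
    public static void main(String[] args)
    {
        Vec a = new DenseVector(new double[]{1, 2, 3});
        Vec b = new DenseVector(new double[]{4, 5, 6});
        LinearKernel k = new LinearKernel(1.0);
        System.out.println(k.eval(a, b));//1*4 + 2*5 + 3*6 + 1 = 33.0
    }
}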
--------------------------------------------------------------------------------
/JSAT/src/jsat/distributions/multivariate/MultivariateDistributionSkeleton.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.distributions.multivariate;
3 |
4 | import java.util.List;
5 | import java.util.concurrent.ExecutorService;
6 | import jsat.DataSet;
7 | import jsat.classifiers.DataPoint;
8 | import jsat.linear.DenseVector;
9 | import jsat.linear.Vec;
10 |
11 | /**
12 | * Common class for implementing a multivariate distribution. A number of methods are pre implemented,
13 | * building off of the implementation of the remaining methods.
14 | * Note: the default implementation for the multithreaded methods calls the non threaded version of the method.
15 | * The exception to this is the {@link #setUsingData(jsat.DataSet, java.util.concurrent.ExecutorService) } method,
16 | * which calls {@link #setUsingData(java.util.List, java.util.concurrent.ExecutorService) }
17 | *
18 | * @author Edward Raff
19 | */
20 | public abstract class MultivariateDistributionSkeleton implements MultivariateDistribution
21 | {
22 |
23 | private static final long serialVersionUID = 4080753806798149915L;
24 |
25 | @Override
26 | public double logPdf(Vec x)
27 | {
28 | double logPDF = Math.log(pdf(x));
29 | if(Double.isInfinite(logPDF) && logPDF < 0)//log(0) == -Infinity
30 | return -Double.MAX_VALUE;
31 | return logPDF;
32 | }
33 |
34 |
35 | @Override
36 | abstract public MultivariateDistribution clone();
37 | }
38 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/distributions/multivariate/NormalMR.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2018 Edward Raff
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 |  * along with this program. If not, see <http://www.gnu.org/licenses/>.
[remainder of NormalMR.java lost in extraction]
--------------------------------------------------------------------------------
/JSAT/src/jsat/linear/distancemetrics/DenseSparseMetric.java:
--------------------------------------------------------------------------------
[start of file lost in extraction; the surviving interface javadoc continues below]
12 | * A distance metric that can efficiently handle dense to sparse distance
13 | * computations has no reason to implement this interface.
14 | *
15 | * @author Edward Raff
16 | */
17 | public interface DenseSparseMetric extends DistanceMetric
18 | {
19 | /**
20 | * Computes a summary constant value for the vector that is based on the
21 | * distance metric in use. This value will be used to perform efficient
22 | * dense to sparse computations.
23 | *
24 | * @param vec the vector that will be used in many distance computations
25 | * @return the summary value for the vector
26 | */
27 | public double getVectorConstant(Vec vec);
28 |
29 | /**
30 | * Efficiently computes the distance from one main vector that is used many
31 | * times, to some sparse target vector. If the target vector dose not return
32 | * true for {@link Vec#isSparse() }, the distance will be calculated using
33 | * {@link #dist(jsat.linear.Vec, jsat.linear.Vec) } instead.
34 | *
35 | * @param summaryConst the summary constant for the main vector obtained
36 | * with {@link #getVectorConstant(jsat.linear.Vec) }
37 | * @param main the main vector the summary constant is for
38 | * @param target the target vector to compute the distance to
39 | * @return the distance between the two vectors dist(main, target)
40 | */
41 | public double dist(double summaryConst, Vec main, Vec target);
42 | }
43 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/linear/distancemetrics/ManhattanDistance.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.linear.distancemetrics;
3 |
4 | import java.util.List;
5 | import java.util.concurrent.ExecutorService;
6 | import jsat.linear.IndexValue;
7 | import jsat.linear.Vec;
8 |
9 | /**
10 | * Manhattan Distance is the L1 norm.
11 | *
12 | * @author Edward Raff
13 | */
14 | public class ManhattanDistance implements DenseSparseMetric
15 | {
16 |
17 | private static final long serialVersionUID = 3028834823742743351L;
18 |
19 | @Override
20 | public double dist(Vec a, Vec b)
21 | {
22 | return a.pNormDist(1, b);
23 | }
24 |
25 | @Override
26 | public boolean isSymmetric()
27 | {
28 | return true;
29 | }
30 |
31 | @Override
32 | public boolean isSubadditive()
33 | {
34 | return true;
35 | }
36 |
37 | @Override
38 | public boolean isIndiscemible()
39 | {
40 | return true;
41 | }
42 |
43 | @Override
44 | public double metricBound()
45 | {
46 | return Double.POSITIVE_INFINITY;
47 | }
48 |
49 | @Override
50 | public String toString()
51 | {
52 | return "Manhattan Distance";
53 | }
54 |
55 | @Override
56 | public ManhattanDistance clone()
57 | {
58 | return new ManhattanDistance();
59 | }
60 |
61 | @Override
62 | public double getVectorConstant(Vec vec)
63 | {
64 | return vec.pNorm(1);
65 | }
66 |
67 | @Override
68 | public double dist(double summaryConst, Vec main, Vec target)
69 | {
70 | if(!target.isSparse())
71 | return dist(main, target);
72 | /**
73 | * Summary contains the differences to the zero vec, only a few
74 | * of the indices are actually non zero - we correct those values
75 | */
76 | double takeOut = 0.0;
77 | for(IndexValue iv : target)
78 | {
79 | int i = iv.getIndex();
80 | double mainVal = main.get(i);
81 | takeOut += Math.abs(mainVal)-Math.abs(mainVal-iv.getValue());
82 | }
83 | return summaryConst-takeOut;
84 | }
85 | }
86 |
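A sketch of the dense-to-sparse fast path above (not part of the JSAT sources; the SparseVector(int) constructor is assumed): the 1-norm of the dense vector is computed once and reused against many sparse targets.

import jsat.linear.DenseVector;
import jsat.linear.SparseVector;
import jsat.linear.Vec;
import jsat.linear.distancemetrics.ManhattanDistance;

public class DenseSparseSketch
{
    public static void main(String[] args)
    {
        ManhattanDistance dist = new ManhattanDistance();
        Vec main = new DenseVector(new double[]{1, 2, 3, 4});
        SparseVector target = new SparseVector(4);//length-4 sparse vector, assumed ctor
        target.set(2, 5.0);
        double summary = dist.getVectorConstant(main);//||main||_1 = 10, computed once
        System.out.println(dist.dist(summary, main, target));//9.0, via the fast path
        System.out.println(dist.dist(main, target));//9.0, the full O(d) computation
    }
}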
--------------------------------------------------------------------------------
/JSAT/src/jsat/linear/vectorcollection/BaseCaseDT.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2018 Edward Raff
3 | *
4 | * This program is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This program is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 |  * along with this program. If not, see <http://www.gnu.org/licenses/>.
[remainder of BaseCaseDT.java lost in extraction]
--------------------------------------------------------------------------------
[file path lost in extraction; the surviving lines below document a simple linear regression helper]
--------------------------------------------------------------------------------
14 | *
15 | * @param yData the Y data set (to be predicted)
16 | * @param xData the X data set (the predictor)
17 | * @return an array containing the a and b, such that index 0 contains a and index 1 contains b
18 | */
19 | static public double[] regres(Vec xData, Vec yData)
20 | {
21 | //find y = a + B *x
22 | double[] toReturn = new double[2];
23 |
24 | //B value
25 | toReturn[1] = DescriptiveStatistics.sampleCorCoeff(xData, yData)*yData.standardDeviation()/xData.standardDeviation();
26 | //a value
27 | toReturn[0] = yData.mean() - toReturn[1]*xData.mean();
28 |
29 | return toReturn;
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/math/TrigMath.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.math;
3 | import static java.lang.Math.*;
4 |
5 | /**
6 | * This class includes additional trig and hyperbolic trig functions that
7 | * do not come with java.lang.Math by default.
8 | *
9 | * @author Edward Raff
10 | */
11 | public class TrigMath
12 | {
13 |
14 | public static double coth(double x)
15 | {
16 | double eX = exp(x);
17 | double eNX = exp(-x);
18 |
19 | return (eX + eNX) / (eX - eNX);
20 | }
21 |
22 | public static double sech(double x)
23 | {
24 | return 2 / (exp(x) + exp(-x));
25 | }
26 |
27 | public static double csch(double x)
28 | {
29 | return 2 / (exp(x) - exp(-x));
30 | }
31 |
32 | public static double asinh(double x)
33 | {
34 | return log(x + sqrt(x*x + 1));
35 | }
36 |
37 | public static double acosh(double x)
38 | {
39 | if(x < 1)
40 | return Double.NaN;//Complex result
41 | return log(x + sqrt(x*x - 1));
42 | }
43 |
44 | public static double atanh(double x)
45 | {
46 | if(abs(x) >= 1)
47 | return Double.NaN;
48 | return 0.5* log((1+x) / (1-x));//atanh(x) = (1/2) ln((1+x)/(1-x))
49 | }
50 |
51 | public static double asech(double x)
52 | {
53 | if(x <= 0 || x > 1)
54 | return Double.NaN;
55 | return log((1 + sqrt(1-x*x))/x);
56 | }
57 |
58 | public static double acsch(double x)
59 | {
60 | return log(1/x + sqrt(1+x*x)/abs(x));
61 | }
62 |
63 | public static double acotch(double x)
64 | {
65 | if(abs(x) <= 1)
66 | return Double.NaN;
67 | return 0.5* log((x+1) / (x-1));
68 | }
69 |
70 | }
71 |
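A quick round-trip check of the inverse functions (not part of the JSAT sources):

import jsat.math.TrigMath;

public class TrigMathSketch
{
    public static void main(String[] args)
    {
        double x = 0.5;
        System.out.println(TrigMath.atanh(Math.tanh(x)));//~0.5, atanh inverts tanh on (-1, 1)
        System.out.println(TrigMath.asinh(Math.sinh(x)));//~0.5
        System.out.println(TrigMath.acosh(Math.cosh(x)));//~0.5, defined for inputs >= 1
    }
}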
--------------------------------------------------------------------------------
/JSAT/src/jsat/math/decayrates/DecayRate.java:
--------------------------------------------------------------------------------
1 | package jsat.math.decayrates;
2 |
3 | import java.io.Serializable;
4 |
5 | /**
6 | * Many algorithms use a learning rate to adjust the step size by which the
7 | * search space is covered. In practice, it is often useful to reduce this
8 | * learning rate over time. In this way, large steps can be taken in the
9 | * beginning when we are far from the solution, and smaller steps when we have
10 | * gotten closer to the solution and do not want to step too far away.
11 | *
12 | * @author Edward Raff
13 | */
14 | public interface DecayRate extends Serializable
15 | {
16 | /**
17 | * Decays the initial value over time.
18 | *
19 | * @param time the current time through the algorithm in the range
20 | * [0, maxTime]
21 | * @param maxTime the maximum time step that will be seen
22 | * @param initial the initial value
23 | * @return the decayed value over time of the initial value
24 | * @throws ArithmeticException if the time is negative
25 | */
26 | public double rate(double time, double maxTime, double initial);
27 |
28 | /**
29 | * Decays the initial value over time.
30 | *
31 | * @param time the current time step to return a value for
32 | * @param initial the initial learning rate
33 | * @return the decayed value
34 | */
35 | public double rate(double time, double initial);
36 |
37 | public DecayRate clone();
38 | }
39 |
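A sketch of implementing the contract above with a simple 1/(1 + t) schedule (not a JSAT-provided class; JSAT ships its own rates such as NoDecay below):

import jsat.math.decayrates.DecayRate;

public class InverseDecaySketch implements DecayRate
{
    @Override
    public double rate(double time, double maxTime, double initial)
    {
        return rate(time, initial);//this schedule ignores maxTime
    }

    @Override
    public double rate(double time, double initial)
    {
        if(time < 0)
            throw new ArithmeticException("Negative time value given");
        return initial / (1 + time);//large steps early, smaller steps later
    }

    @Override
    public DecayRate clone()
    {
        return new InverseDecaySketch();
    }
}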
--------------------------------------------------------------------------------
/JSAT/src/jsat/math/decayrates/NoDecay.java:
--------------------------------------------------------------------------------
1 | package jsat.math.decayrates;
2 |
3 | /**
4 | * A possible value for a decaying learning rate. NoDecay will perform no
5 | * decaying of the initial value, the initial value will always be returned
6 | * regardless of the input.
7 | *
8 | * @author Edward Raff
9 | */
10 | public class NoDecay implements DecayRate
11 | {
12 |
13 | private static final long serialVersionUID = -4502356199281880268L;
14 |
15 | @Override
16 | public double rate(double time, double maxTime, double initial)
17 | {
18 | return rate(time, initial);
19 | }
20 |
21 | @Override
22 | public double rate(double time, double initial)
23 | {
24 | if(time < 0)
25 | throw new ArithmeticException("Negative time value given");
26 | return initial;
27 | }
28 |
29 | @Override
30 | public DecayRate clone()
31 | {
32 | return new NoDecay();
33 | }
34 |
35 | @Override
36 | public String toString()
37 | {
38 | return "NoDecay";
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/math/integration/Trapezoidal.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.math.integration;
3 |
4 | import jsat.math.Function1D;
5 |
6 | /**
7 | * This class provides an implementation of the Trapezoidal method for
8 | * numerically computing an integral
9 | *
10 | * @author Edward Raff
11 | */
12 | public class Trapezoidal
13 | {
14 | /**
15 | * Numerically computes the integral of the given function
16 | *
17 | * @param f the function to integrate
18 | * @param a the lower limit of the integral
19 | * @param b the upper limit of the integral
20 | * @param N the number of points in the integral to take, must be ≥ 2.
21 | * @return an approximation of the integral of
22 | * ∫<sub>a</sub><sup>b</sup> f(x) dx
23 | */
24 | static public double trapz(Function1D f, double a, double b, int N)
25 | {
26 | if(a == b)
27 | return 0;
28 | else if(a > b)
29 | throw new RuntimeException("Integral upper limit (" + b+") must be larger than the lower-limit (" + a + ")");
30 | else if(N < 2)
31 | throw new RuntimeException("At least two integration parts must be used, not " + N);
32 | /*
33 | * b / N - 1 \
34 | * / | ===== |
35 | * | b - a |f(a) + f(b) \ / k (b - a)\|
36 | * | f(x) dx = ----- |----------- + > f|a + ---------||
37 | * | N | 2 / \ N /|
38 | * / | ===== |
39 | * a \ k = 1 /
40 | */
41 | double sum =0;
42 | for(int k = 1; k < N; k++)
43 | sum += f.f(a+k*(b-a)/N);
44 |
45 | sum+= (f.f(a)+f.f(b))/2;
46 |
47 | return (b-a)/N*sum;
48 | }
49 | }
50 |
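A minimal integration sketch (not part of the JSAT sources; Function1D is assumed to be a functional interface with a single f(double) method, as its usage above suggests):

import jsat.math.Function1D;
import jsat.math.integration.Trapezoidal;

public class TrapzSketch
{
    public static void main(String[] args)
    {
        Function1D f = x -> x * x;
        //integral of x^2 over [0, 1] is 1/3; more points tighten the approximation
        System.out.println(Trapezoidal.trapz(f, 0, 1, 1000));//~0.33333
    }
}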
--------------------------------------------------------------------------------
/JSAT/src/jsat/math/optimization/GoldenSearch.java:
--------------------------------------------------------------------------------
1 | package jsat.math.optimization;
2 |
3 | import jsat.math.Function1D;
4 |
5 | /**
6 | * Minimizes a single variate function over a bracketed interval using the
7 | * golden-section search method.
8 | * @author Edward Raff
9 | */
10 | public class GoldenSearch
11 | {
12 | /**
13 | * Phi (golden ratio) minus 1
14 | */
15 | private static final double tau = (Math.sqrt(5.0) - 1.0)/2.0;
16 | private static final double om_tau = 1-tau;
17 |
18 | /**
19 | * Finds the local minimum of the function {@code f}.
20 | * @param eps the desired accuracy of the result
21 | * @param maxIterations the maximum number of iterations to perform
22 | * @param a the left bound on the minimum
23 | * @param b the right bound on the minimum
24 | * @param f the function to find the minimum of
25 |
26 | * @return the value in [a, b] that approximately minimizes the function
27 | */
28 | public static double minimize(double eps, int maxIterations, double a, double b, Function1D f)
29 | {
30 | if (a > b)
31 | {
32 | double tmp = b;
33 | b = a;
34 | a = tmp;
35 | }
36 |
37 | //Intitial values
38 | int iter = 0;
39 |
40 | double x1 = a + om_tau*(b-a);
41 | double f1 = f.f(x1);
42 |
43 | double x2 = a + tau*(b-a);
44 | double f2 = f.f(x2);
45 |
46 | while (b - a > 2 * eps && iter < maxIterations)
47 | {
48 | if(f1 > f2)
49 | {
50 | a = x1;
51 | x1 = x2;
52 | f1 = f2;
53 | x2 = a + tau*(b-a);
54 | f2 = f.f(x2);
55 | }
56 | else//f1 < f2
57 | {
58 | b = x2;
59 | x2 = x1;
60 | f2 = f1;
61 | x1 = a + om_tau*(b-a);
62 | f1 = f.f(x1);
63 | }
64 | iter++;
65 | }
66 |
67 | return (a + b) / 2.0;
68 | }
69 | }
70 |
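A minimal minimization sketch (not part of the JSAT sources; Function1D as a functional interface is assumed):

import jsat.math.Function1D;
import jsat.math.optimization.GoldenSearch;

public class GoldenSearchSketch
{
    public static void main(String[] args)
    {
        Function1D f = x -> (x - 2) * (x - 2) + 1;//unique minimum at x = 2
        double xMin = GoldenSearch.minimize(1e-6, 1000, 0, 5, f);
        System.out.println(xMin);//~2.0
    }
}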
--------------------------------------------------------------------------------
/JSAT/src/jsat/math/optimization/stochastic/SimpleSGD.java:
--------------------------------------------------------------------------------
1 | package jsat.math.optimization.stochastic;
2 |
3 | import jsat.linear.Vec;
4 |
5 | /**
6 | * Performs unaltered Stochastic Gradient Descent updates computing
7 | * x = x - η grad
8 | *
9 | * Because the SimpleSGD requires no internal state, it is not necessary to call
10 | * {@link #setup(int) }.
11 | *
12 | * @author Edward Raff
13 | */
14 | public class SimpleSGD implements GradientUpdater
15 | {
16 |
17 |
18 | private static final long serialVersionUID = 4022442467298319553L;
19 |
20 | /**
21 | * Creates a new SGD updater
22 | */
23 | public SimpleSGD()
24 | {
25 | }
26 |
27 | @Override
28 | public void update(Vec x, Vec grad, double eta)
29 | {
30 | x.mutableSubtract(eta, grad);
31 | }
32 |
33 | @Override
34 | public double update(Vec x, Vec grad, double eta, double bias, double biasGrad)
35 | {
36 | x.mutableSubtract(eta, grad);
37 | return eta*biasGrad;
38 | }
39 |
40 | @Override
41 | public SimpleSGD clone()
42 | {
43 | return new SimpleSGD();
44 | }
45 |
46 | @Override
47 | public void setup(int d)
48 | {
49 | //no setup to be done
50 | }
51 |
52 | }
53 |
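A single-step sketch of the update rule above (not part of the JSAT sources; the DenseVector(double[]) constructor is assumed): for f(x) = ||x||^2 / 2 the gradient is x itself.

import jsat.linear.DenseVector;
import jsat.linear.Vec;
import jsat.math.optimization.stochastic.SimpleSGD;

public class SGDStepSketch
{
    public static void main(String[] args)
    {
        Vec x = new DenseVector(new double[]{1, 1});
        SimpleSGD sgd = new SimpleSGD();
        sgd.update(x, x.clone(), 0.1);//x <- x - 0.1 * grad
        System.out.println(x);//[0.9, 0.9]
    }
}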
--------------------------------------------------------------------------------
/JSAT/src/jsat/math/rootfinding/RootFinder.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.math.rootfinding;
3 |
4 | import java.io.Serializable;
5 | import jsat.math.Function1D;
6 |
7 | /**
8 | * This interface defines a general contract for the numerical computation of a
9 | * root of a given function. A root of a function {@code f} is a point {@code x}
10 | * for which {@code f(x) = 0}. A function may have any number of roots
11 | * (including no roots).
12 |
13 | *
14 | * @author Edward Raff
15 | */
16 | public interface RootFinder extends Serializable
17 | {
18 | /**
19 | * Attempts to numerically compute a root of the given function, i.e., a value x such that f(x) = 0
20 | *
21 | * @param eps the accuracy desired for the solution
22 | * @param maxIterations the maximum number of steps allowed before forcing a return of the current solution.
23 | * @param initialGuesses an array containing the initial guess values
24 | * @param f the function to find the root of
25 | * @return the value x that makes the function approximately return 0
28 | */
29 | public double root(double eps, int maxIterations, double[] initialGuesses, Function1D f);
30 |
31 | /**
32 | * Different root finding methods require different numbers of initial guesses.
33 | * Some root finding methods require 2 guesses, each with values of opposite
34 | * sign so that they bracket the root. Others just need any 2 initial guesses
35 | * sufficiently close to the root. This method simply returns the number of
36 | * guesses that are needed.
37 | *
38 | * @return the number of initial guesses this root finding method needs
39 | */
40 | public int guessesNeeded();
41 | }
42 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/outlier/Outlier.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (C) 2018 Edward Raff
[remainder of Outlier.java lost in extraction]
--------------------------------------------------------------------------------
[file path lost in extraction; the surviving lines below are the tail of a double-valued Parameter class]
--------------------------------------------------------------------------------
38 | *
39 | * Providing a getGuess is not required, and returns {@code null} if
40 | * guessing is not supported.
41 | *
42 | * @param data the data with which we want a reasonable guess for this
43 | * parameter
44 | * @return a distribution that represents a reasonable guess of a good value
45 | * for this parameter given the input data
46 | */
47 | public Distribution getGuess(DataSet data)
48 | {
49 | return null;
50 | }
51 |
52 | @Override
53 | public String getValueString()
54 | {
55 | return Double.toString(getValue());
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/parameters/IntParameter.java:
--------------------------------------------------------------------------------
1 | package jsat.parameters;
2 |
3 | import jsat.DataSet;
4 | import jsat.distributions.Distribution;
5 |
6 | /**
7 | * An integer parameter that may be altered.
8 | *
9 | * @author Edward Raff
10 | */
11 | public abstract class IntParameter extends Parameter
12 | {
13 |
14 | private static final long serialVersionUID = -8467918069240345315L;
15 |
16 | /**
17 | * Returns the current value for the parameter.
18 | *
19 | * @return the value for this parameter.
20 | */
21 | abstract public int getValue();
22 |
23 | /**
24 | * Sets the value for this parameter.
25 | * @return true if the value was set, false if the value
26 | * was invalid, and thus ignored.
27 | */
28 | abstract public boolean setValue(int val);
29 |
30 | /**
31 | * This method allows one to obtain a distribution that represents a
32 | * reasonable "guess" at the range of values that would work for this
33 | * parameter. If the DataSet is an instance of {@link ClassificationDataSet}
34 | * or {@link RegressionDataSet}, the method may choose to assume that the
35 | * value is being guessed for the specified task and change its behavior
36 | *
37 | * Providing a getGuess is not required, and returns {@code null} if
38 | * guessing is not supported.
39 | *
40 | * @param data the data with which we want a reasonable guess for this
41 | * parameter
42 | * @return a distribution that represents a reasonable guess of a good value
43 | * for this parameter given the input data
44 | */
45 | public Distribution getGuess(DataSet data)
46 | {
47 | return null;
48 | }
49 |
50 | @Override
51 | public String getValueString()
52 | {
53 | return Integer.toString(getValue());
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/parameters/KernelFunctionParameter.java:
--------------------------------------------------------------------------------
1 |
2 | package jsat.parameters;
3 |
4 | import java.util.*;
5 | import jsat.distributions.empirical.kernelfunc.*;
6 |
7 | /**
8 | * A default Parameter semi-implementation for classes that require a
9 | * {@link KernelFunction} to be specified.
10 | *
11 | * @author Edward Raff
12 | */
13 | public abstract class KernelFunctionParameter extends ObjectParameter<KernelFunction>
[remainder of KernelFunctionParameter.java lost in extraction]
--------------------------------------------------------------------------------
/JSAT/src/jsat/text/stemming/Stemmer.java:
--------------------------------------------------------------------------------
[start of file lost in extraction]
13 | * Do not expect perfect results from stemming. This class provides the
14 | * contract for a stemmer that does not have any word history.
15 | *
16 | * @author Edward Raff
17 | */
18 | public abstract class Stemmer implements Serializable
19 | {
20 |
21 | private static final long serialVersionUID = 1889842876393488149L;
22 |
23 | /**
24 | * Reduce the given input to its stem word
25 | * @param word the unstemmed input word
26 | * @return the stemmed version of the word
27 | */
28 | abstract public String stem(String word);
29 |
30 | /**
31 | * Replaces each value in the list with the stemmed version of the word
32 | * @param list the list to apply stemming to
33 | */
34 | public void applyTo(List<String> list)
[remainder of Stemmer.java lost in extraction]
--------------------------------------------------------------------------------
/JSAT/src/jsat/text/tokenizer/Tokenizer.java:
--------------------------------------------------------------------------------
[start of file lost in extraction]
21 | * This method should be thread safe
22 | *
23 | * @param input the string to tokenize
24 | * @return an already allocated list to place the tokens into
25 | */
26 | public List<String> tokenize(String input);
[intervening lines lost in extraction]
34 | * This method should be thread safe
35 | *
36 | * @param input the string to tokenize
37 | * @param workSpace an already allocated (but empty) string builder than can
38 | * be used as a temporary work space.
39 | * @param storageSpace an already allocated (but empty) list to place the
40 | * tokens into
41 | */
42 | public void tokenize(String input, StringBuilder workSpace, List<String> storageSpace);
[remainder of Tokenizer.java lost in extraction]
--------------------------------------------------------------------------------
/JSAT/src/jsat/text/wordweighting/BinaryWordPresent.java:
--------------------------------------------------------------------------------
[start of file lost in extraction]
10 | *
11 | * This class does not require any state or configuration, so it can be used
12 | * without calling {@link #setWeight(java.util.List, java.util.List) }.
13 | *
14 | *
15 | * @author Edward Raff
16 | */
17 | public class BinaryWordPresent implements WordWeighting
18 | {
19 |
20 | private static final long serialVersionUID = 5633647387188363706L;
21 |
22 | @Override
23 | public void setWeight(List<? extends Vec> allDocuments, List<Integer> df)
[remainder of BinaryWordPresent.java lost in extraction]
--------------------------------------------------------------------------------
/JSAT/src/jsat/text/wordweighting/WordCount.java:
--------------------------------------------------------------------------------
[start of file lost in extraction]
10 | * This class does not require any state or configuration, so it can be used
11 | * without calling {@link #setWeight(java.util.List, java.util.List) }.
12 | *
13 | * @author Edward Raff
14 | */
15 | public class WordCount implements WordWeighting
16 | {
17 | private static final long serialVersionUID = 4665749166722300326L;
18 |
19 | @Override
20 | public void setWeight(List<? extends Vec> allDocuments, List<Integer> df)
[remainder of WordCount.java lost in extraction]
--------------------------------------------------------------------------------
/JSAT/src/jsat/text/wordweighting/WordWeighting.java:
--------------------------------------------------------------------------------
[start of file lost in extraction]
11 | * Some Word weighting schemes may need information about the document
12 | * collection as a whole before constructing the weightings, and this class
13 | * provides the facilities for this to be done in a standardized manner.
14 | *
15 | * @author Edward Raff
16 | */
17 | public interface WordWeighting extends IndexFunction
18 | {
19 |
20 | /**
21 | * Prepares the word weighting to be performed on a data set. This should be
22 | * called once before being applied to any vectors. Different WordWeightings
23 | * may require different amounts of computation to set up.
24 | *
25 | * @param allDocuments the list of all vectors that make up the set of
26 | * documents. The word vectors should be unmodified, containing the value of
27 | * how many times a word appeared in the document for each index.
28 | * @param df a list mapping each integer index of a word to how many times
29 | * that word occurred in total
30 | */
31 | public void setWeight(List<? extends Vec> allDocuments, List<Integer> df);
[remainder of WordWeighting.java lost in extraction]
--------------------------------------------------------------------------------
/JSAT/src/jsat/utils/random/CMWC4096.java:
--------------------------------------------------------------------------------
[start of file lost in extraction]
9 | * See: Marsaglia, G. (2005).
10 | * On the randomness of Pi and other decimal expansions. Interstat 5
11 | *
12 | * @author Edward Raff
13 | */
14 | public class CMWC4096 extends Random
15 | {
16 |
17 | private static final long serialVersionUID = -5061963074440046713L;
18 | private static final long a = 18782;
19 | private int c = 362436;
20 | private int i = 4095;
21 | private int[] Q;
22 |
23 | /**
24 | * Creates a new PRNG with a random seed
25 | */
26 | public CMWC4096()
27 | {
28 | super();
29 | }
30 |
31 | /**
32 | * Creates a new PRNG
33 | * @param seed the seed that controls the initial state of the PRNG
34 | * @see #setSeed(long)
35 | */
36 | public CMWC4096(long seed)
37 | {
38 | super(seed);
39 | }
40 |
41 |
42 | @Override
43 | public synchronized void setSeed(long seed)
44 | {
45 | super.setSeed(seed);
46 | if(Q == null)
47 | Q = new int[4096];
48 | for (int j = 0; j < Q.length; j++)
49 | Q[j] = super.next(32);
50 | }
51 |
52 | @Override
53 | protected int next(int bits)
54 | {
55 | long t;
56 |
57 | long x, r = 0xfffffffe;
58 | i = (i + 1) & 4095;
59 | t = a * Q[i] + c;
60 | c = (int) (t >>> 32);
61 | x = t + c;
62 | if (x < c)
63 | {
64 | x++;
65 | c++;
66 | }
67 | return (Q[i] = (int) (r - x)) >>> 32 - bits;
68 | }
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/utils/random/XOR128.java:
--------------------------------------------------------------------------------
1 | package jsat.utils.random;
2 |
3 | import java.util.Random;
4 |
5 | /**
6 | * A fast PRNG that produces medium quality random numbers that passes the
7 | * diehard tests. It has a period of 2<sup>128</sup>-1
8 | *
9 | * See: G. Marsaglia. Xorshift RNGs. Journal of Statistical Software, 8,
10 | * 14:1–9, 2003
11 | * @author Edward Raff
12 | */
13 | public class XOR128 extends Random
14 | {
15 |
16 | private static final long serialVersionUID = -5218902638864900490L;
17 | private long x, y, z, w;
18 |
19 | /**
20 | * Creates a new PRNG with a random seed
21 | */
22 | public XOR128()
23 | {
24 | super();
25 | }
26 |
27 | /**
28 | * Creates a new PRNG
29 | * @param seed the seed that controls the initial state of the PRNG
30 | * @see #setSeed(long)
31 | */
32 | public XOR128(long seed)
33 | {
34 | super(seed);
35 | }
36 |
37 | @Override
38 | public synchronized void setSeed(long seed)
39 | {
40 | super.setSeed(seed);
41 | x = super.next(32);
42 | x = x << 32;
43 | x += super.next(32);
44 |
45 | y = super.next(32);
46 | y = y << 32;
47 | y += super.next(32);
48 |
49 | z = super.next(32);
50 | z = z << 32;
51 | z += super.next(32);
52 |
53 | w = super.next(32);
54 | w = w << 32;
55 | w += super.next(32);
56 | }
57 |
58 | @Override
59 | protected int next(int bits)
60 | {
61 | return (int)(nextLong() >>> (64 - bits));
62 | }
63 |
64 | @Override
65 | public long nextLong()
66 | {
67 | long t;
68 | t = (x ^ (x << 11));
69 | x = y;
70 | y = z;
71 | z = w;
72 | w = (w ^ (w >>> 19)) ^ (t ^ (t >>> 8));
73 | return w;
74 | }
75 |
76 | @Override
77 | public double nextDouble()
78 | {
79 | long l = nextLong() >>> 11;
80 | return l / (double)(1L << 53);
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/JSAT/src/jsat/utils/random/XOR96.java:
--------------------------------------------------------------------------------
1 | package jsat.utils.random;
2 |
3 | import java.util.Random;
4 |
5 | /**
6 | * A fast PRNG that produces medium quality random numbers. It has a period of
7 | * 2<sup>96</sup>-1
8 | *
9 | * See: G. Marsaglia. Xorshift RNGs. Journal of Statistical Software, 8,
10 | * 14:1–9, 2003
11 | * @author Edward Raff
12 | */
13 | public class XOR96 extends Random
14 | {
15 |
16 | private static final long serialVersionUID = 1247900882148980639L;
17 |
18 | private static final long a = 13, b = 19, c = 3;//magic from paper
19 |
20 | private long x, y, z;
21 |
22 | /**
23 | * Creates a new PRNG with a random seed
24 | */
25 | public XOR96()
26 | {
27 | super();
28 | }
29 |
30 | /**
31 | * Creates a new PRNG
32 | * @param seed the seed that controls the initial state of the PRNG
33 | * @see #setSeed(long)
34 | */
35 | public XOR96(long seed)
36 | {
37 | super(seed);
38 | }
39 |
40 | @Override
41 | public synchronized void setSeed(long seed)
42 | {
43 | super.setSeed(seed);
44 | x = super.next(32);
45 | x = x << 32;
46 | x += super.next(32);
47 |
48 | y = super.next(32);
49 | y = y << 32;
50 | y += super.next(32);
51 |
52 | z = super.next(32);
53 | z = z << 32;
54 | z += super.next(32);
55 | }
56 |
57 | @Override
58 | protected int next(int bits)
59 | {
60 | return (int)(nextLong() >>> (64 - bits));
61 | }
62 |
63 | @Override
64 | public long nextLong()
65 | {
66 | long t = (x ^ (x << a));
67 | x = y;
68 | y = z;
69 | z = (z ^ (z >>> c)) ^ (t ^ (t >>> b));
70 | return z;
71 | }
72 |
73 | @Override
74 | public double nextDouble()
75 | {
76 | long l = nextLong() >>> 11;
77 | return l / (double)(1L << 53);
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/JSAT/test/jsat/NormalClampedSample.java:
--------------------------------------------------------------------------------
1 | package jsat;
2 |
3 | import java.util.Random;
4 | import jsat.distributions.Normal;
5 | import jsat.linear.DenseVector;
6 |
7 | /**
8 | * Helper class to avoid issues with sampling from the normal distribution when
9 | * testing since the normal can have extreme values
10 | * @author Edward Raff
11 | */
12 | public class NormalClampedSample extends Normal
13 | {
14 |
15 | private static final long serialVersionUID = 3970933766374506189L;
16 | double min, max;
17 |
18 | public NormalClampedSample(double mean, double stndDev)
19 | {
20 | this(mean, stndDev, mean-3*stndDev, mean+3*stndDev);
21 | }
22 |
23 | public NormalClampedSample(double mean, double stndDev, double min, double max)
24 | {
25 | super(mean, stndDev);
26 | this.min = Math.min(min, max);
27 | this.max = Math.max(min, max);
28 | }
29 |
30 | @Override
31 | public double invCdf(double d)
32 | {
33 | return Math.max(min, Math.min(max, super.invCdf(d)));
34 | }
35 |
36 | @Override
37 | public double[] sample(int numSamples, Random rand)
38 | {
39 | double[] ret = super.sample(numSamples, rand);
40 | for(int i = 0; i < ret.length; i++)
41 | ret[i] = Math.max(min, Math.min(max, ret[i]));
42 | return ret;
43 | }
44 |
45 | @Override
46 | public DenseVector sampleVec(int numSamples, Random rand)
47 | {
48 | DenseVector ret = super.sampleVec(numSamples, rand);
49 | for(int i = 0; i < ret.length(); i++)
50 | ret.set(i, Math.max(min, Math.min(max, ret.get(i))));
51 | return ret;
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/JSAT/test/jsat/classifiers/linear/ALMA2Test.java:
--------------------------------------------------------------------------------
1 | /*
2 | * To change this template, choose Tools | Templates
3 | * and open the template in the editor.
4 | */
5 | package jsat.classifiers.linear;
6 |
7 | import java.util.Random;
8 | import jsat.FixedProblems;
9 | import jsat.classifiers.*;
10 | import jsat.utils.random.RandomUtil;
11 | import org.junit.After;
12 | import org.junit.AfterClass;
13 | import org.junit.Before;
14 | import org.junit.BeforeClass;
15 | import org.junit.Test;
16 | import static org.junit.Assert.*;
17 |
18 | /**
19 | *
20 | * @author Edward Raff
21 | */
22 | public class ALMA2Test
23 | {
24 |
25 | public ALMA2Test()
26 | {
27 | }
28 |
29 | @BeforeClass
30 | public static void setUpClass()
31 | {
32 | }
33 |
34 | @AfterClass
35 | public static void tearDownClass()
36 | {
37 | }
38 |
39 | @Before
40 | public void setUp()
41 | {
42 | }
43 |
44 | @After
45 | public void tearDown()
46 | {
47 | }
48 |
49 | /**
50 | * Test of classify method, of class ALMA2.
51 | */
52 | @Test
53 | public void testTrain_C()
54 | {
55 | System.out.println("classify");
56 |
57 | ClassificationDataSet train = FixedProblems.get2ClassLinear(200, RandomUtil.getRandom());
58 |
59 | ALMA2 alma = new ALMA2();
60 | alma.setEpochs(1);
61 |
62 | alma.train(train);
63 |
64 | ClassificationDataSet test = FixedProblems.get2ClassLinear(200, RandomUtil.getRandom());
65 |
66 | for(DataPointPair