├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── .gitattributes ├── tests ├── test.png ├── test.sqlite ├── test_rotated.png ├── test.csv ├── Helpers │ └── CPUTest.php ├── test.ndjson ├── DeferredTest.php ├── Kernels │ └── SVM │ │ ├── LinearTest.php │ │ ├── RBFTest.php │ │ ├── SigmoidalTest.php │ │ └── PolynomialTest.php ├── Strategies │ └── PercentileTest.php ├── Backends │ └── Tasks │ │ ├── ProbaTest.php │ │ ├── TrainLearnerTest.php │ │ └── PredictTest.php ├── Loggers │ └── ScreenTest.php ├── Specifications │ └── ExtensionIsLoadedTest.php └── NeuralNet │ └── Initializers │ ├── HeTest.php │ ├── LeCunTest.php │ ├── NormalTest.php │ ├── Xavier1Test.php │ └── Xavier2Test.php ├── docs ├── images │ ├── app-icon-large.png │ ├── app-icon-medium.png │ ├── app-icon-small.png │ ├── app-icon-apple-touch.png │ ├── neural-network-graph.png │ ├── half-moon-scatterplot.png │ ├── iris-dataset-2d-scatterplot.png │ ├── housing-dataset-1d-histogram.png │ ├── iris-dataset-t-sne-embedding.png │ └── iris-dataset-truncated-svd-embedding.png ├── css │ └── custom.css ├── js │ └── custom.js ├── kernels │ ├── svm │ │ ├── linear.md │ │ ├── rbf.md │ │ ├── sigmoidal.md │ │ └── polynomial.md │ └── distance │ │ ├── sparse-cosine.md │ │ ├── hamming.md │ │ ├── jaccard.md │ │ ├── diagonal.md │ │ ├── manhattan.md │ │ ├── euclidean.md │ │ ├── safe-euclidean.md │ │ ├── canberra.md │ │ └── minkowski.md ├── clusterers │ └── seeders │ │ ├── random.md │ │ ├── preset.md │ │ ├── k-mc2.md │ │ └── plus-plus.md ├── persistable.md ├── serializers │ ├── native.md │ ├── api.md │ ├── gzip-native.md │ └── rbx.md ├── strategies │ ├── mean.md │ ├── prior.md │ ├── wild-guess.md │ ├── constant.md │ ├── percentile.md │ └── k-most-frequent.md ├── tokenizers │ ├── word.md │ ├── sentence.md │ ├── whitespace.md │ ├── n-gram.md │ ├── word-stemmer.md │ └── k-skip-n-gram.md ├── neural-network │ ├── initializers │ │ ├── constant.md │ │ ├── uniform.md │ │ ├── normal.md │ │ ├── lecun.md │ │ ├── xavier-1.md │ │ ├── 
xavier-2.md │ │ └── he.md │ ├── optimizers │ │ ├── stochastic.md │ │ ├── adagrad.md │ │ ├── rms-prop.md │ │ ├── step-decay.md │ │ ├── adamax.md │ │ ├── cyclical.md │ │ ├── adam.md │ │ └── momentum.md │ ├── activation-functions │ │ ├── soft-plus.md │ │ ├── softsign.md │ │ ├── silu.md │ │ ├── gelu.md │ │ ├── sigmoid.md │ │ ├── softmax.md │ │ ├── hyperbolic-tangent.md │ │ ├── relu.md │ │ ├── selu.md │ │ ├── thresholded-relu.md │ │ ├── elu.md │ │ └── leaky-relu.md │ ├── cost-functions │ │ ├── cross-entropy.md │ │ ├── least-squares.md │ │ ├── relative-entropy.md │ │ └── huber-loss.md │ └── hidden-layers │ │ ├── activation.md │ │ ├── noise.md │ │ ├── dropout.md │ │ ├── swish.md │ │ └── prelu.md ├── datasets │ ├── generators │ │ ├── api.md │ │ ├── circle.md │ │ └── half-moon.md │ └── unlabeled.md ├── persisters │ └── api.md ├── loggers │ └── screen.md ├── scoring.md ├── transformers │ ├── l1-normalizer.md │ ├── l2-normalizer.md │ ├── stop-word-filter.md │ ├── max-absolute-scaler.md │ ├── text-normalizer.md │ ├── image-resizer.md │ ├── numeric-string-converter.md │ ├── interval-discretizer.md │ ├── one-hot-encoder.md │ ├── polynomial-expander.md │ ├── image-vectorizer.md │ ├── linear-discriminant-analysis.md │ ├── multibyte-text-normalizer.md │ ├── min-max-normalizer.md │ └── boolean-converter.md ├── backends │ ├── serial.md │ └── amp.md ├── cross-validation │ ├── metrics │ │ ├── r-squared.md │ │ ├── top-k-accuracy.md │ │ ├── accuracy.md │ │ ├── probabilistic-accuracy.md │ │ ├── brier-score.md │ │ ├── completeness.md │ │ ├── homogeneity.md │ │ ├── informedness.md │ │ ├── mean-squared-error.md │ │ ├── mean-absolute-error.md │ │ ├── median-absolute-error.md │ │ ├── rmse.md │ │ ├── v-measure.md │ │ ├── f-beta.md │ │ └── rand-index.md │ ├── leave-p-out.md │ ├── hold-out.md │ ├── k-fold.md │ ├── reports │ │ ├── aggregate-report.md │ │ ├── contingency-table.md │ │ └── confusion-matrix.md │ ├── api.md │ └── monte-carlo.md ├── extractors │ ├── column-filter.md │ ├── 
concatenator.md │ ├── ndjson.md │ └── column-picker.md ├── online.md ├── installation.md ├── estimator.md ├── ranks-features.md ├── parallel.md ├── learner.md └── regressors │ └── ridge.md ├── phpstan.neon ├── src ├── NeuralNet │ ├── CostFunctions │ │ ├── RegressionLoss.php │ │ ├── ClassificationLoss.php │ │ └── CostFunction.php │ ├── Layers │ │ ├── Input.php │ │ ├── Hidden.php │ │ ├── Output.php │ │ ├── Parametric.php │ │ └── Layer.php │ ├── Network.php │ ├── Optimizers │ │ ├── Adaptive.php │ │ └── Optimizer.php │ ├── Initializers │ │ └── Initializer.php │ └── ActivationFunctions │ │ ├── ActivationFunction.php │ │ └── Softmax.php ├── Exceptions │ ├── RubixMLException.php │ ├── RuntimeException.php │ ├── EmptyDataset.php │ ├── InvalidArgumentException.php │ ├── LabelsAreMissing.php │ ├── MissingExtension.php │ ├── EstimatorIncompatibleWithMetric.php │ ├── IncorrectDatasetDimensionality.php │ └── ClassRevisionMismatch.php ├── Loggers │ ├── Logger.php │ └── BlackHole.php ├── Learner.php ├── Graph │ ├── Nodes │ │ ├── Node.php │ │ ├── BinaryNode.php │ │ ├── Outcome.php │ │ ├── Decision.php │ │ ├── Hypercube.php │ │ └── Hypersphere.php │ └── Trees │ │ ├── Tree.php │ │ └── BinaryTree.php ├── Kernels │ ├── Distance │ │ ├── NaNSafe.php │ │ └── Distance.php │ └── SVM │ │ └── Kernel.php ├── Extractors │ ├── Extractor.php │ ├── Exporter.php │ └── Concatenator.php ├── Persistable.php ├── Backends │ ├── Tasks │ │ ├── Task.php │ │ ├── TrainLearner.php │ │ ├── Predict.php │ │ └── Proba.php │ └── Backend.php ├── EstimatorWrapper.php ├── AnomalyDetectors │ └── Scoring.php ├── Parallel.php ├── Trainable.php ├── Online.php ├── RanksFeatures.php ├── Transformers │ ├── Elastic.php │ ├── Reversible.php │ ├── Stateful.php │ └── Transformer.php ├── Verbose.php ├── Tokenizers │ ├── Tokenizer.php │ ├── Word.php │ └── Sentence.php ├── Datasets │ └── Generators │ │ └── Generator.php ├── Probabilistic.php ├── CrossValidation │ ├── Validator.php │ ├── Reports │ │ └── ReportGenerator.php │ └── 
Metrics │ │ ├── ProbabilisticMetric.php │ │ └── Metric.php ├── Clusterers │ └── Seeders │ │ ├── Seeder.php │ │ └── Random.php ├── Persisters │ └── Persister.php ├── Serializers │ └── Serializer.php ├── constants.php ├── Traits │ ├── LoggerAware.php │ └── Multiprocessing.php ├── Strategies │ └── Strategy.php ├── Specifications │ ├── Specification.php │ ├── DatasetIsNotEmpty.php │ └── ExtensionIsLoaded.php └── Estimator.php ├── phpbench.json.dist ├── .gitignore ├── benchmarks ├── Transformers │ └── ImageVectorizerBench.php ├── Tokenizers │ ├── WordBench.php │ ├── NGramBench.php │ ├── SentenceBench.php │ ├── KSkipNGramBench.php │ ├── WhitespaceBench.php │ └── WordStemmerBench.php └── Datasets │ └── SplittingBench.php └── LICENSE /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [andrewdalpino] 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | 3 | *.php text eol=lf -------------------------------------------------------------------------------- /tests/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/tests/test.png -------------------------------------------------------------------------------- /tests/test.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/tests/test.sqlite -------------------------------------------------------------------------------- /tests/test_rotated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/tests/test_rotated.png -------------------------------------------------------------------------------- /docs/images/app-icon-large.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/app-icon-large.png -------------------------------------------------------------------------------- /docs/images/app-icon-medium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/app-icon-medium.png -------------------------------------------------------------------------------- /docs/images/app-icon-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/app-icon-small.png -------------------------------------------------------------------------------- /docs/images/app-icon-apple-touch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/app-icon-apple-touch.png -------------------------------------------------------------------------------- /docs/images/neural-network-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/neural-network-graph.png -------------------------------------------------------------------------------- /docs/images/half-moon-scatterplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/half-moon-scatterplot.png -------------------------------------------------------------------------------- /docs/images/iris-dataset-2d-scatterplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/iris-dataset-2d-scatterplot.png -------------------------------------------------------------------------------- 
/docs/images/housing-dataset-1d-histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/housing-dataset-1d-histogram.png -------------------------------------------------------------------------------- /docs/images/iris-dataset-t-sne-embedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/iris-dataset-t-sne-embedding.png -------------------------------------------------------------------------------- /docs/css/custom.css: -------------------------------------------------------------------------------- 1 | .md-typeset table:not([class]) th { 2 | min-width: auto; 3 | } 4 | 5 | #version-selector { 6 | display: none; 7 | } 8 | -------------------------------------------------------------------------------- /docs/images/iris-dataset-truncated-svd-embedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/iris-dataset-truncated-svd-embedding.png -------------------------------------------------------------------------------- /phpstan.neon: -------------------------------------------------------------------------------- 1 | parameters: 2 | level: 8 3 | paths: 4 | - 'src' 5 | - 'tests' 6 | - 'benchmarks' 7 | excludePaths: 8 | - src/Backends/Amp.php 9 | -------------------------------------------------------------------------------- /src/NeuralNet/CostFunctions/RegressionLoss.php: -------------------------------------------------------------------------------- 1 | 15 | */ 16 | interface Extractor extends IteratorAggregate 17 | { 18 | // 19 | } 20 | -------------------------------------------------------------------------------- /src/Persistable.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Linear 4 | A simple 
linear kernel computed by the dot product of two vectors. 5 | 6 | ## Parameters 7 | This kernel does not have any parameters. 8 | 9 | ## Example 10 | ```php 11 | use Rubix\ML\Kernels\SVM\Linear; 12 | 13 | $kernel = new Linear(); 14 | ``` -------------------------------------------------------------------------------- /src/Backends/Tasks/Task.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Random 4 | Completely random selection of seeds from a given dataset. 5 | 6 | ## Parameters 7 | This seeder does not have any parameters. 8 | 9 | ## Example 10 | ```php 11 | use Rubix\ML\Clusterers\Seeders\Random; 12 | 13 | $seeder = new Random(); 14 | ``` -------------------------------------------------------------------------------- /docs/persistable.md: -------------------------------------------------------------------------------- 1 | # Persistable 2 | An estimator that implements the Persistable interface can be serialized by a [Serializer](serializers/api.md) or saved and loaded using the [Persistent Model](persistent-model.md) meta-estimator. 3 | 4 | To return the current class revision hash: 5 | ```php 6 | public revision() : string 7 | ``` 8 | 9 | ```php 10 | echo $persistable->revision(); 11 | ``` 12 | 13 | ``` 14 | e7eeec9a 15 | ``` 16 | -------------------------------------------------------------------------------- /docs/serializers/native.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Native 4 | The native bytecode format that comes bundled with PHP core. 5 | 6 | ## Parameters 7 | This serializer does not have any parameters. 
8 | 9 | ## Example 10 | ```php 11 | use Rubix\ML\Serializers\Native; 12 | 13 | $serializer = new Native(); 14 | ``` 15 | -------------------------------------------------------------------------------- /docs/strategies/mean.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Mean 4 | This strategy always predicts the mean of the fitted data. 5 | 6 | **Data Type:** Continuous 7 | 8 | ## Parameters 9 | This strategy does not have any parameters. 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Strategies\Mean; 14 | 15 | $strategy = new Mean(); 16 | ``` -------------------------------------------------------------------------------- /docs/tokenizers/word.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Word Tokenizer 4 | The Word tokenizer uses a regular expression to tokenize the words in a blob of text. 5 | 6 | ## Parameters 7 | This tokenizer does not have any parameters. 8 | 9 | ## Example 10 | ```php 11 | use Rubix\ML\Tokenizers\Word; 12 | 13 | $tokenizer = new Word(); 14 | ``` 15 | -------------------------------------------------------------------------------- /src/AnomalyDetectors/Scoring.php: -------------------------------------------------------------------------------- 1 | $iterator 18 | */ 19 | public function export(iterable $iterator) : void; 20 | } 21 | -------------------------------------------------------------------------------- /docs/tokenizers/sentence.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Sentence Tokenizer 4 | This tokenizer matches sentences starting with a letter and ending with a punctuation mark. 5 | 6 | ## Parameters 7 | This tokenizer does not have any parameters. 
8 | 9 | ## Example 10 | ```php 11 | use Rubix\ML\Tokenizers\Sentence; 12 | 13 | $tokenizer = new Sentence(); 14 | ``` -------------------------------------------------------------------------------- /src/Graph/Nodes/BinaryNode.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Prior 4 | A strategy where the probability of guessing a class is equal to the class's prior probability. 5 | 6 | **Data Type:** Categorical 7 | 8 | ## Parameters 9 | This strategy does not have any parameters. 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Strategies\Prior; 14 | 15 | $strategy = new Prior(); 16 | ``` -------------------------------------------------------------------------------- /src/NeuralNet/Network.php: -------------------------------------------------------------------------------- 1 | 22 | */ 23 | public function layers() : Traversable; 24 | } 25 | -------------------------------------------------------------------------------- /src/Kernels/SVM/Kernel.php: -------------------------------------------------------------------------------- 1 | > $samples 18 | */ 19 | public function reverseTransform(array &$samples) : void; 20 | } 21 | -------------------------------------------------------------------------------- /docs/strategies/wild-guess.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Wild Guess 4 | Guess a random number somewhere between the minimum and maximum computed by fitting a collection of values. 5 | 6 | **Data Type:** Continuous 7 | 8 | ## Parameters 9 | This strategy does not have any parameters. 
10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Strategies\WildGuess; 14 | 15 | $strategy = new WildGuess(); 16 | ``` -------------------------------------------------------------------------------- /src/Graph/Nodes/Outcome.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # RBF 4 | Non linear radial basis function (RBF) computes the distance from a centroid or origin. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | gamma | null | float | The kernel coefficient. | 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Kernels\SVM\RBF; 14 | 15 | $kernel = new RBF(null); 16 | ``` -------------------------------------------------------------------------------- /src/Verbose.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Constant 4 | Always guess the same value. 5 | 6 | **Data Type:** Continuous 7 | 8 | ## Parameters 9 | | # | Name | Default | Type | Description | 10 | |---|---|---|---|---| 11 | | 1 | value | 0.0 | float | The value to constantly guess. | 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\Strategies\Constant; 16 | 17 | $strategy = new Constant(0.0); 18 | ``` -------------------------------------------------------------------------------- /src/NeuralNet/Optimizers/Adaptive.php: -------------------------------------------------------------------------------- 1 | 23 | */ 24 | public function tokenize(string $text) : array; 25 | } 26 | -------------------------------------------------------------------------------- /src/Datasets/Generators/Generator.php: -------------------------------------------------------------------------------- 1 | 13 | */ 14 | public function dimensions() : int; 15 | 16 | /** 17 | * Generate n data points. 
18 | * 19 | * @param int<0,max> $n 20 | * @return \Rubix\ML\Datasets\Dataset 21 | */ 22 | public function generate(int $n); 23 | } 24 | -------------------------------------------------------------------------------- /src/Probabilistic.php: -------------------------------------------------------------------------------- 1 | 21 | */ 22 | public function proba(Dataset $dataset) : array; 23 | } 24 | -------------------------------------------------------------------------------- /docs/kernels/distance/sparse-cosine.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Sparse Cosine 4 | A version of the Cosine distance kernel that is specifically optimized for computing sparse vectors. 5 | 6 | **Data Type Compatibility:** Continuous 7 | 8 | ## Parameters 9 | This kernel does not have any parameters. 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Kernels\Distance\SparseCosine; 14 | 15 | $kernel = new SparseCosine(); 16 | ``` 17 | -------------------------------------------------------------------------------- /docs/tokenizers/whitespace.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Whitespace 4 | Tokens are delimited by a user-specified whitespace character. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | delimiter | ' ' | string | The whitespace character that delimits each token. 
| 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Tokenizers\Whitespace; 14 | 15 | $tokenizer = new Whitespace(','); 16 | ``` 17 | -------------------------------------------------------------------------------- /src/Exceptions/EstimatorIncompatibleWithMetric.php: -------------------------------------------------------------------------------- 1 | type()}s."); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /docs/neural-network/initializers/constant.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Constant 4 | Initialize the parameter to a user-specified constant value. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | value | 0.0 | float | The value to initialize the parameter to. | 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\NeuralNet\Initializers\Constant; 14 | 15 | $initializer = new Constant(1.0); 16 | ``` -------------------------------------------------------------------------------- /docs/datasets/generators/api.md: -------------------------------------------------------------------------------- 1 | # Generators 2 | Dataset generators produce synthetic datasets of a user-specified shape and dimensionality. Synthetic data is useful for a number of tasks including experimentation, testing, benchmarking, and demonstration purposes. 
3 | 4 | ### Generate a Dataset 5 | To generate a Dataset object with *n* records: 6 | ```php 7 | public generate(int $n) : Dataset 8 | ``` 9 | 10 | ```php 11 | use Rubix\ML\Datasets\Generators\HalfMoon; 12 | 13 | $generator = new HalfMoon(0.0, 0.0); 14 | 15 | $dataset = $generator->generate(1000); 16 | ``` 17 | -------------------------------------------------------------------------------- /docs/persisters/api.md: -------------------------------------------------------------------------------- 1 | # Persisters 2 | Persisters are responsible for persisting Encoding objects to storage and are also used by the [Persistent Model](../persistent-model.md) meta-estimator to save and restore models that have been serialized. 3 | 4 | ### Save 5 | To save an encoding: 6 | ```php 7 | public save(Encoding $encoding) : void 8 | ``` 9 | 10 | ```php 11 | $persister->save($encoding); 12 | ``` 13 | 14 | ### Load 15 | To load an encoding from persistence: 16 | ```php 17 | public load() : Encoding 18 | ``` 19 | 20 | ```php 21 | $encoding = $persister->load(); 22 | ``` -------------------------------------------------------------------------------- /docs/kernels/svm/sigmoidal.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Sigmoidal 4 | S-shaped nonlinearity kernel with output values ranging from -1 to 1. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | gamma | null | float | The kernel coefficient. | 10 | | 2 | coef0 | 0. | float | The independent term. 
| 11 | 12 | ## Example 13 | ```php 14 | use Rubix\ML\Kernels\SVM\Sigmoidal; 15 | 16 | $kernel = new Sigmoidal(null, 0.); 17 | ``` -------------------------------------------------------------------------------- /src/Graph/Trees/Tree.php: -------------------------------------------------------------------------------- 1 | assertLessThan(1.0, $epsilon); 22 | $this->assertGreaterThan(0.0, $epsilon); 23 | 24 | $this->assertFalse(1.0 + $epsilon === 1.0); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /tests/test.ndjson: -------------------------------------------------------------------------------- 1 | {"attitude":"nice","texture":"furry","sociability":"friendly","rating":4,"class":"not monster"} 2 | {"attitude":"mean","texture":"furry","sociability":"loner","rating":-1.5,"class":"monster"} 3 | {"attitude":"nice","texture":"rough","sociability":"friendly","rating":2.6,"class":"not monster"} 4 | {"attitude":"mean","texture":"rough","sociability":"friendly","rating":-1,"class":"monster"} 5 | {"attitude":"nice","texture":"rough","sociability":"friendly","rating":2.9,"class":"not monster"} 6 | {"attitude":"nice","texture":"furry","sociability":"loner","rating":-5,"class":"not monster"} 7 | -------------------------------------------------------------------------------- /docs/strategies/percentile.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Percentile 4 | A strategy that always guesses the p-th percentile of the fitted data. 5 | 6 | **Data Type:** Continuous 7 | 8 | ## Parameters 9 | | # | Name | Default | Type | Description | 10 | |---|---|---|---|---| 11 | | 1 | p | 50.0 | float | The percentile of the fitted data to use as a guess. 
| 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\Strategies\Percentile; 16 | 17 | $strategy = new Percentile(90.0); 18 | ``` -------------------------------------------------------------------------------- /docs/clusterers/seeders/preset.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Preset 4 | Generates centroids from a list of presets. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | centroids| | array | A list of predefined cluster centroids to sample from. | 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Clusterers\Seeders\Preset; 14 | 15 | $seeder = new Preset([ 16 | ['foo', 14, 0.72], 17 | ['bar', 16, 0.92], 18 | ]); 19 | ``` 20 | -------------------------------------------------------------------------------- /docs/neural-network/optimizers/stochastic.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Stochastic 4 | A constant learning rate optimizer based on vanilla Stochastic Gradient Descent. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | rate | 0.01 | float | The learning rate that controls the global step size. | 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\NeuralNet\Optimizers\Stochastic; 14 | 15 | $optimizer = new Stochastic(0.01); 16 | ``` -------------------------------------------------------------------------------- /docs/neural-network/initializers/uniform.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Uniform 4 | Generates a random uniform distribution centered at 0 and bounded at both ends by the parameter beta. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | beta | 0.05 | float | The upper and lower bound of the distribution. 
| 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\NeuralNet\Initializers\Uniform; 14 | 15 | $initializer = new Uniform(1e-3); 16 | ``` -------------------------------------------------------------------------------- /docs/loggers/screen.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Screen 4 | A logger that displays log messages to the standard output. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | channel | '' | string | The channel name that appears on each line. | 10 | | 2 | timestampFormat | 'Y-m-d H:i:s' | string | The format of the timestamp. | 11 | 12 | ## Example 13 | ```php 14 | use Rubix\ML\Loggers\Screen; 15 | 16 | $logger = new Screen('mlp', 'Y-m-d H:i:s'); 17 | ``` 18 | -------------------------------------------------------------------------------- /docs/neural-network/initializers/normal.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Normal 4 | Generates a random weight matrix from a Gaussian distribution with user-specified standard deviation. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | stddev | 0.05 | float | The standard deviation of the distribution to sample from. | 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\NeuralNet\Initializers\Normal; 14 | 15 | $initializer = new Normal(0.1); 16 | ``` -------------------------------------------------------------------------------- /docs/strategies/k-most-frequent.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # K Most Frequent 4 | This Strategy outputs one of k most frequently occurring classes at random with equal probability. 
5 | 6 | **Data Type:** Categorical 7 | 8 | ## Parameters 9 | | # | Name | Default | Type | Description | 10 | |---|---|---|---|---| 11 | | 1 | k | 1 | int | The number of most frequent classes to consider. | 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\Strategies\KMostFrequent; 16 | 17 | $strategy = new KMostFrequent(5); 18 | ``` -------------------------------------------------------------------------------- /src/CrossValidation/Validator.php: -------------------------------------------------------------------------------- 1 | numFeatures()} given."; 18 | 19 | parent::__construct($message); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/Graph/Trees/BinaryTree.php: -------------------------------------------------------------------------------- 1 | > 25 | */ 26 | public function seed(Dataset $dataset, int $k) : array; 27 | } 28 | -------------------------------------------------------------------------------- /src/Loggers/BlackHole.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Hamming 4 | A categorical distance function that measures distance as the number of substitutions necessary to convert one sample to the other. 5 | 6 | **Data Type Compatibility:** Categorical 7 | 8 | ## Parameters 9 | This kernel does not have any parameters. 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Kernels\Distance\Hamming; 14 | 15 | $kernel = new Hamming(); 16 | ``` 17 | 18 | ## References 19 | [^1]: R. W. Hamming. (1950). Error detecting and error correcting codes. 
-------------------------------------------------------------------------------- /src/Persisters/Persister.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Jaccard 4 | The *generalized* Jaccard distance is a measure of distance with a range from 0 to 1 and can be thought of as the size of the intersection divided by the size of the union of two points if they consisted only of binary random variables. 5 | 6 | **Data Type Compatibility:** Continuous 7 | 8 | ## Parameters 9 | This kernel does not have any parameters. 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Kernels\Distance\Jaccard; 14 | 15 | $kernel = new Jaccard(); 16 | ``` -------------------------------------------------------------------------------- /docs/kernels/svm/polynomial.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Polynomial 4 | This kernel projects a sample vector using polynomials of the p'th degree. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | degree | 3 | int | The degree of the polynomial. | 10 | | 2 | gamma | null | float | The kernel coefficient. | 11 | | 3 | coef0 | 0. | float | The independent term. 
| 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\Kernels\SVM\Polynomial; 16 | 17 | $kernel = new Polynomial(3, null, 0.); 18 | ``` 19 | -------------------------------------------------------------------------------- /src/Graph/Nodes/Decision.php: -------------------------------------------------------------------------------- 1 | 27 | */ 28 | public function n() : int; 29 | } 30 | -------------------------------------------------------------------------------- /src/NeuralNet/Initializers/Initializer.php: -------------------------------------------------------------------------------- 1 | $fanIn 23 | * @param int<0,max> $fanOut 24 | * @return Matrix 25 | */ 26 | public function initialize(int $fanIn, int $fanOut) : Matrix; 27 | } 28 | -------------------------------------------------------------------------------- /docs/kernels/distance/diagonal.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Diagonal 4 | The Diagonal (a.k.a. *Chebyshev*) distance is a measure that constrains movement to horizontal, vertical, and diagonal. An example of a game that uses diagonal movement is chess. 5 | 6 | $$ 7 | {\displaystyle Diagonal(a,b)=\max _{i}(|a_{i}-b_{i}|)} 8 | $$ 9 | 10 | **Data Type Compatibility:** Continuous 11 | 12 | ## Parameters 13 | This kernel does not have any parameters. 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\Kernels\Distance\Diagonal; 18 | 19 | $kernel = new Diagonal(); 20 | ``` -------------------------------------------------------------------------------- /src/Graph/Nodes/Hypercube.php: -------------------------------------------------------------------------------- 1 | > 22 | */ 23 | public function sides() : Traversable; 24 | 25 | /** 26 | * Does the hypercube reduce to a single point? 
27 | * 28 | * @return bool 29 | */ 30 | public function isPoint() : bool; 31 | } 32 | -------------------------------------------------------------------------------- /src/NeuralNet/Layers/Hidden.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Soft Plus 4 | A smooth approximation of the piecewise linear [ReLU](relu.md) activation function. 5 | 6 | $$ 7 | {\displaystyle Soft-Plus = \log \left(1+e^{x}\right)} 8 | $$ 9 | 10 | ## Parameters 11 | This activation function does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\ActivationFunctions\SoftPlus; 16 | 17 | $activationFunction = new SoftPlus(); 18 | ``` 19 | 20 | ## References 21 | [^1]: X. Glorot et al. (2011). Deep Sparse Rectifier Neural Networks. -------------------------------------------------------------------------------- /docs/tokenizers/n-gram.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # N-gram 4 | N-grams are sequences of n-words of a given string. The N-gram tokenizer outputs tokens of contiguous words ranging from *min* to *max* number of words per token. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | min | 2 | int | The minimum number of contiguous words to a token. | 10 | | 2 | max | 2 | int | The maximum number of contiguous words to a token. | 11 | 12 | ## Example 13 | ```php 14 | use Rubix\ML\Tokenizers\NGram; 15 | 16 | $tokenizer = new NGram(1, 3); 17 | ``` 18 | -------------------------------------------------------------------------------- /src/CrossValidation/Reports/ReportGenerator.php: -------------------------------------------------------------------------------- 1 | 15 | */ 16 | public function compatibility() : array; 17 | 18 | /** 19 | * Generate the report. 
20 | 21 | * @param list $predictions 22 | * @param list $labels 23 | * @return Report 24 | */ 25 | public function generate(array $predictions, array $labels) : Report; 26 | } 27 | -------------------------------------------------------------------------------- /docs/neural-network/initializers/lecun.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Le Cun 4 | Proposed by Yann Le Cun in a paper in 1998, this initializer was one of the first published attempts to control the variance of activations between layers through weight initialization. It remains a good default choice for many hidden layer configurations. 5 | 6 | ## Parameters 7 | This initializer does not have any parameters. 8 | 9 | ## Example 10 | ```php 11 | use Rubix\ML\NeuralNet\Initializers\LeCun; 12 | 13 | $initializer = new LeCun(); 14 | ``` 15 | 16 | ## References 17 | [^1]: Y. Le Cun et al. (1998). Efficient Backprop. -------------------------------------------------------------------------------- /docs/neural-network/activation-functions/softsign.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Softsign 4 | A smooth sigmoid-shaped function that squashes the input between -1 and 1. 5 | 6 | $$ 7 | {\displaystyle Softsign = {\frac {x}{1+|x|}}} 8 | $$ 9 | 10 | ## Parameters 11 | This activation function does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\ActivationFunctions\Softsign; 16 | 17 | $activationFunction = new Softsign(); 18 | ``` 19 | 20 | ## References 21 | [^1]: X. Glorot et al. (2010). Understanding the Difficulty of Training Deep Feedforward Neural Networks. 
22 | -------------------------------------------------------------------------------- /src/NeuralNet/Optimizers/Optimizer.php: -------------------------------------------------------------------------------- 1 | $gradient 25 | * @return Tensor 26 | */ 27 | public function step(Parameter $param, Tensor $gradient) : Tensor; 28 | } 29 | -------------------------------------------------------------------------------- /docs/scoring.md: -------------------------------------------------------------------------------- 1 | # Scoring 2 | A Scoring anomaly detector is one that assigns anomaly scores to unknown samples in a dataset. The interface provides the `score()` method which returns a set of scores from the model. Higher scores indicate a greater degree of anomalousness. In addition, samples can be sorted by their anomaly score to identify the top outliers. 3 | 4 | ## Score a Dataset 5 | Return the anomaly scores assigned to the samples in a dataset: 6 | ```php 7 | public score(Dataset $dataset) : array 8 | ``` 9 | 10 | ```php 11 | $scores = $estimator->score($dataset); 12 | 13 | print_r($scores); 14 | ``` 15 | 16 | ```php 17 | Array 18 | ( 19 | [0] => 0.35033 20 | [1] => 0.40992 21 | [2] => 1.68153 22 | ) 23 | ``` 24 | -------------------------------------------------------------------------------- /docs/kernels/distance/manhattan.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Manhattan 4 | A distance metric that constrains movement to horizontal and vertical, similar to navigating the city blocks of Manhattan. An example of a board game that uses this type of movement is Checkers. 5 | 6 | $$ 7 | Manhattan(\mathbf {a} ,\mathbf {b})=\|\mathbf {a} -\mathbf {b} \|_{1}=\sum _{i=1}^{n}|a_{i}-b_{i}| 8 | $$ 9 | 10 | **Data Type Compatibility:** Continuous 11 | 12 | ## Parameters 13 | This kernel does not have any parameters. 
14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\Kernels\Distance\Manhattan; 18 | 19 | $kernel = new Manhattan(); 20 | ``` -------------------------------------------------------------------------------- /docs/transformers/l1-normalizer.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # L1 Normalizer 4 | Transform each sample (row) vector in the sample matrix such that each feature is divided by the L1 norm (or *magnitude*) of that vector. 5 | 6 | **Interfaces:** [Transformer](api.md#transformer) 7 | 8 | **Data Type Compatibility:** Continuous only 9 | 10 | ## Parameters 11 | This transformer does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\Transformers\L1Normalizer; 16 | 17 | $transformer = new L1Normalizer(); 18 | ``` 19 | 20 | ## Additional Methods 21 | This transformer does not have any additional methods. 22 | -------------------------------------------------------------------------------- /docs/transformers/l2-normalizer.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # L2 Normalizer 4 | Transform each sample (row) vector in the sample matrix such that each feature is divided by the L2 norm (or *magnitude*) of that vector. 5 | 6 | **Interfaces:** [Transformer](api.md#transformer) 7 | 8 | **Data Type Compatibility:** Continuous only 9 | 10 | ## Parameters 11 | This transformer does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\Transformers\L2Normalizer; 16 | 17 | $transformer = new L2Normalizer(); 18 | ``` 19 | 20 | ## Additional Methods 21 | This transformer does not have any additional methods. 
22 | -------------------------------------------------------------------------------- /src/Transformers/Transformer.php: -------------------------------------------------------------------------------- 1 | 22 | */ 23 | public function compatibility() : array; 24 | 25 | /** 26 | * Transform the dataset in place. 27 | * 28 | * @param list> $samples 29 | */ 30 | public function transform(array &$samples) : void; 31 | } 32 | -------------------------------------------------------------------------------- /docs/serializers/api.md: -------------------------------------------------------------------------------- 1 | # Serializers 2 | Serializers take objects that implement the [Persistable](../persistable.md) interface and convert them into blobs of data called *encodings*. Encodings can then be used to either store an object or to reinstantiate an object from storage. 3 | 4 | ### Serialize 5 | To serialize a persistable object into an encoding: 6 | ```php 7 | public serialize(Persistable $persistable) : Encoding 8 | ``` 9 | 10 | ```php 11 | $encoding = $serializer->serialize($persistable); 12 | ``` 13 | 14 | ### Deserialize 15 | To deserialize a persistable object from an encoding: 16 | ```php 17 | public deserialize(Encoding $encoding) : Persistable 18 | ``` 19 | 20 | ```php 21 | $persistable = $serializer->deserialize($encoding); 22 | ``` 23 | -------------------------------------------------------------------------------- /src/NeuralNet/Layers/Parametric.php: -------------------------------------------------------------------------------- 1 | 22 | */ 23 | public function parameters() : Generator; 24 | 25 | /** 26 | * Restore the parameters on the layer from an associative array. 
27 | * 28 | * @param \Rubix\ML\NeuralNet\Parameter[] $parameters 29 | */ 30 | public function restore(array $parameters) : void; 31 | } 32 | -------------------------------------------------------------------------------- /docs/neural-network/cost-functions/cross-entropy.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Cross Entropy 4 | Cross Entropy (or *log loss*) measures the performance of a classification model whose output is a joint probability distribution over the possible classes. Entropy increases as the predicted probability distribution diverges from the actual distribution. 5 | 6 | $$ 7 | Cross Entropy = -\sum_{c=1}^My_{o,c}\log(p_{o,c}) 8 | $$ 9 | 10 | ## Parameters 11 | This cost function does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy; 16 | 17 | $costFunction = new CrossEntropy(); 18 | ``` -------------------------------------------------------------------------------- /docs/neural-network/cost-functions/least-squares.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Least Squares 4 | Least Squares (or *quadratic* loss) is a function that computes the average squared error (MSE) between the target output given by the labels and the actual output of the network. It produces a smooth bowl-shaped gradient that is highly-influenced by large errors. 5 | 6 | $$ 7 | Least Squares = \sum_{i=1}^{D}(y_i-\hat{y}_i)^2 8 | $$ 9 | 10 | ## Parameters 11 | This cost function does not have any parameters. 
12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\CostFunctions\LeastSquares; 16 | 17 | $costFunction = new LeastSquares(); 18 | ``` -------------------------------------------------------------------------------- /docs/neural-network/hidden-layers/activation.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Activation 4 | Activation layers apply a user-defined non-linear activation function to their inputs. They often work in conjunction with [Dense](dense.md) layers as a way to transform their output. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | activationFn | | ActivationFunction | The function that computes the output of the layer. | 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\NeuralNet\Layers\Activation; 14 | use Rubix\ML\NeuralNet\ActivationFunctions\ReLU; 15 | 16 | $layer = new Activation(new ReLU()); 17 | ``` -------------------------------------------------------------------------------- /docs/backends/serial.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | ### Serial 4 | The Serial backend executes tasks sequentially inside of a single process. The advantage of the Serial backend is that it has zero overhead, thus it may be faster than a parallel backend for small datasets. 5 | 6 | !!! note 7 | The Serial backend is the default for most objects that are capable of parallel processing. 8 | 9 | ## Parameters 10 | This backend does not have any additional parameters. 11 | 12 | ## Example 13 | ```php 14 | use Rubix\ML\Backends\Serial; 15 | 16 | $backend = new Serial(); 17 | ``` 18 | 19 | ## Additional Methods 20 | This backend does not have any additional methods. 
21 | -------------------------------------------------------------------------------- /docs/kernels/distance/euclidean.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Euclidean 4 | The straight line (*bee* line) distance between two points. Euclidean distance has the nice property of being invariant under any rotation. 5 | 6 | $$ 7 | Euclidean\left(a,b\right) = \sqrt {\sum _{i=1}^{n} \left( a_{i}-b_{i}\right)^2} 8 | $$ 9 | 10 | **Data Type Compatibility:** Continuous 11 | 12 | ## Parameters 13 | This kernel does not have any parameters. 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\Kernels\Distance\Euclidean; 18 | 19 | $kernel = new Euclidean(); 20 | ``` 21 | 22 | ## References 23 | [^1]: J. K. Dixon. (1978). Pattern Recognition with Partly Missing Data. -------------------------------------------------------------------------------- /src/CrossValidation/Metrics/ProbabilisticMetric.php: -------------------------------------------------------------------------------- 1 | > $probabilities 21 | * @param list $labels 22 | * @return float 23 | */ 24 | public function score(array $probabilities, array $labels) : float; 25 | } 26 | -------------------------------------------------------------------------------- /docs/neural-network/activation-functions/silu.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # SiLU 4 | Sigmoid Linear Units are smooth and non-monotonic rectified activation functions. Their inputs are weighted by the [Sigmoid](sigmoid.md) activation function acting as a self-gating mechanism. 5 | 6 | ## Parameters 7 | This activation function does not have any parameters. 8 | 9 | ## Example 10 | ```php 11 | use Rubix\ML\NeuralNet\ActivationFunctions\SiLU; 12 | 13 | $activationFunction = new SiLU(); 14 | ``` 15 | 16 | ## References 17 | [^1]: S. Elfwing et al. (2017). 
Sigmoid-Weighted Linear Units for Neural Network Function Approximation in Reinforcement Learning. 18 | -------------------------------------------------------------------------------- /docs/serializers/gzip-native.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Gzip Native 4 | Gzip Native wraps the native PHP serialization format in an outer compression layer based on the DEFLATE algorithm with a header and CRC32 checksum. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | level | 6 | int | The compression level between 0 and 9, 0 meaning no compression. | 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Serializers\GzipNative; 14 | 15 | $serializer = new GzipNative(1); 16 | ``` 17 | 18 | ## References 19 | [^1]: P. Deutsch. (1996). RFC 1951 - DEFLATE Compressed Data Format Specification version 1.3. 20 | -------------------------------------------------------------------------------- /docs/neural-network/activation-functions/gelu.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # GELU 4 | Gaussian Error Linear Units (GELUs) are rectifiers that are gated by the magnitude of their input rather than the sign of their input as with ReLU variants. Their output can be interpreted as the expected value of a neuron with random dropout regularization applied. 5 | 6 | ## Parameters 7 | This activation function does not have any parameters. 8 | 9 | ## Example 10 | ```php 11 | use Rubix\ML\NeuralNet\ActivationFunctions\GELU; 12 | 13 | $activationFunction = new GELU(); 14 | ``` 15 | 16 | ## References 17 | [^1]: D. Hendrycks et al. (2018). Gaussian Error Linear Units (GELUs). 
18 | -------------------------------------------------------------------------------- /docs/neural-network/activation-functions/sigmoid.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Sigmoid 4 | A bounded S-shaped function (sometimes called the *Logistic* function) with an output value between 0 and 1. The output of the sigmoid function has the advantage of being interpretable as a probability, however it is not zero-centered and tends to saturate if inputs become large. 5 | 6 | $$ 7 | {\displaystyle Sigmoid = {\frac {1}{1+e^{-x}}}} 8 | $$ 9 | 10 | ## Parameters 11 | This activation function does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\ActivationFunctions\Sigmoid; 16 | 17 | $activationFunction = new Sigmoid(); 18 | ``` -------------------------------------------------------------------------------- /docs/cross-validation/metrics/r-squared.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # R Squared 4 | The *coefficient of determination* or R Squared (R²) is the proportion of the variance in the target labels that is explainable from the predictions. It gives an indication as to how well the predictions approximate the labels. 5 | 6 | $$ 7 | {\displaystyle R^{2} = 1-{SS_{\rm {res}} \over SS_{\rm {tot}}}} 8 | $$ 9 | 10 | **Estimator Compatibility:** Regressor 11 | 12 | **Score Range:** -∞ to 1 13 | 14 | ## Parameters 15 | This metric does not have any parameters. 
16 | 17 | ## Example 18 | ```php 19 | use Rubix\ML\CrossValidation\Metrics\RSquared; 20 | 21 | $metric = new RSquared(); 22 | ``` 23 | -------------------------------------------------------------------------------- /docs/extractors/column-filter.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Column Filter 4 | 5 | **Interfaces:** [Extractor](api.md) 6 | 7 | ## Parameters 8 | | # | Name | Default | Type | Description | 9 | |---|---|---|---|---| 10 | | 1 | iterator | | Traversable | The base iterator. | 11 | | 2 | keys | | array | The string and/or integer keys of the columns to filter from the table | 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\Extractors\ColumnFilter; 16 | use Rubix\ML\Extractors\CSV; 17 | 18 | $extractor = new ColumnFilter(new CSV('example.csv', true), [ 19 | 'texture', 'class', 20 | ]); 21 | ``` 22 | 23 | ## Additional Methods 24 | This extractor does not have any additional methods. 25 | -------------------------------------------------------------------------------- /src/Graph/Nodes/Hypersphere.php: -------------------------------------------------------------------------------- 1 | 20 | */ 21 | public function center() : array; 22 | 23 | /** 24 | * Return the radius of the centroid. 25 | * 26 | * @return float 27 | */ 28 | public function radius() : float; 29 | 30 | /** 31 | * Does the hypersphere reduce to a single point? 32 | * 33 | * @return bool 34 | */ 35 | public function isPoint() : bool; 36 | } 37 | -------------------------------------------------------------------------------- /docs/neural-network/cost-functions/relative-entropy.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Relative Entropy 4 | Relative Entropy (or *Kullback-Leibler divergence*) is a measure of how the expectation and activation of the network diverge. 
It is different from [Cross Entropy](cross-entropy.md) in that it is *asymmetric* and thus does not qualify as a statistical measure of error. 5 | 6 | $$ 7 | KL(\hat{y} || y) = \sum_{c=1}^{M}\hat{y}_c \log{\frac{\hat{y}_c}{y_c}} 8 | $$ 9 | 10 | ## Parameters 11 | This cost function does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\CostFunctions\RelativeEntropy; 16 | 17 | $costFunction = new RelativeEntropy(); 18 | ``` -------------------------------------------------------------------------------- /docs/cross-validation/leave-p-out.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Leave P Out 4 | Leave P Out tests a learner with a unique holdout set of size p for each iteration until all samples have been tested. Although Leave P Out can take long with large datasets and small values of p, it is especially suited for small datasets. 5 | 6 | **Interfaces:** [Validator](api.md#validator), [Parallel](#parallel) 7 | 8 | ## Parameters 9 | | # | Name | Default | Type | Description | 10 | |---|---|---|---|---| 11 | | 1 | p | 10 | int | The number of samples to leave out each round for testing. | 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\CrossValidation\LeavePOut; 16 | 17 | $validator = new LeavePOut(50); 18 | ``` -------------------------------------------------------------------------------- /docs/neural-network/activation-functions/softmax.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Softmax 4 | The Softmax function is a generalization of the [Sigmoid](sigmoid.md) function that squashes each activation between 0 and 1 with the addition that all activations add up to 1. Together, these properties allow the output of the Softmax function to be interpretable as a *joint* probability distribution. 
5 | 6 | $$ 7 | {\displaystyle Softmax = {\frac {e^{x_{i}}}{\sum _{j=1}^{J}e^{x_{j}}}}} 8 | $$ 9 | 10 | ## Parameters 11 | This activation function does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\ActivationFunctions\Softmax; 16 | 17 | $activationFunction = new Softmax(); 18 | ``` -------------------------------------------------------------------------------- /docs/cross-validation/hold-out.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Hold Out 4 | Hold Out is a quick and simple cross validation technique that uses a validation set that is *held out* from the training data. The advantage of Hold Out is that the validation score is quick to compute, however it does not allow the learner to *both* train and test on all the data in the training set. 5 | 6 | **Interfaces:** [Validator](api.md#validator) 7 | 8 | ## Parameters 9 | | # | Name | Default | Type | Description | 10 | |---|---|---|---|---| 11 | | 1 | ratio | 0.2 | float | The ratio of samples to hold out for testing. | 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\CrossValidation\HoldOut; 16 | 17 | $validator = new HoldOut(0.3); 18 | ``` -------------------------------------------------------------------------------- /docs/neural-network/hidden-layers/noise.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Noise 4 | This layer adds random Gaussian noise to the inputs with a user-defined standard deviation. Noise added to neural network activations acts as a regularizer by indirectly adding a penalty to the weights through the cost function in the output layer. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | stddev | 0.1 | float | The standard deviation of the Gaussian noise added to the inputs. 
| 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\NeuralNet\Layers\Noise; 14 | 15 | $layer = new Noise(1e-3); 16 | ``` 17 | 18 | ## References 19 | [^1]: C. Gulcehre et al. (2016). Noisy Activation Functions. -------------------------------------------------------------------------------- /docs/transformers/stop-word-filter.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Stop Word Filter 4 | Removes user-specified words from any categorical feature columns including blobs of text. 5 | 6 | **Interfaces:** [Transformer](api.md#transformer) 7 | 8 | **Data Type Compatibility:** Categorical 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | stopWords | | array | A list of stop words to filter out of each text feature. | 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\Transformers\StopWordFilter; 18 | 19 | $transformer = new StopWordFilter(['i', 'me', 'my', ...]); 20 | ``` 21 | 22 | ## Additional Methods 23 | This transformer does not have any additional methods. 24 | -------------------------------------------------------------------------------- /src/Kernels/Distance/Distance.php: -------------------------------------------------------------------------------- 1 | 22 | */ 23 | public function compatibility() : array; 24 | 25 | /** 26 | * Compute the distance between two vectors. 27 | * 28 | * @internal 29 | * 30 | * @param list $a 31 | * @param list $b 32 | * @return float 33 | */ 34 | public function compute(array $a, array $b) : float; 35 | } 36 | -------------------------------------------------------------------------------- /docs/cross-validation/metrics/top-k-accuracy.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Top K Accuracy 4 | Top K Accuracy looks at the k classes with the highest predicted probabilities when calculating the accuracy score. 
If one of the top k classes matches the ground-truth, then the prediction is considered accurate. 5 | 6 | **Estimator Compatibility:** Probabilistic Classifier 7 | 8 | **Score Range:** 0 to 1 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | k | 3 | int | The number of classes with the highest predicted probability to consider. | 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\CrossValidation\Metrics\TopKAccuracy; 18 | 19 | $metric = new TopKAccuracy(5); 20 | ``` 21 | -------------------------------------------------------------------------------- /docs/kernels/distance/safe-euclidean.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Safe Euclidean 4 | A Euclidean distance metric suitable for samples that may contain NaN (not a number) values, i.e. missing data. The Safe Euclidean metric approximates the Euclidean distance function by dropping NaN values and scaling the distance according to the proportion of non-NaNs (in either a or b or both) to compensate. 5 | 6 | **Data Type Compatibility:** Continuous 7 | 8 | ## Parameters 9 | This kernel does not have any parameters. 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Kernels\Distance\SafeEuclidean; 14 | 15 | $kernel = new SafeEuclidean(); 16 | ``` 17 | 18 | ## References 19 | [^1]: J. K. Dixon. (1978). Pattern Recognition with Partly Missing Data. -------------------------------------------------------------------------------- /docs/tokenizers/word-stemmer.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Word Stemmer 4 | Word Stemmer reduces inflected and derived words to their root form using the Snowball method. For example, the sentence "Majority voting is likely foolish" stems to "Major vote is like foolish." 5 | 6 | !!! 
note 7 | For a complete list of [supported languages](https://github.com/wamania/php-stemmer#languages) you can visit the PHP Stemmer documentation. 8 | 9 | ## Parameters 10 | | # | Name | Default | Type | Description | 11 | |---|---|---|---|---| 12 | | 1 | language | | string | The language of the text to be stemmed. | 13 | 14 | ## Example 15 | ```php 16 | use Rubix\ML\Tokenizers\WordStemmer; 17 | 18 | $tokenizer = new WordStemmer('english'); 19 | ``` 20 | -------------------------------------------------------------------------------- /docs/online.md: -------------------------------------------------------------------------------- 1 | # Online 2 | Learners that implement the Online interface can be trained in batches. Learners of this type are great for when you either have a continuous stream of data or a dataset that is too large to fit into memory. In addition, partial training allows the model to evolve over time. 3 | 4 | ## Partially Train 5 | To partially train an Online learner, pass a training set to its `partial()` method: 6 | ```php 7 | public partial(Dataset $dataset) : void 8 | ``` 9 | 10 | ```php 11 | $folds = $dataset->fold(3); 12 | 13 | $estimator->train($folds[0]); 14 | 15 | $estimator->partial($folds[1]); 16 | 17 | $estimator->partial($folds[2]); 18 | ``` 19 | 20 | !!! note 21 | A learner will continue to train as long as you are using the `partial()` method, however, calling `train()` on a trained or partially trained learner will reset it back to baseline first. -------------------------------------------------------------------------------- /docs/cross-validation/metrics/accuracy.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Accuracy 4 | A quick and simple classification and anomaly detection metric defined as the number of true positives over the number of samples in the testing set. 
Since Accuracy gives equal weight to false positives and false negatives, it is *not* a good metric for datasets with a highly imbalanced distribution of labels. 5 | 6 | $$ 7 | {\displaystyle Accuracy = \frac{TP}{TP + FP}} 8 | $$ 9 | 10 | **Estimator Compatibility:** Classifier, Anomaly Detector 11 | 12 | **Score Range:** 0 to 1 13 | 14 | ## Parameters 15 | This metric does not have any parameters. 16 | 17 | ## Example 18 | ```php 19 | use Rubix\ML\CrossValidation\Metrics\Accuracy; 20 | 21 | $metric = new Accuracy(); 22 | ``` -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | Install Rubix ML into your project using [Composer](https://getcomposer.org/): 3 | 4 | ```sh 5 | $ composer require rubix/ml 6 | ``` 7 | 8 | ## Requirements 9 | - [PHP](https://php.net/manual/en/install.php) 7.4 or above 10 | 11 | **Recommended** 12 | 13 | - [Tensor extension](https://github.com/RubixML/Tensor) for fast Matrix/Vector computing 14 | 15 | **Optional** 16 | 17 | - [GD extension](https://php.net/manual/en/book.image.php) for image support 18 | - [Mbstring extension](https://www.php.net/manual/en/book.mbstring.php) for fast multibyte string manipulation 19 | - [SVM extension](https://php.net/manual/en/book.svm.php) for Support Vector Machine engine (libsvm) 20 | - [PDO extension](https://www.php.net/manual/en/book.pdo.php) for relational database support 21 | - [GraphViz](https://graphviz.org/) for graph visualization 22 | -------------------------------------------------------------------------------- /docs/extractors/concatenator.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Concatenator 4 | Combines multiple iterators by concatenating the output of one iterator with the output of the next iterator in the series. 
5 | 6 | **Interfaces:** [Extractor](api.md) 7 | ## Parameters 8 | | # | Name | Default | Type | Description | 9 | |---|---|---|---|---| 10 | | 1 | iterators | | iterable | The iterators to concatenate together. | 11 | 12 | ## Example 13 | ```php 14 | use Rubix\ML\Extractors\Concatenator; 15 | use Rubix\ML\Extractors\CSV; 16 | 17 | $extractor = new Concatenator([ 18 | new CSV('dataset1.csv'), 19 | new CSV('dataset2.csv'), 20 | new CSV('dataset3.csv'), 21 | ]); 22 | ``` 23 | 24 | ## Additional Methods 25 | This extractor does not have any additional methods. 26 | -------------------------------------------------------------------------------- /docs/kernels/distance/canberra.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Canberra 4 | A weighted version of the [Manhattan](manhattan.md) distance, Canberra examines the sum of a series of fractional differences between two samples. Canberra can be very sensitive when both coordinates are near zero. 5 | 6 | $$ 7 | Canberra(\mathbf {a} ,\mathbf {b} )=\sum _{i=1}^{n}{\frac {|a_{i}-b_{i}|}{|a_{i}|+|b_{i}|}} 8 | $$ 9 | 10 | **Data Type Compatibility:** Continuous 11 | 12 | ## Parameters 13 | This kernel does not have any parameters. 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\Kernels\Distance\Canberra; 18 | 19 | $kernel = new Canberra(); 20 | ``` 21 | 22 | ## References 23 | [^1]: G. N. Lance et al. (1967). Mixed-data classificatory programs I. Agglomerative Systems. 24 | -------------------------------------------------------------------------------- /docs/neural-network/initializers/xavier-1.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Xavier 1 4 | The Xavier 1 initializer draws from a uniform distribution [-limit, limit] where *limit* is equal to sqrt(6 / (fanIn + fanOut)). 
This initializer is best suited for layers that feed into an activation layer that outputs a value between 0 and 1 such as [Softmax](../activation-functions/softmax.md) or [Sigmoid](../activation-functions/sigmoid.md). 5 | 6 | ## Parameters 7 | This initializer does not have any parameters. 8 | 9 | ## Example 10 | ```php 11 | use Rubix\ML\NeuralNet\Initializers\Xavier1; 12 | 13 | $initializer = new Xavier1(); 14 | ``` 15 | 16 | ## References 17 | [^1]: X. Glorot et al. (2010). Understanding the Difficulty of Training Deep Feedforward Neural Networks. -------------------------------------------------------------------------------- /docs/serializers/rbx.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # RBX 4 | Rubix Object File format (RBX) is a format designed to reliably store and share serialized PHP objects. Based on PHP's native serialization format, RBX adds additional layers of compression, data integrity checks, and class compatibility detection all in one robust format. 5 | 6 | !!! note 7 | We recommend using the `.rbx` file extension when storing RBX-serialized PHP objects. 8 | 9 | ## Parameters 10 | 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | level | 6 | int | The compression level between 0 and 9, 0 meaning no compression. | 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\Serializers\RBX; 18 | 19 | $serializer = new RBX(6); 20 | ``` 21 | -------------------------------------------------------------------------------- /docs/neural-network/activation-functions/hyperbolic-tangent.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Hyperbolic Tangent 4 | An S-shaped function that squeezes the input value into an output space between -1 and 1.
Hyperbolic Tangent (or *tanh*) has the advantage of being zero centered, however is known to *saturate* with highly positive or negative input values which can slow down training if the activations become too intense. 5 | 6 | $$ 7 | {\displaystyle \tanh(x)={\frac {e^{x}-e^{-x}}{e^{x}+e^{-x}}}} 8 | $$ 9 | 10 | ## Parameters 11 | This activation function does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\ActivationFunctions\HyperbolicTangent; 16 | 17 | $activationFunction = new HyperbolicTangent(); 18 | ``` 19 | -------------------------------------------------------------------------------- /docs/cross-validation/metrics/probabilistic-accuracy.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Probabilistic Accuracy 4 | This metric comes from the sports betting domain, where it's used to measure the accuracy of predictions by looking at the probabilities of class predictions. Accordingly, this metric places additional weight on the "confidence" of each prediction. 5 | 6 | **Estimator Compatibility:** Probabilistic Classifier 7 | 8 | **Score Range:** 0 to 1 9 | 10 | ## Parameters 11 | This metric does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\CrossValidation\Metrics\ProbabilisticAccuracy; 16 | 17 | $metric = new ProbabilisticAccuracy(); 18 | ``` 19 | 20 | ## References 21 | [^1]: https://mercurius.io/en/learn/predicting-forecasting-football 22 | -------------------------------------------------------------------------------- /docs/neural-network/initializers/xavier-2.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Xavier 2 4 | The Xavier 2 initializer draws from a uniform distribution [-limit, limit] where *limit* is equal to (6 / (fanIn + fanOut)) ** 0.25. 
This initializer is best suited for layers that feed into an activation layer that outputs values between -1 and 1 such as [Hyperbolic Tangent](../activation-functions/hyperbolic-tangent.md) and [Softsign](../activation-functions/softsign.md). 5 | 6 | ## Parameters 7 | This initializer does not have any parameters. 8 | 9 | ## Example 10 | ```php 11 | use Rubix\ML\NeuralNet\Initializers\Xavier2; 12 | 13 | $initializer = new Xavier2(); 14 | ``` 15 | 16 | ## References 17 | [^1]: X. Glorot et al. (2010). Understanding the Difficulty of Training Deep Feedforward Neural Networks. -------------------------------------------------------------------------------- /docs/cross-validation/k-fold.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # K Fold 4 | K Fold is a cross validation technique that splits the training set into *k* individual folds and for each training round uses 1 of the folds to test the model and the rest as training data. The final score is the average validation score over all of the *k* rounds. K Fold has the advantage of both training and testing on each sample in the dataset at least once. 5 | 6 | **Interfaces:** [Validator](api.md#validator), [Parallel](../parallel.md) 7 | 8 | ## Parameters 9 | | # | Name | Default | Type | Description | 10 | |---|---|---|---|---| 11 | | 1 | k | 5 | int | The number of folds to split the dataset into. | 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\CrossValidation\KFold; 16 | 17 | $validator = new KFold(5); 18 | ``` -------------------------------------------------------------------------------- /docs/cross-validation/reports/aggregate-report.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Aggregate Report 4 | A report generator that aggregates the output of multiple reports.
5 | 6 | **Estimator Compatibility:** Depends on base reports 7 | 8 | ## Parameters 9 | | # | Name | Default | Type | Description | 10 | |---|---|---|---|---| 11 | | 1 | reports | | array | An array of report generators to aggregate keyed by a user-specified name. | 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\CrossValidation\Reports\AggregateReport; 16 | use Rubix\ML\CrossValidation\Reports\ConfusionMatrix; 17 | use Rubix\ML\CrossValidation\Reports\MulticlassBreakdown; 18 | 19 | $report = new AggregateReport([ 20 | 'breakdown' => new MulticlassBreakdown(), 21 | 'matrix' => new ConfusionMatrix(), 22 | ]); 23 | ``` -------------------------------------------------------------------------------- /docs/estimator.md: -------------------------------------------------------------------------------- 1 | # Estimator 2 | The Estimator interface is implemented by all learners in Rubix ML. It provides basic inference functionality through the `predict()` method which returns a set of predictions from a dataset. Additionally, it provides methods for returning estimator type and data type compatibility declarations. 3 | 4 | ### Make Predictions 5 | Return the predictions from a dataset containing unknown samples in an array: 6 | ```php 7 | public predict(Dataset $dataset) : array 8 | ``` 9 | 10 | ```php 11 | $predictions = $estimator->predict($dataset); 12 | 13 | print_r($predictions); 14 | ``` 15 | 16 | ```php 17 | Array 18 | ( 19 | [0] => married 20 | [1] => divorced 21 | [2] => divorced 22 | [3] => married 23 | ) 24 | ``` 25 | 26 | !!! note 27 | The return value of `predict()` is an array containing the predictions in the same order that they were indexed in the dataset. 
28 | -------------------------------------------------------------------------------- /docs/clusterers/seeders/k-mc2.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # K-MC2 4 | A fast [Plus Plus](plus-plus.md) approximator that replaces the brute force method with a substantially faster Markov Chain Monte Carlo (MCMC) sampling procedure with comparable results. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | m | 50 | int | The number of candidate nodes in the Markov Chain. | 10 | | 2 | kernel | Euclidean | Distance | The distance kernel used to compute the distance between samples. | 11 | 12 | ## Example 13 | ```php 14 | use Rubix\ML\Clusterers\Seeders\KMC2; 15 | use Rubix\ML\Kernels\Distance\Euclidean; 16 | 17 | $seeder = new KMC2(200, new Euclidean()); 18 | ``` 19 | 20 | ## References 21 | [^1]: O. Bachem et al. (2016). Approximate K-Means++ in Sublinear Time. -------------------------------------------------------------------------------- /src/NeuralNet/ActivationFunctions/ActivationFunction.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # He 4 | The He initializer was designed to initialize parameters that feed into rectified [Activation](../hidden-layers/activation.md) layers such as those employing [ReLU](../activation-functions/relu.md), [Leaky ReLU](../activation-functions/leaky-relu.md), or [ELU](../activation-functions/elu.md). It draws values from a uniform distribution with limits defined as +/- (6 / (fanIn + fanOut)) ** (1. / sqrt(2)). 5 | 6 | ## Parameters 7 | This initializer does not have any parameters. 8 | 9 | ## Example 10 | ```php 11 | use Rubix\ML\NeuralNet\Initializers\He; 12 | 13 | $initializer = new He(); 14 | ``` 15 | 16 | ## References 17 | [^1]: K. He et al. (2015). Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification.
-------------------------------------------------------------------------------- /docs/ranks-features.md: -------------------------------------------------------------------------------- 1 | # Ranks Features 2 | The Ranks Features interface is for learners that can determine the importances of the features used to train them. Low importance is given to feature columns that do not contribute significantly in the model whereas high importance indicates that the feature is more influential. Feature importances can help explain the predictions derived from a model and can also be used to identify informative features for feature selection. 3 | 4 | ### Feature Importances 5 | Return the importance scores of each feature column of the training set: 6 | ```php 7 | public featureImportances() : array 8 | ``` 9 | 10 | ```php 11 | $estimator->train($dataset); 12 | 13 | $importances = $estimator->featureImportances(); 14 | 15 | print_r($importances); 16 | ``` 17 | 18 | ```php 19 | Array 20 | ( 21 | [0] => 0.04757 22 | [1] => 0.37948 23 | [2] => 0.53170 24 | [3] => 0.04123 25 | ) 26 | ``` 27 | -------------------------------------------------------------------------------- /docs/cross-validation/metrics/brier-score.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Brier Score 4 | Brier Score is a *strictly proper* scoring metric that is equivalent to applying mean squared error to the probabilities of a probabilistic estimator. 5 | 6 | !!! note 7 | In order to maintain the convention of *maximizing* validation scores, this metric outputs the negative 8 | of the original score. 9 | 10 | **Estimator Compatibility:** Probabilistic Classifier 11 | 12 | **Score Range:** -2 to 0 13 | 14 | ## Parameters 15 | This metric does not have any parameters. 16 | 17 | ## Example 18 | ```php 19 | use Rubix\ML\CrossValidation\Metrics\BrierScore; 20 | 21 | $metric = new BrierScore(); 22 | ``` 23 | 24 | ## References 25 | [^1]: G. W. 
Brier. (1950). Verification of Forecasts Expressed in Terms of Probability. 26 | -------------------------------------------------------------------------------- /docs/transformers/max-absolute-scaler.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Max Absolute Scaler 4 | Scale the sample matrix by the maximum absolute value of each feature column independently such that the feature value is between -1 and 1. 5 | 6 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful), [Elastic](api.md#elastic), [Reversible](api.md#reversible), [Persistable](../persistable.md) 7 | 8 | **Data Type Compatibility:** Continuous 9 | 10 | ## Parameters 11 | This transformer does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\Transformers\MaxAbsoluteScaler; 16 | 17 | $transformer = new MaxAbsoluteScaler(); 18 | ``` 19 | 20 | ## Additional Methods 21 | Return the maximum absolute values for each feature column: 22 | ```php 23 | public maxabs() : array 24 | ``` 25 | -------------------------------------------------------------------------------- /docs/neural-network/hidden-layers/dropout.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Dropout 4 | Dropout is a regularization technique to reduce overfitting in neural networks by preventing complex co-adaptations on training data. It works by temporarily disabling output nodes during each training pass. It also acts as an efficient way of performing model averaging with the parameters of neural networks. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | ratio | 0.5 | float | The ratio of nodes that are dropped during each training pass. | 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\NeuralNet\Layers\Dropout; 14 | 15 | $layer = new Dropout(0.2); 16 | ``` 17 | 18 | ## References 19 | [^1]: N. Srivastava et al.
(2014). Dropout: A Simple Way to Prevent Neural Networks from Overfitting. -------------------------------------------------------------------------------- /docs/neural-network/optimizers/adagrad.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # AdaGrad 4 | Short for *Adaptive Gradient*, the AdaGrad Optimizer speeds up the learning of parameters that do not change often and slows down the learning of parameters that do enjoy heavy activity. Due to AdaGrad's infinitely decaying step size, training may be slow or fail to converge using a low learning rate. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | rate | 0.01 | float | The learning rate that controls the global step size. | 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\NeuralNet\Optimizers\AdaGrad; 14 | 15 | $optimizer = new AdaGrad(0.125); 16 | ``` 17 | 18 | ## References 19 | [^1]: J. Duchi et al. (2011). Adaptive Subgradient Methods for Online Learning and Stochastic Optimization. -------------------------------------------------------------------------------- /docs/kernels/distance/minkowski.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Minkowski 4 | The Minkowski distance can be considered as a generalization of both the [Euclidean](euclidean.md) and [Manhattan](manhattan.md) distances. When the lambda parameter is set to 1 or 2, the distance is equivalent to Manhattan and Euclidean respectively. 5 | 6 | $$ 7 | {\displaystyle Minkowski\left(a,b\right)=\left(\sum _{i=1}^{n}|a_{i}-b_{i}|^{p}\right)^{\frac {1}{p}}} 8 | $$ 9 | 10 | **Data Type Compatibility:** Continuous 11 | 12 | ## Parameters 13 | | # | Name | Default | Type | Description | 14 | |---|---|---|---|---| 15 | | 1 | lambda | 3.0 | float | Controls the curvature of the unit circle drawn from a point at a fixed distance. 
| 16 | 17 | ## Example 18 | ```php 19 | use Rubix\ML\Kernels\Distance\Minkowski; 20 | 21 | $kernel = new Minkowski(4.0); 22 | ``` -------------------------------------------------------------------------------- /docs/neural-network/optimizers/rms-prop.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # RMS Prop 4 | An adaptive gradient technique that divides the current gradient over a rolling window of the magnitudes of recent gradients. Unlike [AdaGrad](adagrad.md), RMS Prop does not suffer from an infinitely decaying step size. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | rate | 0.001 | float | The learning rate that controls the global step size. | 10 | | 2 | decay | 0.1 | float | The decay rate of the rms property. | 11 | 12 | ## Example 13 | ```php 14 | use Rubix\ML\NeuralNet\Optimizers\RMSProp; 15 | 16 | $optimizer = new RMSProp(0.01, 0.1); 17 | ``` 18 | 19 | ## References 20 | [^1]: T. Tieleman et al. (2012). Lecture 6e rmsprop: Divide the gradient by a running average of its recent magnitude. -------------------------------------------------------------------------------- /docs/parallel.md: -------------------------------------------------------------------------------- 1 | # Parallel 2 | Multiprocessing is the use of two or more processes that execute in parallel. Objects that implement the Parallel interface can take advantage of multicore processors by executing parts or all of the algorithm in parallel. Choose a number of processes equal to the number of CPU cores in order to take advantage of a system's full processing capability. 3 | 4 | !!! note 5 | Most parallel learners are configured to use the [Serial](backends/serial.md) backend by default. 6 | 7 | ## Set a Backend 8 | Parallelizable objects can utilize a parallel processing Backend by passing it to the `setBackend()` method. 
9 | 10 | To set the backend processing engine: 11 | ```php 12 | public setBackend(Backend $backend) : void 13 | ``` 14 | 15 | ```php 16 | use Rubix\ML\Classifiers\RandomForest; 17 | use Rubix\ML\Backends\Amp; 18 | 19 | $estimator = new RandomForest(); 20 | 21 | $estimator->setBackend(new Amp(16)); 22 | ``` 23 | -------------------------------------------------------------------------------- /docs/tokenizers/k-skip-n-gram.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # K-Skip-N-Gram 4 | K-skip-n-grams are a technique similar to n-grams, whereby n-grams are formed but in addition to allowing adjacent sequences of words, the next *k* words will be skipped forming n-grams of the new forward looking sequences. The tokenizer outputs tokens ranging from *min* to *max* number of words per token. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | min | 2 | int | The minimum number of words in a single token. | 10 | | 2 | max | 2 | int | The maximum number of words in a single token. | 11 | | 3 | skip | 2 | int | The number of words to skip over to form new sequences. | 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\Tokenizers\KSkipNGram; 16 | 17 | $tokenizer = new KSkipNGram(2, 3, 2); 18 | ``` 19 | -------------------------------------------------------------------------------- /docs/transformers/text-normalizer.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Text Normalizer 4 | Converts all the characters in a blob of text to the same case. 5 | 6 | **Interfaces:** [Transformer](api.md#transformer) 7 | 8 | **Data Type Compatibility:** Categorical 9 | 10 | !!! note 11 | This transformer does not handle multibyte strings. For multibyte support, see [MultibyteTextNormalizer](multibyte-text-normalizer.md). 
12 | 13 | ## Parameters 14 | | # | Name | Default | Type | Description | 15 | |---|---|---|---|---| 16 | | 1 | uppercase | false | bool | Should the text be converted to uppercase? | 17 | 18 | ## Example 19 | ```php 20 | use Rubix\ML\Transformers\TextNormalizer; 21 | 22 | $transformer = new TextNormalizer(false); 23 | ``` 24 | 25 | ## Additional Methods 26 | This transformer does not have any additional methods. 27 | 28 | -------------------------------------------------------------------------------- /src/CrossValidation/Metrics/Metric.php: -------------------------------------------------------------------------------- 1 | 23 | */ 24 | public function compatibility() : array; 25 | 26 | /** 27 | * Score a set of predictions and their ground-truth labels. 28 | * 29 | * @param list $predictions 30 | * @param list $labels 31 | * @return float 32 | */ 33 | public function score(array $predictions, array $labels) : float; 34 | } 35 | -------------------------------------------------------------------------------- /docs/neural-network/activation-functions/relu.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # ReLU 4 | Rectified Linear Units (ReLU) only output the positive signal of the input. They have the benefit of having a monotonic derivative and are cheap to compute. 5 | 6 | $$ 7 | {\displaystyle ReLU = {\begin{aligned}&{\begin{cases}0&{\text{if }}x\leq 0\\x&{\text{if }}x>0\end{cases}}=&\max\{0,x\}\end{aligned}}} 8 | $$ 9 | 10 | ## Parameters 11 | This activation function does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\ActivationFunctions\ReLU; 16 | 17 | $activationFunction = new ReLU(); 18 | ``` 19 | 20 | ## References 21 | [^1]: A. L. Maas et al. (2013). Rectifier Nonlinearities Improve Neural Network Acoustic Models. 22 | [^2]: K. Konda et al. (2015). Zero-bias Autoencoders and the Benefits of Co-adapting Features.
-------------------------------------------------------------------------------- /docs/extractors/ndjson.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # NDJSON 4 | [NDJSON](http://ndjson.org/) or *Newline Delimited* JSON files contain rows of data encoded in Javascript Object Notation (JSON) arrays or objects. The format is like a mix of JSON and CSV and has the advantage of retaining data type information and being read into memory incrementally. 5 | 6 | !!! note 7 | Empty lines are ignored by the parser. 8 | 9 | **Interfaces:** [Extractor](api.md), [Writable](api.md) 10 | 11 | ## Parameters 12 | | # | Name | Default | Type | Description | 13 | |---|---|---|---|---| 14 | | 1 | path | | string | The path to the NDJSON file. | 15 | 16 | ## Example 17 | ```php 18 | use Rubix\ML\Extractors\NDJSON; 19 | 20 | $extractor = new NDJSON('example.ndjson'); 21 | ``` 22 | 23 | ## Additional Methods 24 | This extractor does not have any additional methods. 25 | -------------------------------------------------------------------------------- /docs/neural-network/optimizers/step-decay.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Step Decay 4 | A learning rate decay optimizer that reduces the global learning rate by a factor whenever it reaches a new *floor*. The number of steps needed to reach a new floor is defined by the *steps* hyper-parameter. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | rate | 0.01 | float | The learning rate that controls the global step size. | 10 | | 2 | steps | 100 | int | The size of every floor in steps. i.e. the number of steps to take before applying another factor of decay. | 11 | | 3 | decay | 1e-3 | float | The factor to decrease the learning rate at each *floor*. 
| 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\Optimizers\StepDecay; 16 | 17 | $optimizer = new StepDecay(0.1, 50, 1e-3); 18 | ``` -------------------------------------------------------------------------------- /docs/cross-validation/metrics/completeness.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Completeness 4 | A ground-truth clustering metric that measures the ratio of samples in a class that are also members of the same cluster. A cluster is said to be *complete* when all the samples in a class are contained in a cluster. 5 | 6 | $$ 7 | {\displaystyle Completeness = 1-\frac{H(K, C)}{H(K)}} 8 | $$ 9 | 10 | !!! note 11 | Since this metric monotonically improves as the number of target clusters decreases, it should not be used as a metric to guide hyper-parameter tuning. 12 | 13 | **Estimator Compatibility:** Clusterer 14 | 15 | **Score Range:** 0 to 1 16 | 17 | ## Parameters 18 | This metric does not have any parameters. 19 | 20 | ## Example 21 | ```php 22 | use Rubix\ML\CrossValidation\Metrics\Completeness; 23 | 24 | $metric = new Completeness(); 25 | ``` -------------------------------------------------------------------------------- /src/NeuralNet/CostFunctions/CostFunction.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Homogeneity 4 | A ground-truth clustering metric that measures the ratio of samples in a cluster that are also members of the same class. A cluster is said to be *homogeneous* when the entire cluster is comprised of a single class of samples. 5 | 6 | $$ 7 | {\displaystyle Homogeneity = 1-\frac{H(C, K)}{H(C)}} 8 | $$ 9 | 10 | !!! note 11 | Since this metric monotonically improves as the number of target clusters increases, it should not be used as a metric to guide hyper-parameter tuning. 
12 | 13 | **Estimator Compatibility:** Clusterer 14 | 15 | **Score Range:** 0 to 1 16 | 17 | ## Parameters 18 | This metric does not have any parameters. 19 | 20 | ## Example 21 | ```php 22 | use Rubix\ML\CrossValidation\Metrics\Homogeneity; 23 | 24 | $metric = new Homogeneity(); 25 | ``` -------------------------------------------------------------------------------- /docs/neural-network/activation-functions/selu.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # SELU 4 | Scaled Exponential Linear Units (SELU) are a self-normalizing activation function based on the [ELU](elu.md) activation function. Neuronal activations of SELU networks automatically converge toward zero mean and unit variance, unlike explicitly normalized networks such as those with [Batch Norm](#batch-norm) hidden layers. 5 | 6 | $$ 7 | {\displaystyle SELU = 1.0507 {\begin{cases}1.67326 (e^{x}-1)&{\text{if }}x<0\\x&{\text{if }}x\geq 0\end{cases}}} 8 | $$ 9 | 10 | ## Parameters 11 | This activation function does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\ActivationFunctions\SELU; 16 | 17 | $activationFunction = new SELU(); 18 | ``` 19 | 20 | ## References 21 | [^1]: G. Klambauer et al. (2017). Self-Normalizing Neural Networks. 22 | -------------------------------------------------------------------------------- /src/Serializers/Serializer.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Image Resizer 4 | Image Resizer fits (scales and crops) images to a user-specified width and height that preserves aspect ratio. 5 | 6 | !!! note 7 | The [GD extension](https://php.net/manual/en/book.image.php) is required to use this transformer.
8 | 9 | **Interfaces:** [Transformer](api.md#transformer) 10 | 11 | **Data Type Compatibility:** Image 12 | 13 | ## Parameters 14 | | # | Name | Default | Type | Description | 15 | |---|---|---|---|---| 16 | | 1 | width | 32 | int | The width of the resized image. | 17 | | 2 | height | 32 | int | The height of the resized image. | 18 | 19 | ## Example 20 | ```php 21 | use Rubix\ML\Transformers\ImageResizer; 22 | 23 | $transformer = new ImageResizer(28, 28); 24 | ``` 25 | 26 | ## Additional Methods 27 | This transformer does not have any additional methods. 28 | -------------------------------------------------------------------------------- /tests/DeferredTest.php: -------------------------------------------------------------------------------- 1 | deferred = new Deferred(function ($a, $b) { 25 | return $a + $b; 26 | }, [1, 2]); 27 | } 28 | 29 | /** 30 | * @test 31 | */ 32 | public function build() : void 33 | { 34 | $this->assertInstanceOf(Deferred::class, $this->deferred); 35 | $this->assertIsCallable($this->deferred); 36 | } 37 | 38 | /** 39 | * @test 40 | */ 41 | public function compute() : void 42 | { 43 | $this->assertEquals(3, $this->deferred->compute()); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /docs/clusterers/seeders/plus-plus.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Plus Plus 4 | This seeder attempts to maximize the chances of seeding distant clusters while still remaining random. It does so by sequentially selecting random samples weighted by their distance from the previous seed. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | kernel | Euclidean | Distance | The distance kernel used to compute the distance between samples.
| 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\Clusterers\Seeders\PlusPlus; 14 | use Rubix\ML\Kernels\Distance\Minkowski; 15 | 16 | $seeder = new PlusPlus(new Minkowski(5.0)); 17 | ``` 18 | 19 | ## References 20 | [^1]: D. Arthur et al. (2006). k-means++: The Advantages of Careful Seeding. 21 | [^2]: A. Stetco et al. (2015). Fuzzy C-means++: Fuzzy C-means with effective seeding initialization. -------------------------------------------------------------------------------- /docs/cross-validation/reports/contingency-table.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Contingency Table 4 | A Contingency Table is used to display the frequency distribution of class labels among a clustering. It is similar to a [Confusion Matrix](confusion-matrix.md) but uses the labels to establish ground-truth for a clustering problem instead. 5 | 6 | **Estimator Compatibility:** Clusterer 7 | 8 | ## Parameters 9 | This report does not have any parameters. 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\CrossValidation\Reports\ContingencyTable; 14 | 15 | $report = new ContingencyTable(); 16 | 17 | $result = $report->generate($predictions, $labels); 18 | 19 | echo $result; 20 | ``` 21 | 22 | ```json 23 | [ 24 | { 25 | "lamb": 11, 26 | "wolf": 2 27 | }, 28 | { 29 | "lamb": 1, 30 | "wolf": 5 31 | } 32 | ] 33 | ``` 34 | -------------------------------------------------------------------------------- /docs/neural-network/optimizers/adamax.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # AdaMax 4 | A version of the [Adam](adam.md) optimizer that replaces the RMS property with the infinity norm of the past gradients. As such, AdaMax is generally more suitable for sparse parameter updates and noisy gradients. 
5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | rate | 0.001 | float | The learning rate that controls the global step size. | 10 | | 2 | momentumDecay | 0.1 | float | The decay rate of the accumulated velocity. | 11 | | 3 | normDecay | 0.001 | float | The decay rate of the infinity norm. | 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\Optimizers\AdaMax; 16 | 17 | $optimizer = new AdaMax(0.0001, 0.1, 0.001); 18 | ``` 19 | 20 | ## References 21 | [^1]: D. P. Kingma et al. (2014). Adam: A Method for Stochastic Optimization. -------------------------------------------------------------------------------- /src/constants.php: -------------------------------------------------------------------------------- 1 | test($estimator, $dataset, new Accuracy()); 20 | 21 | echo $score; 22 | ``` 23 | 24 | ``` 25 | 0.75 26 | ``` -------------------------------------------------------------------------------- /docs/datasets/unlabeled.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Unlabeled 4 | Unlabeled datasets are used to train unsupervised learners and for feeding unknown samples into an estimator to make predictions. As their name implies, they do not require a corresponding label for each sample. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | samples | | array | A 2-dimensional array consisting of rows of samples and columns with feature values. | 10 | | 2 | verify | true | bool | Should we verify the data? | 11 | 12 | ## Example 13 | 14 | ```php 15 | use Rubix\ML\Datasets\Unlabeled; 16 | 17 | $samples = [ 18 | [0.1, 20, 'furry'], 19 | [2.0, -5, 'rough'], 20 | [0.001, -10, 'rough'], 21 | ]; 22 | 23 | $dataset = new Unlabeled($samples); 24 | ``` 25 | 26 | ## Additional Methods 27 | This dataset does not have any additional methods. 
28 | -------------------------------------------------------------------------------- /docs/neural-network/activation-functions/thresholded-relu.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Thresholded ReLU 4 | A version of the [ReLU](relu.md) function that activates only if the input is above some user-specified threshold level. 5 | 6 | $$ 7 | {\displaystyle ThresholdedReLU = {\begin{aligned}&{\begin{cases}0&{\text{if }}x\leq \theta \\x&{\text{if }}x>\theta\end{cases}}\end{aligned}}} 8 | $$ 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | threshold | 1.0 | float | The threshold at which the neuron is activated. | 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\NeuralNet\ActivationFunctions\ThresholdedReLU; 18 | 19 | $activationFunction = new ThresholdedReLU(0.5); 20 | ``` 21 | 22 | ## References 23 | [^1]: K. Konda et al. (2015). Zero-bias autoencoders and the benefits of co-adapting features. 24 | -------------------------------------------------------------------------------- /src/Backends/Backend.php: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Swish 4 | Swish is a parametric activation layer that utilizes smooth rectified activation functions. The trainable *beta* parameter allows each activation function in the layer to tailor its output to the training set by interpolating between the linear function and ReLU. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | initializer | Constant | Initializer | The initializer of the beta parameter. | 10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\NeuralNet\Layers\Swish; 14 | use Rubix\ML\NeuralNet\Initializers\Constant; 15 | 16 | $layer = new Swish(new Constant(1.0)); 17 | ``` 18 | 19 | ## References 20 | [^1]: P. Ramachandran et al. (2017). 
Swish: A Self-gated Activation Function. 21 | [^2]: P. Ramachandran et al. (2017). Searching for Activation Functions. 22 | -------------------------------------------------------------------------------- /docs/transformers/numeric-string-converter.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Numeric String Converter 4 | Convert all numeric strings to their equivalent integer and floating point types. Useful for when extracting from a source that only recognizes data as string types such as CSV. 5 | 6 | !!! note 7 | The string representations of the PHP constants `NAN` and `INF` are the string literals 'NAN' and 'INF' respectively. 8 | 9 | **Interfaces:** [Transformer](api.md#transformer), [Reversible](api.md#reversible) 10 | 11 | **Data Type Compatibility:** Categorical 12 | 13 | ## Parameters 14 | This transformer does not have any parameters. 15 | 16 | ## Example 17 | ```php 18 | use Rubix\ML\Transformers\NumericStringConverter; 19 | 20 | $transformer = new NumericStringConverter(); 21 | ``` 22 | 23 | ## Additional Methods 24 | This transformer does not have any additional methods. 25 | -------------------------------------------------------------------------------- /docs/cross-validation/metrics/informedness.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Informedness 4 | Informedness is a multiclass generalization of Youden's J Statistic and can be interpreted as the probability that an estimator will make an informed prediction. Its value ranges from -1 through 1 and has a value of 0 when the test yields no useful information. 5 | 6 | $$ 7 | {\displaystyle Informedness = {\frac {\text{TP}}{{\text{TP}}+{\text{FN}}}}+{\frac {\text{TN}}{{\text{TN}}+{\text{FP}}}}-1} 8 | $$ 9 | 10 | **Estimator Compatibility:** Classifier, Anomaly Detector 11 | 12 | **Score Range:** -1 to 1 13 | 14 | ## Parameters 15 | This metric does not have any parameters. 
16 | 17 | ## Example 18 | ```php 19 | use Rubix\ML\CrossValidation\Metrics\Informedness; 20 | 21 | $metric = new Informedness(); 22 | ``` 23 | 24 | ## References 25 | [^1]: W. J. Youden. (1950). Index for Rating Diagnostic Tests. -------------------------------------------------------------------------------- /docs/cross-validation/metrics/mean-squared-error.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Mean Squared Error 4 | A scale-dependent regression metric that gives greater weight to error scores the worse they are. Formally, Mean Squared Error (MSE) is the average of the squared differences between a set of predictions and their target labels. 5 | 6 | $$ 7 | {\displaystyle \operatorname {MSE} = {\frac {1}{n}}\sum _{i=1}^{n}(Y_{i}-{\hat {Y_{i}}})^{2}} 8 | $$ 9 | 10 | !!! note 11 | In order to maintain the convention of *maximizing* validation scores, this metric outputs the negative of the original score. 12 | 13 | **Estimator Compatibility:** Regressor 14 | 15 | **Score Range:** -∞ to 0 16 | 17 | ## Parameters 18 | This metric does not have any parameters. 19 | 20 | ## Example 21 | ```php 22 | use Rubix\ML\CrossValidation\Metrics\MeanSquaredError; 23 | 24 | $metric = new MeanSquaredError(); 25 | ``` -------------------------------------------------------------------------------- /src/Clusterers/Seeders/Random.php: -------------------------------------------------------------------------------- 1 | > 26 | */ 27 | public function seed(Dataset $dataset, int $k) : array 28 | { 29 | return $dataset->randomSubset($k)->samples(); 30 | } 31 | 32 | /** 33 | * Return the string representation of the object. 
34 | * 35 | * @internal 36 | * 37 | * @return string 38 | */ 39 | public function __toString() : string 40 | { 41 | return 'Random'; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /docs/cross-validation/metrics/mean-absolute-error.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Mean Absolute Error 4 | A scale-dependent metric that measures the average absolute error between a set of predictions and their ground-truth labels. One of the nice properties of MAE is that it has the same units of measurement as the labels being estimated. 5 | 6 | $$ 7 | {\displaystyle \mathrm {MAE} = {\frac {1}{n}}{\sum _{i=1}^{n}\left |Y_{i}-\hat {Y_{i}}\right|}} 8 | $$ 9 | 10 | !!! note 11 | In order to maintain the convention of *maximizing* validation scores, this metric outputs the negative of the original score. 12 | 13 | **Estimator Compatibility:** Regressor 14 | 15 | **Score Range:** -∞ to 0 16 | 17 | ## Parameters 18 | This metric does not have any parameters. 19 | 20 | ## Example 21 | ```php 22 | use Rubix\ML\CrossValidation\Metrics\MeanAbsoluteError; 23 | 24 | $metric = new MeanAbsoluteError(); 25 | ``` -------------------------------------------------------------------------------- /docs/learner.md: -------------------------------------------------------------------------------- 1 | # Learner 2 | Most estimators have the ability to be trained with data. These estimators are called *Learners* and require training before they can make predictions. Training is the process of feeding data to the learner so that it can form a generalized representation or *model* of the dataset. 3 | 4 | ### Train a Learner 5 | To train a learner pass a training dataset as argument to the `train()` method: 6 | ```php 7 | public train(Dataset $training) : void 8 | ``` 9 | 10 | ```php 11 | $estimator->train($dataset); 12 | ``` 13 | 14 | !!! 
note 15 | Calling the `train()` method on an already trained learner will erase its previous training. If you would like to train a model incrementally, you can do so with learners implementing the [Online](online.md) interface. 16 | 17 | ### Is the Learner Trained? 18 | Return whether or not the learner has been trained: 19 | ```php 20 | public trained() : bool 21 | ``` 22 | 23 | ```php 24 | var_dump($estimator->trained()); 25 | ``` 26 | 27 | ``` 28 | bool(true) 29 | ``` 30 | -------------------------------------------------------------------------------- /docs/transformers/interval-discretizer.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Interval Discretizer 4 | Assigns continuous features to ordered categories using variable width per-feature histograms with a fixed user-specified number of bins. 5 | 6 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful), [Persistable](../persistable.md) 7 | 8 | **Data Type Compatibility:** Continuous 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | bins | 5 | int | The number of bins per histogram. | 14 | | 2 | equiWidth | false | bool | Should the bins be equal width? | 15 | 16 | ## Example 17 | ```php 18 | use Rubix\ML\Transformers\IntervalDiscretizer; 19 | 20 | $transformer = new IntervalDiscretizer(8, false); 21 | ``` 22 | 23 | ## Additional Methods 24 | Return the bin intervals of the fitted data: 25 | ```php 26 | public intervals() : array 27 | ``` 28 | -------------------------------------------------------------------------------- /docs/cross-validation/metrics/median-absolute-error.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Median Absolute Error 4 | Median Absolute Error (MAD) is a robust measure of error, similar to [MAE](mean-absolute-error.md), that ignores highly erroneous predictions. 
Since MAD is a robust statistic, it works well even when used to measure non-normal distributions. 5 | 6 | $$ 7 | {\displaystyle \operatorname {MAD} = \operatorname {median} (|Y_{i}-{\tilde {Y}}|)} 8 | $$ 9 | 10 | !!! note 11 | In order to maintain the convention of *maximizing* validation scores, this metric outputs the negative of the original score. 12 | 13 | **Estimator Compatibility:** Regressor 14 | 15 | **Score Range:** -∞ to 0 16 | 17 | ## Parameters 18 | This metric does not have any parameters. 19 | 20 | ## Example 21 | ```php 22 | use Rubix\ML\CrossValidation\Metrics\MedianAbsoluteError; 23 | 24 | $metric = new MedianAbsoluteError(); 25 | ``` 26 | -------------------------------------------------------------------------------- /docs/cross-validation/metrics/rmse.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # RMSE 4 | The Root Mean Squared Error (RMSE) is equivalent to the standard deviation of the error residuals in a regression problem. Since RMSE is just the square root of the [MSE](mean-squared-error.md), RMSE is also sensitive to outliers because larger errors have a disproportionately large effect on the score. 5 | 6 | $$ 7 | {\displaystyle \operatorname {RMSE} = {\sqrt{ \frac {1}{n} \sum _{i=1}^{n}(Y_{i}-{\hat {Y_{i}}})^{2}}}} 8 | $$ 9 | 10 | !!! note 11 | In order to maintain the convention of *maximizing* validation scores, this metric outputs the negative of the original score. 12 | 13 | **Estimator Compatibility:** Regressor 14 | 15 | **Score Range:** -∞ to 0 16 | 17 | ## Parameters 18 | This metric does not have any parameters. 
19 | 20 | ## Example 21 | ```php 22 | use Rubix\ML\CrossValidation\Metrics\RMSE; 23 | 24 | $metric = new RMSE(); 25 | ``` -------------------------------------------------------------------------------- /src/Traits/LoggerAware.php: -------------------------------------------------------------------------------- 1 | logger = $logger; 33 | } 34 | 35 | /** 36 | * Return the PSR-3 logger instance. 37 | * 38 | * @return LoggerInterface|null 39 | */ 40 | public function logger() : ?LoggerInterface 41 | { 42 | return $this->logger; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /tests/Kernels/SVM/LinearTest.php: -------------------------------------------------------------------------------- 1 | kernel = new Linear(); 27 | } 28 | 29 | /** 30 | * @test 31 | */ 32 | public function build() : void 33 | { 34 | $this->assertInstanceOf(Linear::class, $this->kernel); 35 | $this->assertInstanceOf(Kernel::class, $this->kernel); 36 | } 37 | 38 | /** 39 | * @test 40 | */ 41 | public function options() : void 42 | { 43 | $expected = [102 => 0]; 44 | 45 | $this->assertEquals($expected, $this->kernel->options()); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /docs/transformers/one-hot-encoder.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # One Hot Encoder 4 | The One Hot Encoder takes a categorical feature column and produces an n-dimensional continuous representation where *n* is equal to the number of unique categories present in that column. A `0` in any location indicates that the category represented by that column is not present in the sample, whereas a `1` indicates that a category is present. 
5 | 6 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful), [Persistable](../persistable.md) 7 | 8 | **Data Type Compatibility:** Categorical 9 | 10 | ## Parameters 11 | This transformer does not have any parameters. 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\Transformers\OneHotEncoder; 16 | 17 | $transformer = new OneHotEncoder(); 18 | ``` 19 | 20 | ## Additional Methods 21 | Return the categories computed during fitting indexed by feature column: 22 | ```php 23 | public categories() : ?array 24 | ``` 25 | -------------------------------------------------------------------------------- /src/Strategies/Strategy.php: -------------------------------------------------------------------------------- 1 | $values 39 | */ 40 | public function fit(array $values) : void; 41 | 42 | /** 43 | * Make a guess. 44 | * 45 | * @internal 46 | * 47 | * @return string|int|float 48 | */ 49 | public function guess(); 50 | } 51 | -------------------------------------------------------------------------------- /docs/backends/amp.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Amp 4 | [Amp Parallel](https://amphp.org/parallel/) is a multiprocessing subsystem that requires no extensions. It uses a non-blocking concurrency framework that implements coroutines using PHP generator functions under the hood. 5 | 6 | !!! note 7 | The optimal number of workers will depend on the system specifications of the computer. Fewer workers than CPU cores may not achieve full processing potential but more workers than cores can cause excess overhead. 8 | 9 | ## Parameters 10 | | # | Name | Default | Type | Description | 11 | |---|---|---|---|---| 12 | | 1 | workers | Auto | int | The maximum number of workers in the worker pool. If null then tries to autodetect CPU core count. 
| 13 | 14 | ## Example 15 | ```php 16 | use Rubix\ML\Backends\Amp; 17 | 18 | $backend = new Amp(16); 19 | ``` 20 | 21 | ## Additional Methods 22 | Return the maximum number of workers in the worker pool: 23 | ```php 24 | public workers() : int 25 | ``` -------------------------------------------------------------------------------- /docs/cross-validation/metrics/v-measure.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # V Measure 4 | V Measure is an entropy-based clustering metric that balances [Homogeneity](homogeneity.md) and [Completeness](completeness.md). It has the additional property of being symmetric in that the predictions and ground-truth can be swapped without changing the score. 5 | 6 | $$ 7 | {\displaystyle V_{\beta} = \frac{(1+\beta)hc}{\beta h + c}} 8 | $$ 9 | 10 | **Estimator Compatibility:** Clusterer 11 | 12 | **Score Range:** 0 to 1 13 | 14 | ## Parameters 15 | | # | Name | Default | Type | Description | 16 | |---|---|---|---|---| 17 | | 1 | beta | 1.0 | float | The ratio of weight given to homogeneity over completeness. | 18 | 19 | ## Example 20 | ```php 21 | use Rubix\ML\CrossValidation\Metrics\VMeasure; 22 | 23 | $metric = new VMeasure(1.0); 24 | ``` 25 | 26 | ## References 27 | [^1]: A. Rosenberg et al. (2007). V-Measure: A conditional entropy-based external cluster evaluation measure. -------------------------------------------------------------------------------- /docs/datasets/generators/circle.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Circle 4 | Creates a dataset of points forming a circle in 2 dimensions. The label of each sample is the random value used to generate the projection measured in degrees. 
5 | 6 | **Data Types:** Continuous 7 | 8 | **Label Type:** Continuous 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | x | 0.0 | float | The *x* coordinate of the center of the circle. | 14 | | 2 | y | 0.0 | float | The *y* coordinate of the center of the circle. | 15 | | 3 | scale | 1.0 | float | The scaling factor of the circle. | 16 | | 4 | noise | 0.1 | float | The amount of Gaussian noise to add to each data point as a ratio of the scaling factor. | 17 | 18 | ## Example 19 | ```php 20 | use Rubix\ML\Datasets\Generators\Circle; 21 | 22 | $generator = new Circle(0.0, 0.0, 100, 0.1); 23 | ``` 24 | 25 | ## Additional Methods 26 | This generator does not have any additional methods. 27 | -------------------------------------------------------------------------------- /src/Backends/Tasks/TrainLearner.php: -------------------------------------------------------------------------------- 1 | train($dataset); 31 | 32 | return $estimator; 33 | } 34 | 35 | /** 36 | * @param Learner $estimator 37 | * @param Dataset $dataset 38 | */ 39 | public function __construct(Learner $estimator, Dataset $dataset) 40 | { 41 | parent::__construct([self::class, 'train'], [$estimator, $dataset]); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /docs/extractors/column-picker.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Column Picker 4 | An extractor that wraps another iterator and selects and reorders the columns of the data table according to the keys specified by the user. The key of a column may either be a string or a column number (integer) depending on the way the columns are indexed in the base iterator. 5 | 6 | **Interfaces:** [Extractor](api.md) 7 | 8 | ## Parameters 9 | | # | Name | Default | Type | Description | 10 | |---|---|---|---|---| 11 | | 1 | iterator | | Traversable | The base iterator. 
| 12 | | 2 | keys | | array | The string and/or integer keys of the columns to pick and reorder from the table | 13 | 14 | ## Example 15 | ```php 16 | use Rubix\ML\Extractors\ColumnPicker; 17 | use Rubix\ML\Extractors\CSV; 18 | 19 | $extractor = new ColumnPicker(new CSV('example.csv', true), [ 20 | 'attitude', 'texture', 'class', 'rating', 21 | ]); 22 | ``` 23 | 24 | ## Additional Methods 25 | This extractor does not have any additional methods. 26 | -------------------------------------------------------------------------------- /docs/neural-network/activation-functions/elu.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # ELU 4 | *Exponential Linear Units* are a type of rectifier that soften the transition from non-activated to activated using the exponential function. As such, ELU produces smoother gradients than the piecewise linear [ReLU](relu.md) function. 5 | 6 | $$ 7 | {\displaystyle ELU = {\begin{cases}\alpha \left(e^{x}-1\right)&{\text{if }}x\leq 0\\x&{\text{if }}x>0\end{cases}}} 8 | $$ 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | alpha | 1.0 | float | The value at which leakage will begin to saturate. Ex. alpha = 1.0 means that the output will never be less than -1.0 when inactivated. | 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\NeuralNet\ActivationFunctions\ELU; 18 | 19 | $activationFunction = new ELU(2.5); 20 | ``` 21 | 22 | ## References 23 | [^1]: D. A. Clevert et al. (2016). Fast and Accurate Deep Network Learning by Exponential Linear Units. 
24 | -------------------------------------------------------------------------------- /src/Backends/Tasks/Predict.php: -------------------------------------------------------------------------------- 1 | 27 | */ 28 | public static function predict(Estimator $estimator, Dataset $dataset) : array 29 | { 30 | return $estimator->predict($dataset); 31 | } 32 | 33 | /** 34 | * @param Estimator $estimator 35 | * @param Dataset $dataset 36 | */ 37 | public function __construct(Estimator $estimator, Dataset $dataset) 38 | { 39 | parent::__construct([self::class, 'predict'], [$estimator, $dataset]); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /docs/cross-validation/metrics/f-beta.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # F-Beta 4 | A weighted harmonic mean of precision and recall, F-Beta is both a versatile and balanced metric. The beta parameter controls the weight of precision in the combined score. As beta goes to infinity the score only considers recall, whereas when it goes to 0 it only considers precision. When beta is equal to 1, this metric is called an F1 score. 5 | 6 | $$ 7 | {\displaystyle F_\beta = (1 + \beta^2) \cdot \frac{\mathrm{precision} \cdot \mathrm{recall}}{(\beta^2 \cdot \mathrm{precision}) + \mathrm{recall}}} 8 | $$ 9 | 10 | **Estimator Compatibility:** Classifier, Anomaly Detector 11 | 12 | **Score Range:** 0 to 1 13 | 14 | ## Parameters 15 | | # | Name | Default | Type | Description | 16 | |---|---|---|---|---| 17 | | 1 | beta | 1.0 | float | The ratio of weight given to precision over recall. 
| 18 | 19 | ## Example 20 | ```php 21 | use Rubix\ML\CrossValidation\Metrics\FBeta; 22 | 23 | $metric = new FBeta(0.7); 24 | ``` -------------------------------------------------------------------------------- /docs/transformers/polynomial-expander.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Polynomial Expander 4 | This transformer will generate polynomials up to and including the specified *degree* of each continuous feature. Polynomial expansion is sometimes used to fit data that is non-linear using a linear estimator such as [Ridge](../regressors/ridge.md), [Logistic Regression](../classifiers/logistic-regression.md), or [Softmax Classifier](../classifiers/softmax-classifier.md). 5 | 6 | **Interfaces:** [Transformer](api.md#transformer) 7 | 8 | **Data Type Compatibility:** Continuous only 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | degree | 2 | int | The degree of the polynomials to generate for each feature. | 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\Transformers\PolynomialExpander; 18 | 19 | $transformer = new PolynomialExpander(3); 20 | ``` 21 | 22 | ## Additional Methods 23 | This transformer does not have any additional methods. 
24 | -------------------------------------------------------------------------------- /tests/Kernels/SVM/RBFTest.php: -------------------------------------------------------------------------------- 1 | kernel = new RBF(1e-3); 27 | } 28 | 29 | /** 30 | * @test 31 | */ 32 | public function build() : void 33 | { 34 | $this->assertInstanceOf(RBF::class, $this->kernel); 35 | $this->assertInstanceOf(Kernel::class, $this->kernel); 36 | } 37 | 38 | /** 39 | * @test 40 | */ 41 | public function options() : void 42 | { 43 | $options = [ 44 | 102 => 2, 45 | 201 => 1e-3, 46 | ]; 47 | 48 | $this->assertEquals($options, $this->kernel->options()); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /docs/cross-validation/reports/confusion-matrix.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Confusion Matrix 4 | A Confusion Matrix is a square matrix (table) that visualizes the true positives, false positives, true negatives, and false negatives of a set of predictions and their corresponding labels. 5 | 6 | **Estimator Compatibility:** Classifier, Anomaly Detector 7 | 8 | ## Parameters 9 | This report does not have any parameters. 
10 | 11 | ## Example 12 | ```php 13 | use Rubix\ML\CrossValidation\Reports\ConfusionMatrix; 14 | 15 | $report = new ConfusionMatrix(); 16 | 17 | $result = $report->generate($predictions, $labels); 18 | 19 | echo $result; 20 | ``` 21 | 22 | ```json 23 | { 24 | "dog": { 25 | "dog": 12, 26 | "cat": 3, 27 | "turtle": 0 28 | }, 29 | "cat": { 30 | "dog": 2, 31 | "cat": 9, 32 | "turtle": 1 33 | }, 34 | "turtle": { 35 | "dog": 1, 36 | "cat": 0, 37 | "turtle": 11 38 | } 39 | } 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/neural-network/hidden-layers/prelu.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # PReLU 4 | Parametric Rectified Linear Units are leaky rectifiers whose *leakage* coefficient is learned during training. Unlike standard [Leaky ReLUs](../activation-functions/leaky-relu.md) whose leakage remains constant, PReLU layers can adjust the leakage to better suit the model on a per node basis. 5 | 6 | $$ 7 | {\displaystyle PReLU = {\begin{cases}\alpha x&{\text{if }}x<0\\x&{\text{if }}x\geq 0\end{cases}}} 8 | $$ 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | initializer | Constant | Initializer | The initializer of the leakage parameter. | 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\NeuralNet\Layers\PReLU; 18 | use Rubix\ML\NeuralNet\Initializers\Normal; 19 | 20 | $layer = new PReLU(new Normal(0.5)); 21 | ``` 22 | 23 | ## References 24 | [^1]: K. He et al. (2015). Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification. 
25 | -------------------------------------------------------------------------------- /src/Backends/Tasks/Proba.php: -------------------------------------------------------------------------------- 1 | 27 | */ 28 | public static function proba(Probabilistic $estimator, Dataset $dataset) : array 29 | { 30 | return $estimator->proba($dataset); 31 | } 32 | 33 | /** 34 | * @param Probabilistic $estimator 35 | * @param Dataset $dataset 36 | */ 37 | public function __construct(Probabilistic $estimator, Dataset $dataset) 38 | { 39 | parent::__construct([self::class, 'proba'], [$estimator, $dataset]); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/Specifications/Specification.php: -------------------------------------------------------------------------------- 1 | check(); 34 | 35 | return true; 36 | } catch (Exception $exception) { 37 | return false; 38 | } 39 | } 40 | 41 | /** 42 | * Does the specification fail? 43 | * 44 | * @return bool 45 | */ 46 | public function fails() : bool 47 | { 48 | return !$this->passes(); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: "Code Checks" 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | Build: 7 | name: PHP ${{ matrix.php-versions }} on ${{ matrix.operating-system }} 8 | runs-on: ${{ matrix.operating-system }} 9 | strategy: 10 | matrix: 11 | operating-system: [ubuntu-latest, macos-latest] 12 | php-versions: ['8.0', '8.1', '8.2'] 13 | 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v3 17 | 18 | - name: Setup PHP 19 | uses: shivammathur/setup-php@v2 20 | with: 21 | php-version: ${{ matrix.php-versions }} 22 | tools: composer, pecl 23 | extensions: svm, mbstring, gd, fileinfo 24 | ini-values: memory_limit=-1 25 | 26 | - name: Validate composer.json 27 | run: composer validate 28 | 29 | - name: 
Install Dependencies 30 | run: composer install 31 | 32 | - name: Static Analysis 33 | run: composer analyze 34 | 35 | - name: Unit Tests 36 | run: composer test 37 | 38 | - name: Check Coding Style 39 | run: composer check 40 | -------------------------------------------------------------------------------- /tests/Strategies/PercentileTest.php: -------------------------------------------------------------------------------- 1 | strategy = new Percentile(50.0); 26 | } 27 | 28 | /** 29 | * @test 30 | */ 31 | public function build() : void 32 | { 33 | $this->assertInstanceOf(Percentile::class, $this->strategy); 34 | $this->assertInstanceOf(Strategy::class, $this->strategy); 35 | } 36 | 37 | /** 38 | * @test 39 | */ 40 | public function fitGuess() : void 41 | { 42 | $this->strategy->fit([1, 2, 3, 4, 5]); 43 | 44 | $guess = $this->strategy->guess(); 45 | 46 | $this->assertEquals(3, $guess); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /docs/transformers/image-vectorizer.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Image Vectorizer 4 | Image Vectorizer takes images of the same size and converts them into flat feature vectors of raw color channel intensities. Intensities range from 0 to 255 and can either be read from 1 channel (grayscale) or 3 channels (RGB color) per pixel. 5 | 6 | !!! note 7 | Note that the [GD extension](https://php.net/manual/en/book.image.php) is required to use this transformer. 8 | 9 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful) 10 | 11 | **Data Type Compatibility:** Image 12 | 13 | ## Parameters 14 | | # | Name | Default | Type | Description | 15 | |---|---|---|---|---| 16 | | 1 | grayscale | false | bool | Should we encode the image in grayscale instead of color? 
| 17 | 18 | ## Example 19 | ```php 20 | use Rubix\ML\Transformers\ImageVectorizer; 21 | 22 | $transformer = new ImageVectorizer(true); 23 | ``` 24 | 25 | ## Additional Methods 26 | This transformer does not have any additional methods. 27 | -------------------------------------------------------------------------------- /tests/Backends/Tasks/ProbaTest.php: -------------------------------------------------------------------------------- 1 | new Blob([69.2, 195.7, 40.0], [1.0, 3.0, 0.3]), 26 | 'female' => new Blob([63.7, 168.5, 38.1], [0.8, 2.5, 0.4]), 27 | ], [0.45, 0.55]); 28 | 29 | $training = $generator->generate(50); 30 | 31 | $estimator->train($training); 32 | 33 | $testing = $generator->generate(15); 34 | 35 | $task = new Proba($estimator, $testing); 36 | 37 | $result = $task->compute(); 38 | 39 | $this->assertCount(15, $result); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /tests/Loggers/ScreenTest.php: -------------------------------------------------------------------------------- 1 | logger = new Screen('default'); 25 | } 26 | 27 | /** 28 | * @test 29 | */ 30 | public function build() : void 31 | { 32 | $this->assertInstanceOf(Screen::class, $this->logger); 33 | $this->assertInstanceOf(Logger::class, $this->logger); 34 | $this->assertInstanceOf(LoggerInterface::class, $this->logger); 35 | } 36 | 37 | /** 38 | * @test 39 | */ 40 | public function log() : void 41 | { 42 | $this->expectOutputRegex('/\b(default.INFO: test)\b/'); 43 | 44 | $this->logger->log(LogLevel::INFO, 'test'); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /docs/cross-validation/monte-carlo.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Monte Carlo 4 | Monte Carlo cross validation (or *repeated random subsampling*) is a technique that averages the validation score of a learner over a user-defined number of simulations 
where the learner is trained and tested on random splits of the dataset. The estimated validation score approaches the actual validation score as the number of simulations goes to infinity, however, only a tiny fraction of all possible simulations are needed to produce a pretty good approximation. 5 | 6 | **Interfaces:** [Validator](api.md#validator), [Parallel](#parallel) 7 | 8 | ## Parameters 9 | | # | Name | Default | Type | Description | 10 | |---|---|---|---|---| 11 | | 1 | simulations | 10 | int | The number of simulations i.e. random subsamplings of the dataset. | 12 | | 2 | ratio | 0.2 | float | The ratio of samples to hold out for testing. | 13 | 14 | ## Example 15 | ```php 16 | use Rubix\ML\CrossValidation\MonteCarlo; 17 | 18 | $validator = new MonteCarlo(30, 0.1); 19 | ``` -------------------------------------------------------------------------------- /tests/Backends/Tasks/TrainLearnerTest.php: -------------------------------------------------------------------------------- 1 | new Blob([69.2, 195.7, 40.0], [1.0, 3.0, 0.3]), 26 | 'female' => new Blob([63.7, 168.5, 38.1], [0.8, 2.5, 0.4]), 27 | ], [0.45, 0.55]); 28 | 29 | $dataset = $generator->generate(50); 30 | 31 | $task = new TrainLearner($estimator, $dataset); 32 | 33 | $result = $task->compute(); 34 | 35 | $this->assertInstanceOf(GaussianNB::class, $result); 36 | $this->assertTrue($result->trained()); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /tests/Backends/Tasks/PredictTest.php: -------------------------------------------------------------------------------- 1 | new Blob([69.2, 195.7, 40.0], [1.0, 3.0, 0.3]), 26 | 'female' => new Blob([63.7, 168.5, 38.1], [0.8, 2.5, 0.4]), 27 | ], [0.45, 0.55]); 28 | 29 | $training = $generator->generate(50); 30 | 31 | $estimator->train($training); 32 | 33 | $testing = $generator->generate(15); 34 | 35 | $task = new Predict($estimator, $testing); 36 | 37 | $result = $task->compute(); 38 | 39 | 
$this->assertCount(15, $result); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/Tokenizers/Word.php: -------------------------------------------------------------------------------- 1 | 30 | */ 31 | public function tokenize(string $text) : array 32 | { 33 | $tokens = []; 34 | 35 | preg_match_all(self::WORD_REGEX, $text, $tokens); 36 | 37 | return $tokens[0]; 38 | } 39 | 40 | /** 41 | * Return the string representation of the object. 42 | * 43 | * @internal 44 | * 45 | * @return string 46 | */ 47 | public function __toString() : string 48 | { 49 | return 'Word'; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /docs/neural-network/cost-functions/huber-loss.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Huber Loss 4 | The pseudo Huber Loss function transitions between L1 and L2 loss at a given pivot point (defined by *delta*) such that the function becomes more quadratic as the loss decreases. The combination of L1 and L2 losses makes Huber more robust to outliers while maintaining smoothness near the minimum. 5 | 6 | $$ 7 | L_{\delta}= 8 | \left\{\begin{matrix} 9 | \frac{1}{2}(y - \hat{y})^{2} & if \left | (y - \hat{y}) \right | < \delta\\ 10 | \delta (\left | y - \hat{y} \right | - \frac{1}{2} \delta) & otherwise 11 | \end{matrix}\right. 12 | $$ 13 | 14 | ## Parameters 15 | | # | Name | Default | Type | Description | 16 | |---|---|---|---|---| 17 | | 1 | delta | 1.0 | float | The pivot point i.e. the point where numbers larger will be evaluated with an L1 loss while numbers smaller will be evaluated with an L2 loss. 
| 18 | 19 | ## Example 20 | ```php 21 | use Rubix\ML\NeuralNet\CostFunctions\HuberLoss; 22 | 23 | $costFunction = new HuberLoss(0.5); 24 | ``` -------------------------------------------------------------------------------- /docs/regressors/ridge.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Ridge 4 | L2 regularized linear regression solved using a closed-form solution. The addition of regularization, controlled by the *l2Penalty* hyper-parameter, makes Ridge less likely to overfit the training data than ordinary least squares (OLS). 5 | 6 | **Interfaces:** [Estimator](../estimator.md), [Learner](../learner.md), [Ranks Features](../ranks-features.md), [Persistable](../persistable.md) 7 | 8 | **Data Type Compatibility:** Continuous 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | l2Penalty | 1.0 | float | The strength of the L2 regularization penalty. | 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\Regressors\Ridge; 18 | 19 | $estimator = new Ridge(2.0); 20 | ``` 21 | 22 | ## Additional Methods 23 | Return the weights of features in the decision function. 24 | ```php 25 | public coefficients() : array|null 26 | ``` 27 | 28 | Return the bias added to the decision function. 
29 | ```php 30 | public bias() : float|null 31 | ``` 32 | -------------------------------------------------------------------------------- /tests/Specifications/ExtensionIsLoadedTest.php: -------------------------------------------------------------------------------- 1 | assertSame($expected, $specification->passes()); 26 | } 27 | 28 | /** 29 | * @return Generator 30 | */ 31 | public function passesProvider() : Generator 32 | { 33 | yield [ 34 | ExtensionIsLoaded::with('json'), 35 | true, 36 | ]; 37 | 38 | yield [ 39 | ExtensionIsLoaded::with("I be trappin' where I go"), 40 | false, 41 | ]; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /docs/neural-network/optimizers/cyclical.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Cyclical 4 | The Cyclical optimizer uses a global learning rate that cycles between the lower and upper bound over a designated period while also decaying the upper bound by a factor at each step. Cyclical learning rates have been shown to help escape bad local minima and saddle points of the gradient. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | lower | 0.001 | float | The lower bound on the learning rate. | 10 | | 2 | upper | 0.006 | float | The upper bound on the learning rate. | 11 | | 3 | steps | 100 | int | The number of steps in every half cycle. | 12 | | 4 | decay | 0.99994 | float | The exponential decay factor to decrease the learning rate by every step. | 13 | 14 | ## Example 15 | ```php 16 | use Rubix\ML\NeuralNet\Optimizers\Cyclical; 17 | 18 | $optimizer = new Cyclical(0.001, 0.005, 1000); 19 | ``` 20 | 21 | ## References 22 | [^1]: L. N. Smith. (2017). Cyclical Learning Rates for Training Neural Networks. 
-------------------------------------------------------------------------------- /benchmarks/Transformers/ImageVectorizerBench.php: -------------------------------------------------------------------------------- 1 | dataset = Unlabeled::build($samples); 35 | 36 | $this->transformer = new ImageVectorizer(); 37 | } 38 | 39 | /** 40 | * @Subject 41 | * @Iterations(5) 42 | * @OutputTimeUnit("seconds", precision=3) 43 | */ 44 | public function apply() : void 45 | { 46 | $this->dataset->apply($this->transformer); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /docs/datasets/generators/half-moon.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Half Moon 4 | Generates a dataset consisting of 2-d samples that form the shape of a half moon when plotted on a scatter plot chart. 5 | 6 | **Data Types:** Continuous 7 | 8 | **Label Type:** Continuous 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | x | 0.0 | float | The *x* coordinate of the center of the half moon. | 14 | | 2 | y | 0.0 | float | The *y* coordinate of the center of the half moon. | 15 | | 3 | scale | 1.0 | float | The scaling factor of the half moon. | 16 | | 4 | rotate | 90.0 | float | The amount in degrees to rotate the half moon counterclockwise. | 17 | | 5 | noise | 0.1 | float | The amount of Gaussian noise to add to each data point as a percentage of the scaling factor. | 18 | 19 | ## Example 20 | ```php 21 | use Rubix\ML\Datasets\Generators\HalfMoon; 22 | 23 | $generator = new HalfMoon(4.0, 0.0, 6, 180.0, 0.2); 24 | ``` 25 | 26 | ## Additional Methods 27 | This generator does not have any additional methods. 
28 | -------------------------------------------------------------------------------- /tests/Kernels/SVM/SigmoidalTest.php: -------------------------------------------------------------------------------- 1 | kernel = new Sigmoidal(1e-3); 27 | } 28 | 29 | /** 30 | * @test 31 | */ 32 | public function build() : void 33 | { 34 | $this->assertInstanceOf(Sigmoidal::class, $this->kernel); 35 | $this->assertInstanceOf(Kernel::class, $this->kernel); 36 | } 37 | 38 | /** 39 | * @test 40 | */ 41 | public function options() : void 42 | { 43 | $options = [ 44 | 102 => 3, 45 | 201 => 1e-3, 46 | 205 => 0.0, 47 | ]; 48 | 49 | $this->assertEquals($options, $this->kernel->options()); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/Estimator.php: -------------------------------------------------------------------------------- 1 | 32 | */ 33 | public function compatibility() : array; 34 | 35 | /** 36 | * Return the settings of the hyper-parameters in an associative array. 37 | * 38 | * @internal 39 | * 40 | * @return mixed[] 41 | */ 42 | public function params() : array; 43 | 44 | /** 45 | * Make predictions from a dataset. 46 | * 47 | * @param Dataset $dataset 48 | * @return list 49 | */ 50 | public function predict(Dataset $dataset) : array; 51 | } 52 | -------------------------------------------------------------------------------- /src/Specifications/DatasetIsNotEmpty.php: -------------------------------------------------------------------------------- 1 | dataset = $dataset; 37 | } 38 | 39 | /** 40 | * Perform a check of the specification and throw an exception if invalid. 
41 | * 42 | * @throws EmptyDataset 43 | */ 44 | public function check() : void 45 | { 46 | if ($this->dataset->empty()) { 47 | throw new EmptyDataset(); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /tests/NeuralNet/Initializers/HeTest.php: -------------------------------------------------------------------------------- 1 | initializer = new He(); 27 | } 28 | 29 | /** 30 | * @test 31 | */ 32 | public function build() : void 33 | { 34 | $this->assertInstanceOf(He::class, $this->initializer); 35 | $this->assertInstanceOf(Initializer::class, $this->initializer); 36 | } 37 | 38 | /** 39 | * @test 40 | */ 41 | public function initialize() : void 42 | { 43 | $w = $this->initializer->initialize(4, 3); 44 | 45 | $this->assertInstanceOf(Matrix::class, $w); 46 | $this->assertEquals([3, 4], $w->shape()); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /benchmarks/Tokenizers/WordBench.php: -------------------------------------------------------------------------------- 1 | tokenizer = new Word(); 23 | } 24 | 25 | /** 26 | * @Subject 27 | * @revs(1000) 28 | * @Iterations(5) 29 | * @OutputTimeUnit("milliseconds", precision=3) 30 | */ 31 | public function tokenize() : void 32 | { 33 | $this->tokenizer->tokenize(self::SAMPLE_TEXT); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /docs/neural-network/optimizers/adam.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Adam 4 | Short for *Adaptive Moment Estimation*, the Adam Optimizer combines both Momentum and RMS properties. In addition to storing an exponentially decaying average of past squared gradients like [RMSprop](rms-prop.md), Adam also keeps an exponentially decaying average of past gradients, similar to [Momentum](momentum.md). 
Whereas Momentum can be seen as a ball running down a slope, Adam behaves like a heavy ball with friction. 5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | rate | 0.001 | float | The learning rate that controls the global step size. | 10 | | 2 | momentumDecay | 0.1 | float | The decay rate of the accumulated velocity. | 11 | | 3 | normDecay | 0.001 | float | The decay rate of the rms property. | 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\Optimizers\Adam; 16 | 17 | $optimizer = new Adam(0.0001, 0.1, 0.001); 18 | ``` 19 | 20 | ## References 21 | [^1]: D. P. Kingma et al. (2014). Adam: A Method for Stochastic Optimization. -------------------------------------------------------------------------------- /docs/transformers/linear-discriminant-analysis.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Linear Discriminant Analysis 4 | Linear Discriminant Analysis (LDA) is a supervised dimensionality reduction technique that selects the most informative features using information in the class labels. More formally, LDA finds a linear combination of features that characterizes or best *discriminates* two or more classes. 5 | 6 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful), [Persistable](../persistable.md) 7 | 8 | **Data Type Compatibility:** Continuous only 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | dimensions | | int | The target number of dimensions to project onto. 
| 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\Transformers\LinearDiscriminantAnalysis; 18 | 19 | $transformer = new LinearDiscriminantAnalysis(20); 20 | ``` 21 | 22 | ## Additional Methods 23 | Return the proportion of information lost due to the transformation: 24 | ```php 25 | public lossiness() : ?float 26 | ``` 27 | -------------------------------------------------------------------------------- /docs/transformers/multibyte-text-normalizer.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Multibyte Text Normalizer 4 | This transformer converts the characters in all [multibyte strings](https://www.php.net/manual/en/intro.mbstring.php) to the same case. Multibyte strings contain characters such as accents (é, è, à), emojis (😀, 😉) or characters of non-Roman alphabets such as Chinese and Cyrillic. 5 | 6 | !!! note 7 | ⚠️ We recommend you install the [mbstring extension](https://www.php.net/manual/en/book.mbstring.php) for best performance. 8 | 9 | **Interfaces:** [Transformer](api.md#transformer) 10 | 11 | **Data Type Compatibility:** Categorical 12 | 13 | ## Parameters 14 | | # | Name | Default | Type | Description | 15 | |---|---|---|---|---| 16 | | 1 | uppercase | false | bool | Should the text be converted to uppercase? | 17 | 18 | ## Example 19 | ```php 20 | use Rubix\ML\Transformers\MultibyteTextNormalizer; 21 | 22 | $transformer = new MultibyteTextNormalizer(false); 23 | ``` 24 | 25 | ## Additional Methods 26 | This transformer does not have any additional methods. 
27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 The Rubix ML Community 4 | Copyright (c) 2023 Andrew DalPino 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /benchmarks/Tokenizers/NGramBench.php: -------------------------------------------------------------------------------- 1 | tokenizer = new NGram(1, 2); 23 | } 24 | 25 | /** 26 | * @Subject 27 | * @revs(1000) 28 | * @Iterations(5) 29 | * @OutputTimeUnit("milliseconds", precision=3) 30 | */ 31 | public function tokenize() : void 32 | { 33 | $this->tokenizer->tokenize(self::SAMPLE_TEXT); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/Exceptions/ClassRevisionMismatch.php: -------------------------------------------------------------------------------- 1 | = 0 ? 'up' : 'down'; 24 | 25 | parent::__construct('Object incompatible with class revision,' 26 | . " {$direction}grade to version $createdWithVersion."); 27 | 28 | $this->createdWithVersion = $createdWithVersion; 29 | } 30 | 31 | /** 32 | * Return the version number of the library that the incompatible object was created with. 
33 | * 34 | * @return string 35 | */ 36 | public function createdWithVersion() : string 37 | { 38 | return $this->createdWithVersion; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /benchmarks/Datasets/SplittingBench.php: -------------------------------------------------------------------------------- 1 | new Blob([5.0, 3.42, 1.46, 0.24], [0.35, 0.38, 0.17, 0.1]), 25 | 'Iris-versicolor' => new Blob([5.94, 2.77, 4.26, 1.33], [0.51, 0.31, 0.47, 0.2]), 26 | 'Iris-virginica' => new Blob([6.59, 2.97, 5.55, 2.03], [0.63, 0.32, 0.55, 0.27]), 27 | ]); 28 | 29 | $this->dataset = $generator->generate(self::DATASET_SIZE); 30 | } 31 | 32 | /** 33 | * @Subject 34 | * @Iterations(5) 35 | * @OutputTimeUnit("milliseconds", precision=3) 36 | */ 37 | public function splitByFeature() : void 38 | { 39 | $this->dataset->splitByFeature(2, 3.0); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /docs/neural-network/activation-functions/leaky-relu.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Leaky ReLU 4 | Leaky Rectified Linear Units are activation functions that output `x` when x is greater than or equal to 0 or `x` scaled by a small *leakage* coefficient when the input is less than 0. Leaky rectifiers have the benefit of allowing a small gradient to flow through during backpropagation even though they might not have activated during the forward pass. 5 | 6 | $$ 7 | {\displaystyle LeakyReLU = {\begin{cases}\lambda x&{\text{if }}x<0\\x&{\text{if }}x\geq 0\end{cases}}} 8 | $$ 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | leakage | 0.1 | float | The amount of leakage as a proportion of the input value to allow to pass through when not activated. 
| 14 | 15 | ## Example 16 | ```php 17 | use Rubix\ML\NeuralNet\ActivationFunctions\LeakyReLU; 18 | 19 | $activationFunction = new LeakyReLU(0.3); 20 | ``` 21 | 22 | ## References 23 | [^1]: A. L. Maas et al. (2013). Rectifier Nonlinearities Improve Neural Network Acoustic Models. 24 | -------------------------------------------------------------------------------- /src/NeuralNet/ActivationFunctions/Softmax.php: -------------------------------------------------------------------------------- 1 | exp()->transpose(); 32 | 33 | $total = $zHat->sum()->clipLower(EPSILON); 34 | 35 | return $zHat->divide($total)->transpose(); 36 | } 37 | 38 | /** 39 | * Return the string representation of the object. 40 | * 41 | * @internal 42 | * 43 | * @return string 44 | */ 45 | public function __toString() : string 46 | { 47 | return 'Softmax'; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /tests/NeuralNet/Initializers/LeCunTest.php: -------------------------------------------------------------------------------- 1 | initializer = new LeCun(); 27 | } 28 | 29 | /** 30 | * @test 31 | */ 32 | public function build() : void 33 | { 34 | $this->assertInstanceOf(LeCun::class, $this->initializer); 35 | $this->assertInstanceOf(Initializer::class, $this->initializer); 36 | } 37 | 38 | /** 39 | * @test 40 | */ 41 | public function initialize() : void 42 | { 43 | $w = $this->initializer->initialize(4, 3); 44 | 45 | $this->assertInstanceOf(Matrix::class, $w); 46 | $this->assertEquals([3, 4], $w->shape()); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /benchmarks/Tokenizers/SentenceBench.php: -------------------------------------------------------------------------------- 1 | tokenizer = new Sentence(); 23 | } 24 | 25 | /** 26 | * @Subject 27 | * @revs(1000) 28 | * @Iterations(5) 29 | * @OutputTimeUnit("milliseconds", precision=3) 30 | */ 31 | public function tokenize() : void 32 | { 33 | 
$this->tokenizer->tokenize(self::SAMPLE_TEXT); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /docs/cross-validation/metrics/rand-index.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Rand Index 4 | The Adjusted Rand Index is a measure of similarity between a clustering and some ground-truth that is adjusted for chance. It considers all pairs of samples that are assigned in the same or different clusters in the predicted and empirical clusterings. 5 | 6 | $$ 7 | {\displaystyle ARI = {\frac {\left.\sum _{ij}{\binom {n_{ij}}{2}}-\left[\sum _{i}{\binom {a_{i}}{2}}\sum _{j}{\binom {b_{j}}{2}}\right]\right/{\binom {n}{2}}}{\left.{\frac {1}{2}}\left[\sum _{i}{\binom {a_{i}}{2}}+\sum _{j}{\binom {b_{j}}{2}}\right]-\left[\sum _{i}{\binom {a_{i}}{2}}\sum _{j}{\binom {b_{j}}{2}}\right]\right/{\binom {n}{2}}}}} 8 | $$ 9 | 10 | **Estimator Compatibility:** Clusterer 11 | 12 | **Score Range:** -1 to 1 13 | 14 | ## Parameters 15 | This metric does not have any parameters. 16 | 17 | ## Example 18 | ```php 19 | use Rubix\ML\CrossValidation\Metrics\RandIndex; 20 | 21 | $metric = new RandIndex(); 22 | ``` 23 | 24 | ## References 25 | [^1]: W. M. Rand. (1971). Objective Criteria for the Evaluation of Clustering Methods. -------------------------------------------------------------------------------- /docs/neural-network/optimizers/momentum.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Momentum 4 | Momentum accelerates each update step by accumulating velocity from past updates and adding a factor of the previous velocity to the current step. Momentum can help speed up training and escape bad local minima when compared with [Stochastic](stochastic.md) Gradient Descent. 
5 | 6 | ## Parameters 7 | | # | Name | Default | Type | Description | 8 | |---|---|---|---|---| 9 | | 1 | rate | 0.001 | float | The learning rate that controls the global step size. | 10 | | 2 | decay | 0.1 | float | The decay rate of the accumulated velocity. | 11 | | 3 | lookahead | false | bool | Should we employ Nesterov's lookahead (NAG) when updating the parameters? | 12 | 13 | ## Example 14 | ```php 15 | use Rubix\ML\NeuralNet\Optimizers\Momentum; 16 | 17 | $optimizer = new Momentum(0.01, 0.1, true); 18 | ``` 19 | 20 | ## References 21 | [^1]: D. E. Rumelhart et al. (1988). Learning representations by back-propagating errors. 22 | [^2]: I. Sutskever et al. (2013). On the importance of initialization and momentum in deep learning. 23 | -------------------------------------------------------------------------------- /src/Extractors/Concatenator.php: -------------------------------------------------------------------------------- 1 | > 23 | */ 24 | protected iterable $iterators; 25 | 26 | /** 27 | * @param iterable> $iterators 28 | */ 29 | public function __construct(iterable $iterators) 30 | { 31 | $this->iterators = $iterators; 32 | } 33 | 34 | /** 35 | * Return an iterator for the rows of a data table. 36 | * 37 | * @return \Generator 38 | */ 39 | public function getIterator() : Traversable 40 | { 41 | foreach ($this->iterators as $iterator) { 42 | foreach ($iterator as $record) { 43 | yield $record; 44 | } 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /docs/transformers/min-max-normalizer.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Min Max Normalizer 4 | The *Min Max* Normalizer scales the input features to a value between a user-specified range (*default* 0 to 1). 
5 | 6 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful), [Elastic](api.md#elastic), [Reversible](api.md#reversible), [Persistable](../persistable.md) 7 | 8 | **Data Type Compatibility:** Continuous 9 | 10 | ## Parameters 11 | | # | Name | Default | Type | Description | 12 | |---|---|---|---|---| 13 | | 1 | min | 0.0 | float | The minimum value of the transformed features. | 14 | | 2 | max | 1.0 | float | The maximum value of the transformed features. | 15 | 16 | ## Example 17 | ```php 18 | use Rubix\ML\Transformers\MinMaxNormalizer; 19 | 20 | $transformer = new MinMaxNormalizer(-5.0, 5.0); 21 | ``` 22 | 23 | ## Additional Methods 24 | Return the minimum values for each fitted feature column: 25 | ```php 26 | public minimums() : ?array 27 | ``` 28 | 29 | Return the maximum values for each fitted feature column: 30 | ```php 31 | public maximums() : ?array 32 | ``` 33 | -------------------------------------------------------------------------------- /src/Tokenizers/Sentence.php: -------------------------------------------------------------------------------- 1 | 30 | */ 31 | public function tokenize(string $text) : array 32 | { 33 | return preg_split(self::SENTENCE_REGEX, $text, -1, PREG_SPLIT_NO_EMPTY) ?: []; 34 | } 35 | 36 | /** 37 | * Return the string representation of the object. 
38 | * 39 | * @internal 40 | * 41 | * @return string 42 | */ 43 | public function __toString() : string 44 | { 45 | return 'Sentence'; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /tests/NeuralNet/Initializers/NormalTest.php: -------------------------------------------------------------------------------- 1 | initializer = new Normal(0.05); 27 | } 28 | 29 | /** 30 | * @test 31 | */ 32 | public function build() : void 33 | { 34 | $this->assertInstanceOf(Normal::class, $this->initializer); 35 | $this->assertInstanceOf(Initializer::class, $this->initializer); 36 | } 37 | 38 | /** 39 | * @test 40 | */ 41 | public function initialize() : void 42 | { 43 | $w = $this->initializer->initialize(4, 3); 44 | 45 | $this->assertInstanceOf(Matrix::class, $w); 46 | $this->assertEquals([3, 4], $w->shape()); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /benchmarks/Tokenizers/KSkipNGramBench.php: -------------------------------------------------------------------------------- 1 | tokenizer = new KSkipNGram(2, 3); 23 | } 24 | 25 | /** 26 | * @Subject 27 | * @revs(1000) 28 | * @Iterations(5) 29 | * @OutputTimeUnit("milliseconds", precision=3) 30 | */ 31 | public function tokenize() : void 32 | { 33 | $this->tokenizer->tokenize(self::SAMPLE_TEXT); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /benchmarks/Tokenizers/WhitespaceBench.php: -------------------------------------------------------------------------------- 1 | tokenizer = new Whitespace(); 23 | } 24 | 25 | /** 26 | * @Subject 27 | * @revs(1000) 28 | * @Iterations(5) 29 | * @OutputTimeUnit("milliseconds", precision=3) 30 | */ 31 | public function tokenize() : void 32 | { 33 | $this->tokenizer->tokenize(self::SAMPLE_TEXT); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/Traits/Multiprocessing.php: 
-------------------------------------------------------------------------------- 1 | **Note**: The optimal number of workers will depend on the system 16 | * specifications of the computer. Fewer workers than CPU cores can result in 17 | * slower performance but too many workers can cause excess overhead. 18 | * 19 | * @category Machine Learning 20 | * @package Rubix/ML 21 | * @author Andrew DalPino 22 | */ 23 | trait Multiprocessing 24 | { 25 | /** 26 | * The parallel processing backend. 27 | * 28 | * @var Backend 29 | */ 30 | protected Backend $backend; 31 | 32 | /** 33 | * Set the parallel processing backend. 34 | * 35 | * @param Backend $backend 36 | */ 37 | public function setBackend(Backend $backend) : void 38 | { 39 | $this->backend = $backend; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /tests/Kernels/SVM/PolynomialTest.php: -------------------------------------------------------------------------------- 1 | kernel = new Polynomial(3, 1e-3); 27 | } 28 | 29 | /** 30 | * @test 31 | */ 32 | public function build() : void 33 | { 34 | $this->assertInstanceOf(Polynomial::class, $this->kernel); 35 | $this->assertInstanceOf(Kernel::class, $this->kernel); 36 | } 37 | 38 | /** 39 | * @test 40 | */ 41 | public function options() : void 42 | { 43 | $expected = [ 44 | 102 => 1, 45 | 201 => 1e-3, 46 | 103 => 3, 47 | 205 => 0.0, 48 | ]; 49 | 50 | $this->assertEquals($expected, $this->kernel->options()); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /tests/NeuralNet/Initializers/Xavier1Test.php: -------------------------------------------------------------------------------- 1 | initializer = new Xavier1(); 27 | } 28 | 29 | /** 30 | * @test 31 | */ 32 | public function build() : void 33 | { 34 | $this->assertInstanceOf(Xavier1::class, $this->initializer); 35 | $this->assertInstanceOf(Initializer::class, $this->initializer); 36 | } 37 | 38 | /** 39 | * @test 40 | */ 
41 | public function initialize() : void 42 | { 43 | $w = $this->initializer->initialize(4, 3); 44 | 45 | $this->assertInstanceOf(Matrix::class, $w); 46 | $this->assertEquals([3, 4], $w->shape()); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /tests/NeuralNet/Initializers/Xavier2Test.php: -------------------------------------------------------------------------------- 1 | initializer = new Xavier2(); 27 | } 28 | 29 | /** 30 | * @test 31 | */ 32 | public function build() : void 33 | { 34 | $this->assertInstanceOf(Xavier2::class, $this->initializer); 35 | $this->assertInstanceOf(Initializer::class, $this->initializer); 36 | } 37 | 38 | /** 39 | * @test 40 | */ 41 | public function initialize() : void 42 | { 43 | $w = $this->initializer->initialize(4, 3); 44 | 45 | $this->assertInstanceOf(Matrix::class, $w); 46 | $this->assertEquals([3, 4], $w->shape()); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /benchmarks/Tokenizers/WordStemmerBench.php: -------------------------------------------------------------------------------- 1 | tokenizer = new WordStemmer('english'); 23 | } 24 | 25 | /** 26 | * @Subject 27 | * @revs(1000) 28 | * @Iterations(5) 29 | * @OutputTimeUnit("milliseconds", precision=3) 30 | */ 31 | public function tokenize() : void 32 | { 33 | $this->tokenizer->tokenize(self::SAMPLE_TEXT); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /docs/transformers/boolean-converter.md: -------------------------------------------------------------------------------- 1 | [source] 2 | 3 | # Boolean Converter 4 | This transformer is used to convert boolean values to a compatible continuous or categorical datatype. Strings should be 5 | used when the boolean should be treated as a categorical value. Ints or floats when the boolean should be treated as a 6 | continuous value. 
7 | 8 | **Interfaces:** [Transformer](api.md#transformer) 9 | 10 | **Data Type Compatibility:** Categorical, Continuous 11 | 12 | ## Parameters 13 | | # | Name | Default | Type | Description | 14 | |---|---|---|---|---| 15 | | 1 | trueValue | 'true' | string, int, float | The value to convert `true` to. | 16 | | 2 | falseValue | 'false' | string, int, float | The value to convert `false` to. | 17 | 18 | ## Example 19 | ```php 20 | use Rubix\ML\Transformers\BooleanConverter; 21 | 22 | $transformer = new BooleanConverter('true', 'false'); 23 | 24 | $transformer = new BooleanConverter('tall', 'not tall'); 25 | 26 | $transformer = new BooleanConverter(1, 0); 27 | ``` 28 | 29 | ## Additional Methods 30 | This transformer does not have any additional methods. 31 | -------------------------------------------------------------------------------- /src/NeuralNet/Layers/Layer.php: -------------------------------------------------------------------------------- 1 | name = $name; 38 | } 39 | 40 | /** 41 | * Perform a check of the specification and throw an exception if invalid. 42 | * 43 | * @throws MissingExtension 44 | */ 45 | public function check() : void 46 | { 47 | if (!extension_loaded($this->name)) { 48 | throw new MissingExtension($this->name); 49 | } 50 | } 51 | } 52 | --------------------------------------------------------------------------------