├── .github
├── FUNDING.yml
└── workflows
│ └── ci.yml
├── .gitattributes
├── tests
├── test.png
├── test.sqlite
├── test_rotated.png
├── test.csv
├── Helpers
│ └── CPUTest.php
├── test.ndjson
├── DeferredTest.php
├── Kernels
│ └── SVM
│ │ ├── LinearTest.php
│ │ ├── RBFTest.php
│ │ ├── SigmoidalTest.php
│ │ └── PolynomialTest.php
├── Strategies
│ └── PercentileTest.php
├── Backends
│ └── Tasks
│ │ ├── ProbaTest.php
│ │ ├── TrainLearnerTest.php
│ │ └── PredictTest.php
├── Loggers
│ └── ScreenTest.php
├── Specifications
│ └── ExtensionIsLoadedTest.php
└── NeuralNet
│ └── Initializers
│ ├── HeTest.php
│ ├── LeCunTest.php
│ ├── NormalTest.php
│ ├── Xavier1Test.php
│ └── Xavier2Test.php
├── docs
├── images
│ ├── app-icon-large.png
│ ├── app-icon-medium.png
│ ├── app-icon-small.png
│ ├── app-icon-apple-touch.png
│ ├── neural-network-graph.png
│ ├── half-moon-scatterplot.png
│ ├── iris-dataset-2d-scatterplot.png
│ ├── housing-dataset-1d-histogram.png
│ ├── iris-dataset-t-sne-embedding.png
│ └── iris-dataset-truncated-svd-embedding.png
├── css
│ └── custom.css
├── js
│ └── custom.js
├── kernels
│ ├── svm
│ │ ├── linear.md
│ │ ├── rbf.md
│ │ ├── sigmoidal.md
│ │ └── polynomial.md
│ └── distance
│ │ ├── sparse-cosine.md
│ │ ├── hamming.md
│ │ ├── jaccard.md
│ │ ├── diagonal.md
│ │ ├── manhattan.md
│ │ ├── euclidean.md
│ │ ├── safe-euclidean.md
│ │ ├── canberra.md
│ │ └── minkowski.md
├── clusterers
│ └── seeders
│ │ ├── random.md
│ │ ├── preset.md
│ │ ├── k-mc2.md
│ │ └── plus-plus.md
├── persistable.md
├── serializers
│ ├── native.md
│ ├── api.md
│ ├── gzip-native.md
│ └── rbx.md
├── strategies
│ ├── mean.md
│ ├── prior.md
│ ├── wild-guess.md
│ ├── constant.md
│ ├── percentile.md
│ └── k-most-frequent.md
├── tokenizers
│ ├── word.md
│ ├── sentence.md
│ ├── whitespace.md
│ ├── n-gram.md
│ ├── word-stemmer.md
│ └── k-skip-n-gram.md
├── neural-network
│ ├── initializers
│ │ ├── constant.md
│ │ ├── uniform.md
│ │ ├── normal.md
│ │ ├── lecun.md
│ │ ├── xavier-1.md
│ │ ├── xavier-2.md
│ │ └── he.md
│ ├── optimizers
│ │ ├── stochastic.md
│ │ ├── adagrad.md
│ │ ├── rms-prop.md
│ │ ├── step-decay.md
│ │ ├── adamax.md
│ │ ├── cyclical.md
│ │ ├── adam.md
│ │ └── momentum.md
│ ├── activation-functions
│ │ ├── soft-plus.md
│ │ ├── softsign.md
│ │ ├── silu.md
│ │ ├── gelu.md
│ │ ├── sigmoid.md
│ │ ├── softmax.md
│ │ ├── hyperbolic-tangent.md
│ │ ├── relu.md
│ │ ├── selu.md
│ │ ├── thresholded-relu.md
│ │ ├── elu.md
│ │ └── leaky-relu.md
│ ├── cost-functions
│ │ ├── cross-entropy.md
│ │ ├── least-squares.md
│ │ ├── relative-entropy.md
│ │ └── huber-loss.md
│ └── hidden-layers
│ │ ├── activation.md
│ │ ├── noise.md
│ │ ├── dropout.md
│ │ ├── swish.md
│ │ └── prelu.md
├── datasets
│ ├── generators
│ │ ├── api.md
│ │ ├── circle.md
│ │ └── half-moon.md
│ └── unlabeled.md
├── persisters
│ └── api.md
├── loggers
│ └── screen.md
├── scoring.md
├── transformers
│ ├── l1-normalizer.md
│ ├── l2-normalizer.md
│ ├── stop-word-filter.md
│ ├── max-absolute-scaler.md
│ ├── text-normalizer.md
│ ├── image-resizer.md
│ ├── numeric-string-converter.md
│ ├── interval-discretizer.md
│ ├── one-hot-encoder.md
│ ├── polynomial-expander.md
│ ├── image-vectorizer.md
│ ├── linear-discriminant-analysis.md
│ ├── multibyte-text-normalizer.md
│ ├── min-max-normalizer.md
│ └── boolean-converter.md
├── backends
│ ├── serial.md
│ └── amp.md
├── cross-validation
│ ├── metrics
│ │ ├── r-squared.md
│ │ ├── top-k-accuracy.md
│ │ ├── accuracy.md
│ │ ├── probabilistic-accuracy.md
│ │ ├── brier-score.md
│ │ ├── completeness.md
│ │ ├── homogeneity.md
│ │ ├── informedness.md
│ │ ├── mean-squared-error.md
│ │ ├── mean-absolute-error.md
│ │ ├── median-absolute-error.md
│ │ ├── rmse.md
│ │ ├── v-measure.md
│ │ ├── f-beta.md
│ │ └── rand-index.md
│ ├── leave-p-out.md
│ ├── hold-out.md
│ ├── k-fold.md
│ ├── reports
│ │ ├── aggregate-report.md
│ │ ├── contingency-table.md
│ │ └── confusion-matrix.md
│ ├── api.md
│ └── monte-carlo.md
├── extractors
│ ├── column-filter.md
│ ├── concatenator.md
│ ├── ndjson.md
│ └── column-picker.md
├── online.md
├── installation.md
├── estimator.md
├── ranks-features.md
├── parallel.md
├── learner.md
└── regressors
│ └── ridge.md
├── phpstan.neon
├── src
├── NeuralNet
│ ├── CostFunctions
│ │ ├── RegressionLoss.php
│ │ ├── ClassificationLoss.php
│ │ └── CostFunction.php
│ ├── Layers
│ │ ├── Input.php
│ │ ├── Hidden.php
│ │ ├── Output.php
│ │ ├── Parametric.php
│ │ └── Layer.php
│ ├── Network.php
│ ├── Optimizers
│ │ ├── Adaptive.php
│ │ └── Optimizer.php
│ ├── Initializers
│ │ └── Initializer.php
│ └── ActivationFunctions
│ │ ├── ActivationFunction.php
│ │ └── Softmax.php
├── Exceptions
│ ├── RubixMLException.php
│ ├── RuntimeException.php
│ ├── EmptyDataset.php
│ ├── InvalidArgumentException.php
│ ├── LabelsAreMissing.php
│ ├── MissingExtension.php
│ ├── EstimatorIncompatibleWithMetric.php
│ ├── IncorrectDatasetDimensionality.php
│ └── ClassRevisionMismatch.php
├── Loggers
│ ├── Logger.php
│ └── BlackHole.php
├── Learner.php
├── Graph
│ ├── Nodes
│ │ ├── Node.php
│ │ ├── BinaryNode.php
│ │ ├── Outcome.php
│ │ ├── Decision.php
│ │ ├── Hypercube.php
│ │ └── Hypersphere.php
│ └── Trees
│ │ ├── Tree.php
│ │ └── BinaryTree.php
├── Kernels
│ ├── Distance
│ │ ├── NaNSafe.php
│ │ └── Distance.php
│ └── SVM
│ │ └── Kernel.php
├── Extractors
│ ├── Extractor.php
│ ├── Exporter.php
│ └── Concatenator.php
├── Persistable.php
├── Backends
│ ├── Tasks
│ │ ├── Task.php
│ │ ├── TrainLearner.php
│ │ ├── Predict.php
│ │ └── Proba.php
│ └── Backend.php
├── EstimatorWrapper.php
├── AnomalyDetectors
│ └── Scoring.php
├── Parallel.php
├── Trainable.php
├── Online.php
├── RanksFeatures.php
├── Transformers
│ ├── Elastic.php
│ ├── Reversible.php
│ ├── Stateful.php
│ └── Transformer.php
├── Verbose.php
├── Tokenizers
│ ├── Tokenizer.php
│ ├── Word.php
│ └── Sentence.php
├── Datasets
│ └── Generators
│ │ └── Generator.php
├── Probabilistic.php
├── CrossValidation
│ ├── Validator.php
│ ├── Reports
│ │ └── ReportGenerator.php
│ └── Metrics
│ │ ├── ProbabilisticMetric.php
│ │ └── Metric.php
├── Clusterers
│ └── Seeders
│ │ ├── Seeder.php
│ │ └── Random.php
├── Persisters
│ └── Persister.php
├── Serializers
│ └── Serializer.php
├── constants.php
├── Traits
│ ├── LoggerAware.php
│ └── Multiprocessing.php
├── Strategies
│ └── Strategy.php
├── Specifications
│ ├── Specification.php
│ ├── DatasetIsNotEmpty.php
│ └── ExtensionIsLoaded.php
└── Estimator.php
├── phpbench.json.dist
├── .gitignore
├── benchmarks
├── Transformers
│ └── ImageVectorizerBench.php
├── Tokenizers
│ ├── WordBench.php
│ ├── NGramBench.php
│ ├── SentenceBench.php
│ ├── KSkipNGramBench.php
│ ├── WhitespaceBench.php
│ └── WordStemmerBench.php
└── Datasets
│ └── SplittingBench.php
└── LICENSE
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [andrewdalpino]
2 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 |
3 | *.php text eol=lf
--------------------------------------------------------------------------------
/tests/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/tests/test.png
--------------------------------------------------------------------------------
/tests/test.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/tests/test.sqlite
--------------------------------------------------------------------------------
/tests/test_rotated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/tests/test_rotated.png
--------------------------------------------------------------------------------
/docs/images/app-icon-large.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/app-icon-large.png
--------------------------------------------------------------------------------
/docs/images/app-icon-medium.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/app-icon-medium.png
--------------------------------------------------------------------------------
/docs/images/app-icon-small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/app-icon-small.png
--------------------------------------------------------------------------------
/docs/images/app-icon-apple-touch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/app-icon-apple-touch.png
--------------------------------------------------------------------------------
/docs/images/neural-network-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/neural-network-graph.png
--------------------------------------------------------------------------------
/docs/images/half-moon-scatterplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/half-moon-scatterplot.png
--------------------------------------------------------------------------------
/docs/images/iris-dataset-2d-scatterplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/iris-dataset-2d-scatterplot.png
--------------------------------------------------------------------------------
/docs/images/housing-dataset-1d-histogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/housing-dataset-1d-histogram.png
--------------------------------------------------------------------------------
/docs/images/iris-dataset-t-sne-embedding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/iris-dataset-t-sne-embedding.png
--------------------------------------------------------------------------------
/docs/css/custom.css:
--------------------------------------------------------------------------------
1 | .md-typeset table:not([class]) th {
2 | min-width: auto;
3 | }
4 |
5 | #version-selector {
6 | display: none;
7 | }
8 |
--------------------------------------------------------------------------------
/docs/images/iris-dataset-truncated-svd-embedding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RubixML/ML/HEAD/docs/images/iris-dataset-truncated-svd-embedding.png
--------------------------------------------------------------------------------
/phpstan.neon:
--------------------------------------------------------------------------------
1 | parameters:
2 | level: 8
3 | paths:
4 | - 'src'
5 | - 'tests'
6 | - 'benchmarks'
7 | excludePaths:
8 | - src/Backends/Amp.php
9 |
--------------------------------------------------------------------------------
/src/NeuralNet/CostFunctions/RegressionLoss.php:
--------------------------------------------------------------------------------
1 |
15 | */
16 | interface Extractor extends IteratorAggregate
17 | {
18 | //
19 | }
20 |
--------------------------------------------------------------------------------
/src/Persistable.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Linear
4 | A simple linear kernel computed by the dot product of two vectors.
5 |
6 | ## Parameters
7 | This kernel does not have any parameters.
8 |
9 | ## Example
10 | ```php
11 | use Rubix\ML\Kernels\SVM\Linear;
12 |
13 | $kernel = new Linear();
14 | ```
--------------------------------------------------------------------------------
/src/Backends/Tasks/Task.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Random
4 | Completely random selection of seeds from a given dataset.
5 |
6 | ## Parameters
7 | This seeder does not have any parameters.
8 |
9 | ## Example
10 | ```php
11 | use Rubix\ML\Clusterers\Seeders\Random;
12 |
13 | $seeder = new Random();
14 | ```
--------------------------------------------------------------------------------
/docs/persistable.md:
--------------------------------------------------------------------------------
1 | # Persistable
2 | An estimator that implements the Persistable interface can be serialized by a [Serializer](serializers/api.md) or saved and loaded using the [Persistent Model](persistent-model.md) meta-estimator.
3 |
4 | To return the current class revision hash:
5 | ```php
6 | public revision() : string
7 | ```
8 |
9 | ```php
10 | echo $persistable->revision();
11 | ```
12 |
13 | ```
14 | e7eeec9a
15 | ```
16 |
--------------------------------------------------------------------------------
/docs/serializers/native.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Native
4 | The native bytecode format that comes bundled with PHP core.
5 |
6 | ## Parameters
7 | This serializer does not have any parameters.
8 |
9 | ## Example
10 | ```php
11 | use Rubix\ML\Serializers\Native;
12 |
13 | $serializer = new Native();
14 | ```
15 |
--------------------------------------------------------------------------------
/docs/strategies/mean.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Mean
4 | This strategy always predicts the mean of the fitted data.
5 |
6 | **Data Type:** Continuous
7 |
8 | ## Parameters
9 | This strategy does not have any parameters.
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Strategies\Mean;
14 |
15 | $strategy = new Mean();
16 | ```
--------------------------------------------------------------------------------
/docs/tokenizers/word.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Word Tokenizer
4 | The Word tokenizer uses a regular expression to tokenize the words in a blob of text.
5 |
6 | ## Parameters
7 | This tokenizer does not have any parameters.
8 |
9 | ## Example
10 | ```php
11 | use Rubix\ML\Tokenizers\Word;
12 |
13 | $tokenizer = new Word();
14 | ```
15 |
--------------------------------------------------------------------------------
/src/AnomalyDetectors/Scoring.php:
--------------------------------------------------------------------------------
1 | $iterator
18 | */
19 | public function export(iterable $iterator) : void;
20 | }
21 |
--------------------------------------------------------------------------------
/docs/tokenizers/sentence.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Sentence Tokenizer
4 | This tokenizer matches sentences starting with a letter and ending with a punctuation mark.
5 |
6 | ## Parameters
7 | This tokenizer does not have any parameters.
8 |
9 | ## Example
10 | ```php
11 | use Rubix\ML\Tokenizers\Sentence;
12 |
13 | $tokenizer = new Sentence();
14 | ```
--------------------------------------------------------------------------------
/src/Graph/Nodes/BinaryNode.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Prior
4 | A strategy where the probability of guessing a class is equal to the class's prior probability.
5 |
6 | **Data Type:** Categorical
7 |
8 | ## Parameters
9 | This strategy does not have any parameters.
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Strategies\Prior;
14 |
15 | $strategy = new Prior();
16 | ```
--------------------------------------------------------------------------------
/src/NeuralNet/Network.php:
--------------------------------------------------------------------------------
1 |
22 | */
23 | public function layers() : Traversable;
24 | }
25 |
--------------------------------------------------------------------------------
/src/Kernels/SVM/Kernel.php:
--------------------------------------------------------------------------------
1 | > $samples
18 | */
19 | public function reverseTransform(array &$samples) : void;
20 | }
21 |
--------------------------------------------------------------------------------
/docs/strategies/wild-guess.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Wild Guess
4 | Guess a random number somewhere between the minimum and maximum computed by fitting a collection of values.
5 |
6 | **Data Type:** Continuous
7 |
8 | ## Parameters
9 | This strategy does not have any parameters.
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Strategies\WildGuess;
14 |
15 | $strategy = new WildGuess();
16 | ```
--------------------------------------------------------------------------------
/src/Graph/Nodes/Outcome.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # RBF
4 | A non-linear radial basis function (RBF) kernel that computes the distance from a centroid or origin.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | gamma | null | float | The kernel coefficient. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Kernels\SVM\RBF;
14 |
15 | $kernel = new RBF(null);
16 | ```
--------------------------------------------------------------------------------
/src/Verbose.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Constant
4 | Always guess the same value.
5 |
6 | **Data Type:** Continuous
7 |
8 | ## Parameters
9 | | # | Name | Default | Type | Description |
10 | |---|---|---|---|---|
11 | | 1 | value | 0.0 | float | The value to constantly guess. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\Strategies\Constant;
16 |
17 | $strategy = new Constant(0.0);
18 | ```
--------------------------------------------------------------------------------
/src/NeuralNet/Optimizers/Adaptive.php:
--------------------------------------------------------------------------------
1 |
23 | */
24 | public function tokenize(string $text) : array;
25 | }
26 |
--------------------------------------------------------------------------------
/src/Datasets/Generators/Generator.php:
--------------------------------------------------------------------------------
1 |
13 | */
14 | public function dimensions() : int;
15 |
16 | /**
17 | * Generate n data points.
18 | *
19 | * @param int<0,max> $n
20 | * @return \Rubix\ML\Datasets\Dataset
21 | */
22 | public function generate(int $n);
23 | }
24 |
--------------------------------------------------------------------------------
/src/Probabilistic.php:
--------------------------------------------------------------------------------
1 |
21 | */
22 | public function proba(Dataset $dataset) : array;
23 | }
24 |
--------------------------------------------------------------------------------
/docs/kernels/distance/sparse-cosine.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Sparse Cosine
4 | A version of the Cosine distance kernel that is specifically optimized for computing sparse vectors.
5 |
6 | **Data Type Compatibility:** Continuous
7 |
8 | ## Parameters
9 | This kernel does not have any parameters.
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Kernels\Distance\SparseCosine;
14 |
15 | $kernel = new SparseCosine();
16 | ```
17 |
--------------------------------------------------------------------------------
/docs/tokenizers/whitespace.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Whitespace
4 | Tokens are delimited by a user-specified whitespace character.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | delimiter | ' ' | string | The whitespace character that delimits each token. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Tokenizers\Whitespace;
14 |
15 | $tokenizer = new Whitespace(',');
16 | ```
17 |
--------------------------------------------------------------------------------
/src/Exceptions/EstimatorIncompatibleWithMetric.php:
--------------------------------------------------------------------------------
1 | type()}s.");
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/docs/neural-network/initializers/constant.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Constant
4 | Initialize the parameter to a user-specified constant value.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | value | 0.0 | float | The value to initialize the parameter to. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\NeuralNet\Initializers\Constant;
14 |
15 | $initializer = new Constant(1.0);
16 | ```
--------------------------------------------------------------------------------
/docs/datasets/generators/api.md:
--------------------------------------------------------------------------------
1 | # Generators
2 | Dataset generators produce synthetic datasets of a user-specified shape and dimensionality. Synthetic data is useful for a number of tasks including experimentation, testing, benchmarking, and demonstration purposes.
3 |
4 | ### Generate a Dataset
5 | To generate a Dataset object with *n* records:
6 | ```php
7 | public generate(int $n) : Dataset
8 | ```
9 |
10 | ```php
11 | use Rubix\ML\Datasets\Generators\HalfMoon;
12 |
13 | $generator = new HalfMoon(0.0, 0.0);
14 |
15 | $dataset = $generator->generate(1000);
16 | ```
17 |
--------------------------------------------------------------------------------
/docs/persisters/api.md:
--------------------------------------------------------------------------------
1 | # Persisters
2 | Persisters are responsible for persisting Encoding objects to storage and are also used by the [Persistent Model](../persistent-model.md) meta-estimator to save and restore models that have been serialized.
3 |
4 | ### Save
5 | To save an encoding:
6 | ```php
7 | public save(Encoding $encoding) : void
8 | ```
9 |
10 | ```php
11 | $persister->save($encoding);
12 | ```
13 |
14 | ### Load
15 | To load an encoding from persistence:
16 | ```php
17 | public load() : Encoding
18 | ```
19 |
20 | ```php
21 | $encoding = $persister->load();
22 | ```
--------------------------------------------------------------------------------
/docs/kernels/svm/sigmoidal.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Sigmoidal
4 | S-shaped nonlinearity kernel with output values ranging from -1 to 1.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | gamma | null | float | The kernel coefficient. |
10 | | 2 | coef0 | 0. | float | The independent term. |
11 |
12 | ## Example
13 | ```php
14 | use Rubix\ML\Kernels\SVM\Sigmoidal;
15 |
16 | $kernel = new Sigmoidal(null, 0.);
17 | ```
--------------------------------------------------------------------------------
/src/Graph/Trees/Tree.php:
--------------------------------------------------------------------------------
1 | assertLessThan(1.0, $epsilon);
22 | $this->assertGreaterThan(0.0, $epsilon);
23 |
24 | $this->assertFalse(1.0 + $epsilon === 1.0);
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/tests/test.ndjson:
--------------------------------------------------------------------------------
1 | {"attitude":"nice","texture":"furry","sociability":"friendly","rating":4,"class":"not monster"}
2 | {"attitude":"mean","texture":"furry","sociability":"loner","rating":-1.5,"class":"monster"}
3 | {"attitude":"nice","texture":"rough","sociability":"friendly","rating":2.6,"class":"not monster"}
4 | {"attitude":"mean","texture":"rough","sociability":"friendly","rating":-1,"class":"monster"}
5 | {"attitude":"nice","texture":"rough","sociability":"friendly","rating":2.9,"class":"not monster"}
6 | {"attitude":"nice","texture":"furry","sociability":"loner","rating":-5,"class":"not monster"}
7 |
--------------------------------------------------------------------------------
/docs/strategies/percentile.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Percentile
4 | A strategy that always guesses the p-th percentile of the fitted data.
5 |
6 | **Data Type:** Continuous
7 |
8 | ## Parameters
9 | | # | Name | Default | Type | Description |
10 | |---|---|---|---|---|
11 | | 1 | p | 50.0 | float | The percentile of the fitted data to use as a guess. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\Strategies\Percentile;
16 |
17 | $strategy = new Percentile(90.0);
18 | ```
--------------------------------------------------------------------------------
/docs/clusterers/seeders/preset.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Preset
4 | Generates centroids from a list of presets.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | centroids | | array | A list of predefined cluster centroids to sample from. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Clusterers\Seeders\Preset;
14 |
15 | $seeder = new Preset([
16 | ['foo', 14, 0.72],
17 | ['bar', 16, 0.92],
18 | ]);
19 | ```
20 |
--------------------------------------------------------------------------------
/docs/neural-network/optimizers/stochastic.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Stochastic
4 | A constant learning rate optimizer based on vanilla Stochastic Gradient Descent.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | rate | 0.01 | float | The learning rate that controls the global step size. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\NeuralNet\Optimizers\Stochastic;
14 |
15 | $optimizer = new Stochastic(0.01);
16 | ```
--------------------------------------------------------------------------------
/docs/neural-network/initializers/uniform.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Uniform
4 | Generates a random uniform distribution centered at 0 and bounded at both ends by the parameter beta.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | beta | 0.05 | float | The upper and lower bound of the distribution. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\NeuralNet\Initializers\Uniform;
14 |
15 | $initializer = new Uniform(1e-3);
16 | ```
--------------------------------------------------------------------------------
/docs/loggers/screen.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Screen
4 | A logger that displays log messages to the standard output.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | channel | '' | string | The channel name that appears on each line. |
10 | | 2 | timestampFormat | 'Y-m-d H:i:s' | string | The format of the timestamp. |
11 |
12 | ## Example
13 | ```php
14 | use Rubix\ML\Loggers\Screen;
15 |
16 | $logger = new Screen('mlp', 'Y-m-d H:i:s');
17 | ```
18 |
--------------------------------------------------------------------------------
/docs/neural-network/initializers/normal.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Normal
4 | Generates a random weight matrix from a Gaussian distribution with user-specified standard deviation.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | stddev | 0.05 | float | The standard deviation of the distribution to sample from. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\NeuralNet\Initializers\Normal;
14 |
15 | $initializer = new Normal(0.1);
16 | ```
--------------------------------------------------------------------------------
/docs/strategies/k-most-frequent.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # K Most Frequent
4 | This strategy outputs one of the k most frequently occurring classes at random with equal probability.
5 |
6 | **Data Type:** Categorical
7 |
8 | ## Parameters
9 | | # | Name | Default | Type | Description |
10 | |---|---|---|---|---|
11 | | 1 | k | 1 | int | The number of most frequent classes to consider. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\Strategies\KMostFrequent;
16 |
17 | $strategy = new KMostFrequent(5);
18 | ```
--------------------------------------------------------------------------------
/src/CrossValidation/Validator.php:
--------------------------------------------------------------------------------
1 | numFeatures()} given.";
18 |
19 | parent::__construct($message);
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/Graph/Trees/BinaryTree.php:
--------------------------------------------------------------------------------
1 | >
25 | */
26 | public function seed(Dataset $dataset, int $k) : array;
27 | }
28 |
--------------------------------------------------------------------------------
/src/Loggers/BlackHole.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Hamming
4 | A categorical distance function that measures distance as the number of substitutions necessary to convert one sample to the other.
5 |
6 | **Data Type Compatibility:** Categorical
7 |
8 | ## Parameters
9 | This kernel does not have any parameters.
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Kernels\Distance\Hamming;
14 |
15 | $kernel = new Hamming();
16 | ```
17 |
18 | ## References
19 | [^1]: R. W. Hamming. (1950). Error detecting and error correcting codes.
--------------------------------------------------------------------------------
/src/Persisters/Persister.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Jaccard
4 | The *generalized* Jaccard distance is a measure of distance with a range from 0 to 1 and can be thought of as the size of the intersection divided by the size of the union of two points if they consisted only of binary random variables.
5 |
6 | **Data Type Compatibility:** Continuous
7 |
8 | ## Parameters
9 | This kernel does not have any parameters.
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Kernels\Distance\Jaccard;
14 |
15 | $kernel = new Jaccard();
16 | ```
--------------------------------------------------------------------------------
/docs/kernels/svm/polynomial.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Polynomial
4 | This kernel projects a sample vector using polynomials of the p'th degree.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | degree | 3 | int | The degree of the polynomial. |
10 | | 2 | gamma | null | float | The kernel coefficient. |
11 | | 3 | coef0 | 0. | float | The independent term. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\Kernels\SVM\Polynomial;
16 |
17 | $kernel = new Polynomial(3, null, 0.);
18 | ```
19 |
--------------------------------------------------------------------------------
/src/Graph/Nodes/Decision.php:
--------------------------------------------------------------------------------
1 |
27 | */
28 | public function n() : int;
29 | }
30 |
--------------------------------------------------------------------------------
/src/NeuralNet/Initializers/Initializer.php:
--------------------------------------------------------------------------------
1 | $fanIn
23 | * @param int<0,max> $fanOut
24 | * @return Matrix
25 | */
26 | public function initialize(int $fanIn, int $fanOut) : Matrix;
27 | }
28 |
--------------------------------------------------------------------------------
/docs/kernels/distance/diagonal.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Diagonal
4 | The Diagonal (a.k.a. *Chebyshev*) distance is a measure that constrains movement to horizontal, vertical, and diagonal. An example of a game that uses diagonal movement is chess.
5 |
6 | $$
7 | {\displaystyle Diagonal(a,b)=\max _{i}(|a_{i}-b_{i}|)}
8 | $$
9 |
10 | **Data Type Compatibility:** Continuous
11 |
12 | ## Parameters
13 | This kernel does not have any parameters.
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\Kernels\Distance\Diagonal;
18 |
19 | $kernel = new Diagonal();
20 | ```
--------------------------------------------------------------------------------
/src/Graph/Nodes/Hypercube.php:
--------------------------------------------------------------------------------
1 | >
22 | */
23 | public function sides() : Traversable;
24 |
25 | /**
26 | * Does the hypercube reduce to a single point?
27 | *
28 | * @return bool
29 | */
30 | public function isPoint() : bool;
31 | }
32 |
--------------------------------------------------------------------------------
/src/NeuralNet/Layers/Hidden.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Soft Plus
4 | A smooth approximation of the piecewise linear [ReLU](relu.md) activation function.
5 |
6 | $$
7 | {\displaystyle Soft-Plus = \log \left(1+e^{x}\right)}
8 | $$
9 |
10 | ## Parameters
11 | This activation function does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\ActivationFunctions\SoftPlus;
16 |
17 | $activationFunction = new SoftPlus();
18 | ```
19 |
20 | ## References
21 | [^1]: X. Glorot et al. (2011). Deep Sparse Rectifier Neural Networks.
--------------------------------------------------------------------------------
/docs/tokenizers/n-gram.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # N-gram
4 | N-grams are sequences of n-words of a given string. The N-gram tokenizer outputs tokens of contiguous words ranging from *min* to *max* number of words per token.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | min | 2 | int | The minimum number of contiguous words to a token. |
10 | | 2 | max | 2 | int | The maximum number of contiguous words to a token. |
11 |
12 | ## Example
13 | ```php
14 | use Rubix\ML\Tokenizers\NGram;
15 |
16 | $tokenizer = new NGram(1, 3);
17 | ```
18 |
--------------------------------------------------------------------------------
/src/CrossValidation/Reports/ReportGenerator.php:
--------------------------------------------------------------------------------
1 |
15 | */
16 | public function compatibility() : array;
17 |
18 | /**
19 | * Generate the report.
20 | *
21 | * @param list $predictions
22 | * @param list $labels
23 | * @return Report
24 | */
25 | public function generate(array $predictions, array $labels) : Report;
26 | }
27 |
--------------------------------------------------------------------------------
/docs/neural-network/initializers/lecun.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Le Cun
4 | Proposed by Yann Le Cun in a paper in 1998, this initializer was one of the first published attempts to control the variance of activations between layers through weight initialization. It remains a good default choice for many hidden layer configurations.
5 |
6 | ## Parameters
7 | This initializer does not have any parameters.
8 |
9 | ## Example
10 | ```php
11 | use Rubix\ML\NeuralNet\Initializers\LeCun;
12 |
13 | $initializer = new LeCun();
14 | ```
15 |
16 | ## References
17 | [^1]: Y. Le Cun et al. (1998). Efficient Backprop.
--------------------------------------------------------------------------------
/docs/neural-network/activation-functions/softsign.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Softsign
4 | A smooth sigmoid-shaped function that squashes the input between -1 and 1.
5 |
6 | $$
7 | {\displaystyle Softsign = {\frac {x}{1+|x|}}}
8 | $$
9 |
10 | ## Parameters
11 | This activation function does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\ActivationFunctions\Softsign;
16 |
17 | $activationFunction = new Softsign();
18 | ```
19 |
20 | ## References
21 | [^1]: X. Glorot et al. (2010). Understanding the Difficulty of Training Deep Feedforward Neural Networks.
22 |
--------------------------------------------------------------------------------
/src/NeuralNet/Optimizers/Optimizer.php:
--------------------------------------------------------------------------------
1 | $gradient
25 | * @return Tensor
26 | */
27 | public function step(Parameter $param, Tensor $gradient) : Tensor;
28 | }
29 |
--------------------------------------------------------------------------------
/docs/scoring.md:
--------------------------------------------------------------------------------
1 | # Scoring
2 | A Scoring anomaly detector is one that assigns anomaly scores to unknown samples in a dataset. The interface provides the `score()` method which returns a set of scores from the model. Higher scores indicate a greater degree of anomalousness. In addition, samples can be sorted by their anomaly score to identify the top outliers.
3 |
4 | ## Score a Dataset
5 | Return the anomaly scores assigned to the samples in a dataset:
6 | ```php
7 | public score(Dataset $dataset) : array
8 | ```
9 |
10 | ```php
11 | $scores = $estimator->score($dataset);
12 |
13 | print_r($scores);
14 | ```
15 |
16 | ```php
17 | Array
18 | (
19 | [0] => 0.35033
20 | [1] => 0.40992
21 | [2] => 1.68153
22 | )
23 | ```
24 |
--------------------------------------------------------------------------------
/docs/kernels/distance/manhattan.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Manhattan
4 | A distance metric that constrains movement to horizontal and vertical, similar to navigating the city blocks of Manhattan. An example of a board game that uses this type of movement is Checkers.
5 |
6 | $$
7 | Manhattan(\mathbf {a} ,\mathbf {b})=\|\mathbf {a} -\mathbf {b} \|_{1}=\sum _{i=1}^{n}|a_{i}-b_{i}|
8 | $$
9 |
10 | **Data Type Compatibility:** Continuous
11 |
12 | ## Parameters
13 | This kernel does not have any parameters.
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\Kernels\Distance\Manhattan;
18 |
19 | $kernel = new Manhattan();
20 | ```
--------------------------------------------------------------------------------
/docs/transformers/l1-normalizer.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # L1 Normalizer
4 | Transform each sample (row) vector in the sample matrix such that each feature is divided by the L1 norm (or *magnitude*) of that vector.
5 |
6 | **Interfaces:** [Transformer](api.md#transformer)
7 |
8 | **Data Type Compatibility:** Continuous only
9 |
10 | ## Parameters
11 | This transformer does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\Transformers\L1Normalizer;
16 |
17 | $transformer = new L1Normalizer();
18 | ```
19 |
20 | ## Additional Methods
21 | This transformer does not have any additional methods.
22 |
--------------------------------------------------------------------------------
/docs/transformers/l2-normalizer.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # L2 Normalizer
4 | Transform each sample (row) vector in the sample matrix such that each feature is divided by the L2 norm (or *magnitude*) of that vector.
5 |
6 | **Interfaces:** [Transformer](api.md#transformer)
7 |
8 | **Data Type Compatibility:** Continuous only
9 |
10 | ## Parameters
11 | This transformer does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\Transformers\L2Normalizer;
16 |
17 | $transformer = new L2Normalizer();
18 | ```
19 |
20 | ## Additional Methods
21 | This transformer does not have any additional methods.
22 |
--------------------------------------------------------------------------------
/src/Transformers/Transformer.php:
--------------------------------------------------------------------------------
1 |
22 | */
23 | public function compatibility() : array;
24 |
25 | /**
26 | * Transform the dataset in place.
27 | *
28 | * @param list> $samples
29 | */
30 | public function transform(array &$samples) : void;
31 | }
32 |
--------------------------------------------------------------------------------
/docs/serializers/api.md:
--------------------------------------------------------------------------------
1 | # Serializers
2 | Serializers take objects that implement the [Persistable](../persistable.md) interface and convert them into blobs of data called *encodings*. Encodings can then be used to either store an object or to reinstantiate an object from storage.
3 |
4 | ### Serialize
5 | To serialize a persistable object into an encoding:
6 | ```php
7 | public serialize(Persistable $persistable) : Encoding
8 | ```
9 |
10 | ```php
11 | $encoding = $serializer->serialize($persistable);
12 | ```
13 |
14 | ### Deserialize
15 | To deserialize a persistable object from an encoding:
16 | ```php
17 | public deserialize(Encoding $encoding) : Persistable
18 | ```
19 |
20 | ```php
21 | $persistable = $serializer->deserialize($encoding);
22 | ```
23 |
--------------------------------------------------------------------------------
/src/NeuralNet/Layers/Parametric.php:
--------------------------------------------------------------------------------
1 |
22 | */
23 | public function parameters() : Generator;
24 |
25 | /**
26 | * Restore the parameters on the layer from an associative array.
27 | *
28 | * @param \Rubix\ML\NeuralNet\Parameter[] $parameters
29 | */
30 | public function restore(array $parameters) : void;
31 | }
32 |
--------------------------------------------------------------------------------
/docs/neural-network/cost-functions/cross-entropy.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Cross Entropy
4 | Cross Entropy (or *log loss*) measures the performance of a classification model whose output is a joint probability distribution over the possible classes. Entropy increases as the predicted probability distribution diverges from the actual distribution.
5 |
6 | $$
7 | Cross Entropy = -\sum_{c=1}^My_{o,c}\log(p_{o,c})
8 | $$
9 |
10 | ## Parameters
11 | This cost function does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy;
16 |
17 | $costFunction = new CrossEntropy();
18 | ```
--------------------------------------------------------------------------------
/docs/neural-network/cost-functions/least-squares.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Least Squares
4 | Least Squares (or *quadratic* loss) is a function that computes the average squared error (MSE) between the target output given by the labels and the actual output of the network. It produces a smooth bowl-shaped gradient that is highly-influenced by large errors.
5 |
6 | $$
7 | Least Squares = \sum_{i=1}^{D}(y_i-\hat{y}_i)^2
8 | $$
9 |
10 | ## Parameters
11 | This cost function does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\CostFunctions\LeastSquares;
16 |
17 | $costFunction = new LeastSquares();
18 | ```
--------------------------------------------------------------------------------
/docs/neural-network/hidden-layers/activation.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Activation
4 | Activation layers apply a user-defined non-linear activation function to their inputs. They often work in conjunction with [Dense](dense.md) layers as a way to transform their output.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | activationFn | | ActivationFunction | The function that computes the output of the layer. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\NeuralNet\Layers\Activation;
14 | use Rubix\ML\NeuralNet\ActivationFunctions\ReLU;
15 |
16 | $layer = new Activation(new ReLU());
17 | ```
--------------------------------------------------------------------------------
/docs/backends/serial.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | ### Serial
4 | The Serial backend executes tasks sequentially inside of a single process. The advantage of the Serial backend is that it has zero overhead, thus it may be faster than a parallel backend for small datasets.
5 |
6 | !!! note
7 | The Serial backend is the default for most objects that are capable of parallel processing.
8 |
9 | ## Parameters
10 | This backend does not have any additional parameters.
11 |
12 | ## Example
13 | ```php
14 | use Rubix\ML\Backends\Serial;
15 |
16 | $backend = new Serial();
17 | ```
18 |
19 | ## Additional Methods
20 | This backend does not have any additional methods.
21 |
--------------------------------------------------------------------------------
/docs/kernels/distance/euclidean.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Euclidean
4 | The straight line (*bee* line) distance between two points. Euclidean distance has the nice property of being invariant under any rotation.
5 |
6 | $$
7 | Euclidean\left(a,b\right) = \sqrt {\sum _{i=1}^{n} \left( a_{i}-b_{i}\right)^2}
8 | $$
9 |
10 | **Data Type Compatibility:** Continuous
11 |
12 | ## Parameters
13 | This kernel does not have any parameters.
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\Kernels\Distance\Euclidean;
18 |
19 | $kernel = new Euclidean();
20 | ```
21 |
22 | ## References
23 | [^1]: J. K. Dixon. (1978). Pattern Recognition with Partly Missing Data.
--------------------------------------------------------------------------------
/src/CrossValidation/Metrics/ProbabilisticMetric.php:
--------------------------------------------------------------------------------
1 | > $probabilities
21 | * @param list $labels
22 | * @return float
23 | */
24 | public function score(array $probabilities, array $labels) : float;
25 | }
26 |
--------------------------------------------------------------------------------
/docs/neural-network/activation-functions/silu.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # SiLU
4 | Sigmoid Linear Units are smooth and non-monotonic rectified activation functions. Their inputs are weighted by the [Sigmoid](sigmoid.md) activation function acting as a self-gating mechanism.
5 |
6 | ## Parameters
7 | This activation function does not have any parameters.
8 |
9 | ## Example
10 | ```php
11 | use Rubix\ML\NeuralNet\ActivationFunctions\SiLU;
12 |
13 | $activationFunction = new SiLU();
14 | ```
15 |
16 | ## References
17 | [^1]: S. Elfwing et al. (2017). Sigmoid-Weighted Linear Units for Neural Network Function Approximation in Reinforcement Learning.
18 |
--------------------------------------------------------------------------------
/docs/serializers/gzip-native.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Gzip Native
4 | Gzip Native wraps the native PHP serialization format in an outer compression layer based on the DEFLATE algorithm with a header and CRC32 checksum.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | level | 6 | int | The compression level between 0 and 9, 0 meaning no compression. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Serializers\GzipNative;
14 |
15 | $serializer = new GzipNative(1);
16 | ```
17 |
18 | ## References
19 | [^1]: P. Deutsch. (1996). RFC 1951 - DEFLATE Compressed Data Format Specification version 1.3.
20 |
--------------------------------------------------------------------------------
/docs/neural-network/activation-functions/gelu.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # GELU
4 | Gaussian Error Linear Units (GELUs) are rectifiers that are gated by the magnitude of their input rather than the sign of their input as with ReLU variants. Their output can be interpreted as the expected value of a neuron with random dropout regularization applied.
5 |
6 | ## Parameters
7 | This activation function does not have any parameters.
8 |
9 | ## Example
10 | ```php
11 | use Rubix\ML\NeuralNet\ActivationFunctions\GELU;
12 |
13 | $activationFunction = new GELU();
14 | ```
15 |
16 | ## References
17 | [^1]: D. Hendrycks et al. (2018). Gaussian Error Linear Units (GELUs).
18 |
--------------------------------------------------------------------------------
/docs/neural-network/activation-functions/sigmoid.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Sigmoid
4 | A bounded S-shaped function (sometimes called the *Logistic* function) with an output value between 0 and 1. The output of the sigmoid function has the advantage of being interpretable as a probability, however it is not zero-centered and tends to saturate if inputs become large.
5 |
6 | $$
7 | {\displaystyle Sigmoid = {\frac {1}{1+e^{-x}}}}
8 | $$
9 |
10 | ## Parameters
11 | This activation function does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\ActivationFunctions\Sigmoid;
16 |
17 | $activationFunction = new Sigmoid();
18 | ```
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/r-squared.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # R Squared
4 | The *coefficient of determination* or R Squared (R²) is the proportion of the variance in the target labels that is explainable from the predictions. It gives an indication as to how well the predictions approximate the labels.
5 |
6 | $$
7 | {\displaystyle R^{2} = 1-{SS_{\rm {res}} \over SS_{\rm {tot}}}}
8 | $$
9 |
10 | **Estimator Compatibility:** Regressor
11 |
12 | **Score Range:** -∞ to 1
13 |
14 | ## Parameters
15 | This metric does not have any parameters.
16 |
17 | ## Example
18 | ```php
19 | use Rubix\ML\CrossValidation\Metrics\RSquared;
20 |
21 | $metric = new RSquared();
22 | ```
23 |
--------------------------------------------------------------------------------
/docs/extractors/column-filter.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Column Filter
4 |
5 | **Interfaces:** [Extractor](api.md)
6 |
7 | ## Parameters
8 | | # | Name | Default | Type | Description |
9 | |---|---|---|---|---|
10 | | 1 | iterator | | Traversable | The base iterator. |
11 | | 2 | keys | | array | The string and/or integer keys of the columns to filter from the table. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\Extractors\ColumnFilter;
16 | use Rubix\ML\Extractors\CSV;
17 |
18 | $extractor = new ColumnFilter(new CSV('example.csv', true), [
19 | 'texture', 'class',
20 | ]);
21 | ```
22 |
23 | ## Additional Methods
24 | This extractor does not have any additional methods.
25 |
--------------------------------------------------------------------------------
/src/Graph/Nodes/Hypersphere.php:
--------------------------------------------------------------------------------
1 |
20 | */
21 | public function center() : array;
22 |
23 | /**
24 | * Return the radius of the centroid.
25 | *
26 | * @return float
27 | */
28 | public function radius() : float;
29 |
30 | /**
31 | * Does the hypersphere reduce to a single point?
32 | *
33 | * @return bool
34 | */
35 | public function isPoint() : bool;
36 | }
37 |
--------------------------------------------------------------------------------
/docs/neural-network/cost-functions/relative-entropy.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Relative Entropy
4 | Relative Entropy (or *Kullback-Leibler divergence*) is a measure of how the expectation and activation of the network diverge. It is different from [Cross Entropy](cross-entropy.md) in that it is *asymmetric* and thus does not qualify as a statistical measure of error.
5 |
6 | $$
7 | KL(\hat{y} || y) = \sum_{c=1}^{M}\hat{y}_c \log{\frac{\hat{y}_c}{y_c}}
8 | $$
9 |
10 | ## Parameters
11 | This cost function does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\CostFunctions\RelativeEntropy;
16 |
17 | $costFunction = new RelativeEntropy();
18 | ```
--------------------------------------------------------------------------------
/docs/cross-validation/leave-p-out.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Leave P Out
4 | Leave P Out tests a learner with a unique holdout set of size p for each iteration until all samples have been tested. Although Leave P Out can take long with large datasets and small values of p, it is especially suited for small datasets.
5 |
6 | **Interfaces:** [Validator](api.md#validator), [Parallel](#parallel)
7 |
8 | ## Parameters
9 | | # | Name | Default | Type | Description |
10 | |---|---|---|---|---|
11 | | 1 | p | 10 | int | The number of samples to leave out each round for testing. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\CrossValidation\LeavePOut;
16 |
17 | $validator = new LeavePOut(50);
18 | ```
--------------------------------------------------------------------------------
/docs/neural-network/activation-functions/softmax.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Softmax
4 | The Softmax function is a generalization of the [Sigmoid](sigmoid.md) function that squashes each activation between 0 and 1 with the addition that all activations add up to 1. Together, these properties allow the output of the Softmax function to be interpretable as a *joint* probability distribution.
5 |
6 | $$
7 | {\displaystyle Softmax = {\frac {e^{x_{i}}}{\sum _{j=1}^{J}e^{x_{j}}}}}
8 | $$
9 |
10 | ## Parameters
11 | This activation function does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\ActivationFunctions\Softmax;
16 |
17 | $activationFunction = new Softmax();
18 | ```
--------------------------------------------------------------------------------
/docs/cross-validation/hold-out.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Hold Out
4 | Hold Out is a quick and simple cross validation technique that uses a validation set that is *held out* from the training data. The advantage of Hold Out is that the validation score is quick to compute; however, it does not allow the learner to *both* train and test on all the data in the training set.
5 |
6 | **Interfaces:** [Validator](api.md#validator)
7 |
8 | ## Parameters
9 | | # | Name | Default | Type | Description |
10 | |---|---|---|---|---|
11 | | 1 | ratio | 0.2 | float | The ratio of samples to hold out for testing. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\CrossValidation\HoldOut;
16 |
17 | $validator = new HoldOut(0.3);
18 | ```
--------------------------------------------------------------------------------
/docs/neural-network/hidden-layers/noise.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Noise
4 | This layer adds random Gaussian noise to the inputs with a user-defined standard deviation. Noise added to neural network activations acts as a regularizer by indirectly adding a penalty to the weights through the cost function in the output layer.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | stddev | 0.1 | float | The standard deviation of the Gaussian noise added to the inputs. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\NeuralNet\Layers\Noise;
14 |
15 | $layer = new Noise(1e-3);
16 | ```
17 |
18 | ## References
19 | [^1]: C. Gulcehre et al. (2016). Noisy Activation Functions.
--------------------------------------------------------------------------------
/docs/transformers/stop-word-filter.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Stop Word Filter
4 | Removes user-specified words from any categorical feature columns including blobs of text.
5 |
6 | **Interfaces:** [Transformer](api.md#transformer)
7 |
8 | **Data Type Compatibility:** Categorical
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | stopWords | | array | A list of stop words to filter out of each text feature. |
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\Transformers\StopWordFilter;
18 |
19 | $transformer = new StopWordFilter(['i', 'me', 'my', ...]);
20 | ```
21 |
22 | ## Additional Methods
23 | This transformer does not have any additional methods.
24 |
--------------------------------------------------------------------------------
/src/Kernels/Distance/Distance.php:
--------------------------------------------------------------------------------
1 |
22 | */
23 | public function compatibility() : array;
24 |
25 | /**
26 | * Compute the distance between two vectors.
27 | *
28 | * @internal
29 | *
30 | * @param list $a
31 | * @param list $b
32 | * @return float
33 | */
34 | public function compute(array $a, array $b) : float;
35 | }
36 |
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/top-k-accuracy.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Top K Accuracy
4 | Top K Accuracy looks at the k classes with the highest predicted probabilities when calculating the accuracy score. If one of the top k classes matches the ground-truth, then the prediction is considered accurate.
5 |
6 | **Estimator Compatibility:** Probabilistic Classifier
7 |
8 | **Score Range:** 0 to 1
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | k | 3 | int | The number of classes with the highest predicted probability to consider. |
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\CrossValidation\Metrics\TopKAccuracy;
18 |
19 | $metric = new TopKAccuracy(5);
20 | ```
21 |
--------------------------------------------------------------------------------
/docs/kernels/distance/safe-euclidean.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Safe Euclidean
4 | A Euclidean distance metric suitable for samples that may contain NaN (not a number) values, i.e. missing data. The Safe Euclidean metric approximates the Euclidean distance function by dropping NaN values and scaling the distance according to the proportion of non-NaNs (in either a or b or both) to compensate.
5 |
6 | **Data Type Compatibility:** Continuous
7 |
8 | ## Parameters
9 | This kernel does not have any parameters.
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Kernels\Distance\SafeEuclidean;
14 |
15 | $kernel = new SafeEuclidean();
16 | ```
17 |
18 | ## References
19 | [^1]: J. K. Dixon. (1978). Pattern Recognition with Partly Missing Data.
--------------------------------------------------------------------------------
/docs/tokenizers/word-stemmer.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Word Stemmer
4 | Word Stemmer reduces inflected and derived words to their root form using the Snowball method. For example, the sentence "Majority voting is likely foolish" stems to "Major vote is like foolish."
5 |
6 | !!! note
7 | For a complete list of [supported languages](https://github.com/wamania/php-stemmer#languages) you can visit the PHP Stemmer documentation.
8 |
9 | ## Parameters
10 | | # | Name | Default | Type | Description |
11 | |---|---|---|---|---|
12 | | 1 | language | | string | The language that the text is written in. |
13 |
14 | ## Example
15 | ```php
16 | use Rubix\ML\Tokenizers\WordStemmer;
17 |
18 | $tokenizer = new WordStemmer('english');
19 | ```
20 |
--------------------------------------------------------------------------------
/docs/online.md:
--------------------------------------------------------------------------------
1 | # Online
2 | Learners that implement the Online interface can be trained in batches. Learners of this type are great for when you either have a continuous stream of data or a dataset that is too large to fit into memory. In addition, partial training allows the model to evolve over time.
3 |
4 | ## Partially Train
5 | To partially train an Online learner, pass a training set to its `partial()` method:
6 | ```php
7 | public partial(Dataset $dataset) : void
8 | ```
9 |
10 | ```php
11 | $folds = $dataset->fold(3);
12 |
13 | $estimator->train($folds[0]);
14 |
15 | $estimator->partial($folds[1]);
16 |
17 | $estimator->partial($folds[2]);
18 | ```
19 |
20 | !!! note
21 | The learner will continue to train as long as you are using the `partial()` method; however, calling `train()` on a trained or partially trained learner will reset it back to baseline first.
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/accuracy.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Accuracy
4 | A quick and simple classification and anomaly detection metric defined as the number of true positives over the number of samples in the testing set. Since Accuracy gives equal weight to false positives and false negatives, it is *not* a good metric for datasets with a highly imbalanced distribution of labels.
5 |
6 | $$
7 | {\displaystyle Accuracy = \frac{TP}{TP + FP}}
8 | $$
9 |
10 | **Estimator Compatibility:** Classifier, Anomaly Detector
11 |
12 | **Score Range:** 0 to 1
13 |
14 | ## Parameters
15 | This metric does not have any parameters.
16 |
17 | ## Example
18 | ```php
19 | use Rubix\ML\CrossValidation\Metrics\Accuracy;
20 |
21 | $metric = new Accuracy();
22 | ```
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 | Install Rubix ML into your project using [Composer](https://getcomposer.org/):
3 |
4 | ```sh
5 | $ composer require rubix/ml
6 | ```
7 |
8 | ## Requirements
9 | - [PHP](https://php.net/manual/en/install.php) 7.4 or above
10 |
11 | **Recommended**
12 |
13 | - [Tensor extension](https://github.com/RubixML/Tensor) for fast Matrix/Vector computing
14 |
15 | **Optional**
16 |
17 | - [GD extension](https://php.net/manual/en/book.image.php) for image support
18 | - [Mbstring extension](https://www.php.net/manual/en/book.mbstring.php) for fast multibyte string manipulation
19 | - [SVM extension](https://php.net/manual/en/book.svm.php) for Support Vector Machine engine (libsvm)
20 | - [PDO extension](https://www.php.net/manual/en/book.pdo.php) for relational database support
21 | - [GraphViz](https://graphviz.org/) for graph visualization
22 |
--------------------------------------------------------------------------------
/docs/extractors/concatenator.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Concatenator
4 | Combines multiple iterators by concatenating the output of one iterator with the output of the next iterator in the series.
5 |
6 | **Interfaces:** [Extractor](api.md)
7 | ## Parameters
8 | | # | Name | Default | Type | Description |
9 | |---|---|---|---|---|
10 | | 1 | iterators | | iterable | The iterators to concatenate together. |
11 |
12 | ## Example
13 | ```php
14 | use Rubix\ML\Extractors\Concatenator;
15 | use Rubix\ML\Extractors\CSV;
16 |
17 | $extractor = new Concatenator([
18 | new CSV('dataset1.csv'),
19 | new CSV('dataset2.csv'),
20 | new CSV('dataset3.csv'),
21 | ]);
22 | ```
23 |
24 | ## Additional Methods
25 | This extractor does not have any additional methods.
26 |
--------------------------------------------------------------------------------
/docs/kernels/distance/canberra.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Canberra
4 | A weighted version of the [Manhattan](manhattan.md) distance, Canberra examines the sum of a series of fractional differences between two samples. Canberra can be very sensitive when both coordinates are near zero.
5 |
6 | $$
7 | Canberra(\mathbf {a} ,\mathbf {b} )=\sum _{i=1}^{n}{\frac {|a_{i}-b_{i}|}{|a_{i}|+|b_{i}|}}
8 | $$
9 |
10 | **Data Type Compatibility:** Continuous
11 |
12 | ## Parameters
13 | This kernel does not have any parameters.
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\Kernels\Distance\Canberra;
18 |
19 | $kernel = new Canberra();
20 | ```
21 |
22 | ## References
23 | [^1]: G. N. Lance et al. (1967). Mixed-data classificatory programs I. Agglomerative Systems.
24 |
--------------------------------------------------------------------------------
/docs/neural-network/initializers/xavier-1.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Xavier 1
4 | The Xavier 1 initializer draws from a uniform distribution [-limit, limit] where *limit* is equal to sqrt(6 / (fanIn + fanOut)). This initializer is best suited for layers that feed into an activation layer that outputs a value between 0 and 1 such as [Softmax](../activation-functions/softmax.md) or [Sigmoid](../activation-functions/sigmoid.md).
5 |
6 | ## Parameters
7 | This initializer does not have any parameters.
8 |
9 | ## Example
10 | ```php
11 | use Rubix\ML\NeuralNet\Initializers\Xavier1;
12 |
13 | $initializer = new Xavier1();
14 | ```
15 |
16 | ## References
17 | [^1]: X. Glorot et al. (2010). Understanding the Difficulty of Training Deep Feedforward Neural Networks.
--------------------------------------------------------------------------------
/docs/serializers/rbx.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # RBX
4 | Rubix Object File format (RBX) is a format designed to reliably store and share serialized PHP objects. Based on PHP's native serialization format, RBX adds additional layers of compression, data integrity checks, and class compatibility detection all in one robust format.
5 |
6 | !!! note
7 | We recommend using the `.rbx` file extension when storing RBX-serialized PHP objects.
8 |
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | level | 6 | int | The compression level between 0 and 9, 0 meaning no compression. |
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\Serializers\RBX;
18 |
19 | $serializer = new RBX(6);
20 | ```
21 |
--------------------------------------------------------------------------------
/docs/neural-network/activation-functions/hyperbolic-tangent.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Hyperbolic Tangent
4 | An S-shaped function that squeezes the input value into an output space between -1 and 1. Hyperbolic Tangent (or *tanh*) has the advantage of being zero centered; however, it is known to *saturate* with highly positive or negative input values, which can slow down training if the activations become too intense.
5 |
6 | $$
7 | {\displaystyle \tanh(x)={\frac {e^{x}-e^{-x}}{e^{x}+e^{-x}}}}
8 | $$
9 |
10 | ## Parameters
11 | This activation function does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\ActivationFunctions\HyperbolicTangent;
16 |
17 | $activationFunction = new HyperbolicTangent();
18 | ```
19 |
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/probabilistic-accuracy.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Probabilistic Accuracy
4 | This metric comes from the sports betting domain, where it's used to measure the accuracy of predictions by looking at the probabilities of class predictions. Accordingly, this metric places additional weight on the "confidence" of each prediction.
5 |
6 | **Estimator Compatibility:** Probabilistic Classifier
7 |
8 | **Score Range:** 0 to 1
9 |
10 | ## Parameters
11 | This metric does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\CrossValidation\Metrics\ProbabilisticAccuracy;
16 |
17 | $metric = new ProbabilisticAccuracy();
18 | ```
19 |
20 | ## References
21 | [^1]: https://mercurius.io/en/learn/predicting-forecasting-football
22 |
--------------------------------------------------------------------------------
/docs/neural-network/initializers/xavier-2.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Xavier 2
4 | The Xavier 2 initializer draws from a uniform distribution [-limit, limit] where *limit* is equal to (6 / (fanIn + fanOut)) ** 0.25. This initializer is best suited for layers that feed into an activation layer that outputs values between -1 and 1 such as [Hyperbolic Tangent](../activation-functions/hyperbolic-tangent.md) and [Softsign](../activation-functions/softsign.md).
5 |
6 | ## Parameters
7 | This initializer does not have any parameters.
8 |
9 | ## Example
10 | ```php
11 | use Rubix\ML\NeuralNet\Initializers\Xavier2;
12 |
13 | $initializer = new Xavier2();
14 | ```
15 |
16 | ## References
17 | [^1]: X. Glorot et al. (2010). Understanding the Difficulty of Training Deep Feedforward Neural Networks.
--------------------------------------------------------------------------------
/docs/cross-validation/k-fold.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # K Fold
4 | K Fold is a cross validation technique that splits the training set into *k* individual folds and for each training round uses 1 of the folds to test the model and the rest as training data. The final score is the average validation score over all of the *k* rounds. K Fold has the advantage of both training and testing on each sample in the dataset at least once.
5 |
6 | **Interfaces:** [Validator](api.md#validator), [Parallel](../parallel.md)
7 |
8 | ## Parameters
9 | | # | Name | Default | Type | Description |
10 | |---|---|---|---|---|
11 | | 1 | k | 5 | int | The number of folds to split the dataset into. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\CrossValidation\KFold;
16 |
17 | $validator = new KFold(5);
18 | ```
--------------------------------------------------------------------------------
/docs/cross-validation/reports/aggregate-report.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Aggregate Report
4 | A report generator that aggregates the output of multiple reports.
5 |
6 | **Estimator Compatibility:** Depends on base reports
7 |
8 | ## Parameters
9 | | # | Name | Default | Type | Description |
10 | |---|---|---|---|---|
11 | | 1 | reports | | array | An array of report generators to aggregate keyed by a user-specified name. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\CrossValidation\Reports\AggregateReport;
16 | use Rubix\ML\CrossValidation\Reports\ConfusionMatrix;
17 | use Rubix\ML\CrossValidation\Reports\MulticlassBreakdown;
18 |
19 | $report = new AggregateReport([
20 | 'breakdown' => new MulticlassBreakdown(),
21 | 'matrix' => new ConfusionMatrix(),
22 | ]);
23 | ```
--------------------------------------------------------------------------------
/docs/estimator.md:
--------------------------------------------------------------------------------
1 | # Estimator
2 | The Estimator interface is implemented by all learners in Rubix ML. It provides basic inference functionality through the `predict()` method which returns a set of predictions from a dataset. Additionally, it provides methods for returning estimator type and data type compatibility declarations.
3 |
4 | ### Make Predictions
5 | Return the predictions from a dataset containing unknown samples in an array:
6 | ```php
7 | public predict(Dataset $dataset) : array
8 | ```
9 |
10 | ```php
11 | $predictions = $estimator->predict($dataset);
12 |
13 | print_r($predictions);
14 | ```
15 |
16 | ```php
17 | Array
18 | (
19 | [0] => married
20 | [1] => divorced
21 | [2] => divorced
22 | [3] => married
23 | )
24 | ```
25 |
26 | !!! note
27 | The return value of `predict()` is an array containing the predictions in the same order that they were indexed in the dataset.
28 |
--------------------------------------------------------------------------------
/docs/clusterers/seeders/k-mc2.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # K-MC2
4 | A fast [Plus Plus](plus-plus.md) approximator that replaces the brute force method with a substantially faster Markov Chain Monte Carlo (MCMC) sampling procedure with comparable results.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | m | 50 | int | The number of candidate nodes in the Markov Chain. |
10 | | 2 | kernel | Euclidean | Distance | The distance kernel used to compute the distance between samples. |
11 |
12 | ## Example
13 | ```php
14 | use Rubix\ML\Clusterers\Seeders\KMC2;
15 | use Rubix\ML\Kernels\Distance\Euclidean;
16 |
17 | $seeder = new KMC2(200, new Euclidean());
18 | ```
19 |
20 | ## References
21 | [^1]: O. Bachem et al. (2016). Approximate K-Means++ in Sublinear Time.
--------------------------------------------------------------------------------
/src/NeuralNet/ActivationFunctions/ActivationFunction.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # He
4 | The He initializer was designed to initialize parameters that feed into rectified [Activation](../hidden-layers/activation.md) layers such as those employing [ReLU](../activation-functions/relu.md), [Leaky ReLU](../activation-functions/leaky-relu.md), or [ELU](../activation-functions/elu.md). It draws values from a uniform distribution with limits defined as +/- (6 / (fanIn + fanOut)) ** (1. / sqrt(2)).
5 |
6 | ## Parameters
7 | This initializer does not have any parameters.
8 |
9 | ## Example
10 | ```php
11 | use Rubix\ML\NeuralNet\Initializers\He;
12 |
13 | $initializer = new He();
14 | ```
15 |
16 | ## References
17 | [^1]: K. He et al. (2015). Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification.
--------------------------------------------------------------------------------
/docs/ranks-features.md:
--------------------------------------------------------------------------------
1 | # Ranks Features
2 | The Ranks Features interface is for learners that can determine the importances of the features used to train them. Low importance is given to feature columns that do not contribute significantly to the model whereas high importance indicates that the feature is more influential. Feature importances can help explain the predictions derived from a model and can also be used to identify informative features for feature selection.
3 |
4 | ### Feature Importances
5 | Return the importance scores of each feature column of the training set:
6 | ```php
7 | public featureImportances() : array
8 | ```
9 |
10 | ```php
11 | $estimator->train($dataset);
12 |
13 | $importances = $estimator->featureImportances();
14 |
15 | print_r($importances);
16 | ```
17 |
18 | ```php
19 | Array
20 | (
21 | [0] => 0.04757
22 | [1] => 0.37948
23 | [2] => 0.53170
24 | [3] => 0.04123
25 | )
26 | ```
27 |
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/brier-score.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Brier Score
4 | Brier Score is a *strictly proper* scoring metric that is equivalent to applying mean squared error to the probabilities of a probabilistic estimator.
5 |
6 | !!! note
7 | In order to maintain the convention of *maximizing* validation scores, this metric outputs the negative
8 | of the original score.
9 |
10 | **Estimator Compatibility:** Probabilistic Classifier
11 |
12 | **Score Range:** -2 to 0
13 |
14 | ## Parameters
15 | This metric does not have any parameters.
16 |
17 | ## Example
18 | ```php
19 | use Rubix\ML\CrossValidation\Metrics\BrierScore;
20 |
21 | $metric = new BrierScore();
22 | ```
23 |
24 | ## References
25 | [^1]: G. W. Brier. (1950). Verification of Forecasts Expressed in Terms of Probability.
26 |
--------------------------------------------------------------------------------
/docs/transformers/max-absolute-scaler.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Max Absolute Scaler
4 | Scale the sample matrix by the maximum absolute value of each feature column independently such that the feature value is between -1 and 1.
5 |
6 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful), [Elastic](api.md#elastic), [Reversible](api.md#reversible), [Persistable](../persistable.md)
7 |
8 | **Data Type Compatibility:** Continuous
9 |
10 | ## Parameters
11 | This transformer does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\Transformers\MaxAbsoluteScaler;
16 |
17 | $transformer = new MaxAbsoluteScaler();
18 | ```
19 |
20 | ## Additional Methods
21 | Return the maximum absolute values for each feature column:
22 | ```php
23 | public maxabs() : array
24 | ```
25 |
--------------------------------------------------------------------------------
/docs/neural-network/hidden-layers/dropout.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Dropout
4 | Dropout is a regularization technique to reduce overfitting in neural networks by preventing complex co-adaptations on training data. It works by temporarily disabling output nodes during each training pass. It also acts as an efficient way of performing model averaging with the parameters of neural networks.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | ratio | 0.5 | float | The ratio of nodes that are dropped during each training pass. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\NeuralNet\Layers\Dropout;
14 |
15 | $layer = new Dropout(0.2);
16 | ```
17 |
18 | ## References
19 | [^1]: N. Srivastava et al. (2014). Dropout: A Simple Way to Prevent Neural Networks from Overfitting.
--------------------------------------------------------------------------------
/docs/neural-network/optimizers/adagrad.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # AdaGrad
4 | Short for *Adaptive Gradient*, the AdaGrad Optimizer speeds up the learning of parameters that do not change often and slows down the learning of parameters that do enjoy heavy activity. Due to AdaGrad's infinitely decaying step size, training may be slow or fail to converge using a low learning rate.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | rate | 0.01 | float | The learning rate that controls the global step size. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\NeuralNet\Optimizers\AdaGrad;
14 |
15 | $optimizer = new AdaGrad(0.125);
16 | ```
17 |
18 | ## References
19 | [^1]: J. Duchi et al. (2011). Adaptive Subgradient Methods for Online Learning and Stochastic Optimization.
--------------------------------------------------------------------------------
/docs/kernels/distance/minkowski.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Minkowski
4 | The Minkowski distance can be considered as a generalization of both the [Euclidean](euclidean.md) and [Manhattan](manhattan.md) distances. When the lambda parameter is set to 1 or 2, the distance is equivalent to Manhattan and Euclidean respectively.
5 |
6 | $$
7 | {\displaystyle Minkowski\left(a,b\right)=\left(\sum _{i=1}^{n}|a_{i}-b_{i}|^{p}\right)^{\frac {1}{p}}}
8 | $$
9 |
10 | **Data Type Compatibility:** Continuous
11 |
12 | ## Parameters
13 | | # | Name | Default | Type | Description |
14 | |---|---|---|---|---|
15 | | 1 | lambda | 3.0 | float | Controls the curvature of the unit circle drawn from a point at a fixed distance. |
16 |
17 | ## Example
18 | ```php
19 | use Rubix\ML\Kernels\Distance\Minkowski;
20 |
21 | $kernel = new Minkowski(4.0);
22 | ```
--------------------------------------------------------------------------------
/docs/neural-network/optimizers/rms-prop.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # RMS Prop
4 | An adaptive gradient technique that divides the current gradient over a rolling window of the magnitudes of recent gradients. Unlike [AdaGrad](adagrad.md), RMS Prop does not suffer from an infinitely decaying step size.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | rate | 0.001 | float | The learning rate that controls the global step size. |
10 | | 2 | decay | 0.1 | float | The decay rate of the rms property. |
11 |
12 | ## Example
13 | ```php
14 | use Rubix\ML\NeuralNet\Optimizers\RMSProp;
15 |
16 | $optimizer = new RMSProp(0.01, 0.1);
17 | ```
18 |
19 | ## References
20 | [^1]: T. Tieleman et al. (2012). Lecture 6e rmsprop: Divide the gradient by a running average of its recent magnitude.
--------------------------------------------------------------------------------
/docs/parallel.md:
--------------------------------------------------------------------------------
1 | # Parallel
2 | Multiprocessing is the use of two or more processes that execute in parallel. Objects that implement the Parallel interface can take advantage of multicore processors by executing parts or all of the algorithm in parallel. Choose a number of processes equal to the number of CPU cores in order to take advantage of a system's full processing capability.
3 |
4 | !!! note
5 | Most parallel learners are configured to use the [Serial](backends/serial.md) backend by default.
6 |
7 | ## Set a Backend
8 | Parallelizable objects can utilize a parallel processing Backend by passing it to the `setBackend()` method.
9 |
10 | To set the backend processing engine:
11 | ```php
12 | public setBackend(Backend $backend) : void
13 | ```
14 |
15 | ```php
16 | use Rubix\ML\Classifiers\RandomForest;
17 | use Rubix\ML\Backends\Amp;
18 |
19 | $estimator = new RandomForest();
20 |
21 | $estimator->setBackend(new Amp(16));
22 | ```
23 |
--------------------------------------------------------------------------------
/docs/tokenizers/k-skip-n-gram.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # K-Skip-N-Gram
4 | K-skip-n-grams are a technique similar to n-grams, whereby n-grams are formed but in addition to allowing adjacent sequences of words, the next *k* words will be skipped forming n-grams of the new forward looking sequences. The tokenizer outputs tokens ranging from *min* to *max* number of words per token.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | min | 2 | int | The minimum number of words in a single token. |
10 | | 2 | max | 2 | int | The maximum number of words in a single token. |
11 | | 3 | skip | 2 | int | The number of words to skip over to form new sequences. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\Tokenizers\KSkipNGram;
16 |
17 | $tokenizer = new KSkipNGram(2, 3, 2);
18 | ```
19 |
--------------------------------------------------------------------------------
/docs/transformers/text-normalizer.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Text Normalizer
4 | Converts all the characters in a blob of text to the same case.
5 |
6 | **Interfaces:** [Transformer](api.md#transformer)
7 |
8 | **Data Type Compatibility:** Categorical
9 |
10 | !!! note
11 | This transformer does not handle multibyte strings. For multibyte support, see [MultibyteTextNormalizer](multibyte-text-normalizer.md).
12 |
13 | ## Parameters
14 | | # | Name | Default | Type | Description |
15 | |---|---|---|---|---|
16 | | 1 | uppercase | false | bool | Should the text be converted to uppercase? |
17 |
18 | ## Example
19 | ```php
20 | use Rubix\ML\Transformers\TextNormalizer;
21 |
22 | $transformer = new TextNormalizer(false);
23 | ```
24 |
25 | ## Additional Methods
26 | This transformer does not have any additional methods.
27 |
28 |
--------------------------------------------------------------------------------
/src/CrossValidation/Metrics/Metric.php:
--------------------------------------------------------------------------------
1 |
23 | */
24 | public function compatibility() : array;
25 |
26 | /**
27 | * Score a set of predictions and their ground-truth labels.
28 | *
29 | * @param list $predictions
30 | * @param list $labels
31 | * @return float
32 | */
33 | public function score(array $predictions, array $labels) : float;
34 | }
35 |
--------------------------------------------------------------------------------
/docs/neural-network/activation-functions/relu.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # ReLU
4 | Rectified Linear Units (ReLU) only output the positive signal of the input. They have the benefit of having a monotonic derivative and are cheap to compute.
5 |
6 | $$
7 | {\displaystyle ReLU = {\begin{aligned}&{\begin{cases}0&{\text{if }}x\leq 0\\x&{\text{if }}x>0\end{cases}}=&\max\{0,x\}\end{aligned}}}
8 | $$
9 |
10 | ## Parameters
11 | This activation function does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\ActivationFunctions\ReLU;
16 |
17 | $activationFunction = new ReLU();
18 | ```
19 |
20 | ## References
21 | [^1]: A. L. Maas et al. (2013). Rectifier Nonlinearities Improve Neural Network Acoustic Models.
22 | [^2]: K. Konda et al. (2015). Zero-bias Autoencoders and the Benefits of Co-adapting Features.
--------------------------------------------------------------------------------
/docs/extractors/ndjson.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # NDJSON
4 | [NDJSON](http://ndjson.org/) or *Newline Delimited* JSON files contain rows of data encoded in JavaScript Object Notation (JSON) arrays or objects. The format is like a mix of JSON and CSV and has the advantage of retaining data type information and being read into memory incrementally.
5 |
6 | !!! note
7 | Empty lines are ignored by the parser.
8 |
9 | **Interfaces:** [Extractor](api.md), [Writable](api.md)
10 |
11 | ## Parameters
12 | | # | Name | Default | Type | Description |
13 | |---|---|---|---|---|
14 | | 1 | path | | string | The path to the NDJSON file. |
15 |
16 | ## Example
17 | ```php
18 | use Rubix\ML\Extractors\NDJSON;
19 |
20 | $extractor = new NDJSON('example.ndjson');
21 | ```
22 |
23 | ## Additional Methods
24 | This extractor does not have any additional methods.
25 |
--------------------------------------------------------------------------------
/docs/neural-network/optimizers/step-decay.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Step Decay
4 | A learning rate decay optimizer that reduces the global learning rate by a factor whenever it reaches a new *floor*. The number of steps needed to reach a new floor is defined by the *steps* hyper-parameter.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | rate | 0.01 | float | The learning rate that controls the global step size. |
10 | | 2 | steps | 100 | int | The size of every floor in steps. i.e. the number of steps to take before applying another factor of decay. |
11 | | 3 | decay | 1e-3 | float | The factor to decrease the learning rate at each *floor*. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\Optimizers\StepDecay;
16 |
17 | $optimizer = new StepDecay(0.1, 50, 1e-3);
18 | ```
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/completeness.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Completeness
4 | A ground-truth clustering metric that measures the ratio of samples in a class that are also members of the same cluster. A cluster is said to be *complete* when all the samples in a class are contained in a cluster.
5 |
6 | $$
7 | {\displaystyle Completeness = 1-\frac{H(K, C)}{H(K)}}
8 | $$
9 |
10 | !!! note
11 | Since this metric monotonically improves as the number of target clusters decreases, it should not be used as a metric to guide hyper-parameter tuning.
12 |
13 | **Estimator Compatibility:** Clusterer
14 |
15 | **Score Range:** 0 to 1
16 |
17 | ## Parameters
18 | This metric does not have any parameters.
19 |
20 | ## Example
21 | ```php
22 | use Rubix\ML\CrossValidation\Metrics\Completeness;
23 |
24 | $metric = new Completeness();
25 | ```
--------------------------------------------------------------------------------
/src/NeuralNet/CostFunctions/CostFunction.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Homogeneity
4 | A ground-truth clustering metric that measures the ratio of samples in a cluster that are also members of the same class. A cluster is said to be *homogeneous* when the entire cluster is comprised of a single class of samples.
5 |
6 | $$
7 | {\displaystyle Homogeneity = 1-\frac{H(C, K)}{H(C)}}
8 | $$
9 |
10 | !!! note
11 | Since this metric monotonically improves as the number of target clusters increases, it should not be used as a metric to guide hyper-parameter tuning.
12 |
13 | **Estimator Compatibility:** Clusterer
14 |
15 | **Score Range:** 0 to 1
16 |
17 | ## Parameters
18 | This metric does not have any parameters.
19 |
20 | ## Example
21 | ```php
22 | use Rubix\ML\CrossValidation\Metrics\Homogeneity;
23 |
24 | $metric = new Homogeneity();
25 | ```
--------------------------------------------------------------------------------
/docs/neural-network/activation-functions/selu.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # SELU
4 | Scaled Exponential Linear Units (SELU) are a self-normalizing activation function based on the [ELU](elu.md) activation function. Neuronal activations of SELU networks automatically converge toward zero mean and unit variance, unlike explicitly normalized networks such as those with [Batch Norm](../hidden-layers/batch-norm.md) hidden layers.
5 |
6 | $$
7 | {\displaystyle SELU = 1.0507 {\begin{cases}1.67326 (e^{x}-1)&{\text{if }}x<0\\x&{\text{if }}x\geq 0\end{cases}}}
8 | $$
9 |
10 | ## Parameters
11 | This activation function does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\ActivationFunctions\SELU;
16 |
17 | $activationFunction = new SELU();
18 | ```
19 |
20 | ## References
21 | [^1]: G. Klambauer et al. (2017). Self-Normalizing Neural Networks.
22 |
--------------------------------------------------------------------------------
/src/Serializers/Serializer.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Image Resizer
4 | Image Resizer fits (scales and crops) images to a user-specified width and height that preserves aspect ratio.
5 |
6 | !!! note
7 | The [GD extension](https://php.net/manual/en/book.image.php) is required to use this transformer.
8 |
9 | **Interfaces:** [Transformer](api.md#transformer)
10 |
11 | **Data Type Compatibility:** Image
12 |
13 | ## Parameters
14 | | # | Name | Default | Type | Description |
15 | |---|---|---|---|---|
16 | | 1 | width | 32 | int | The width of the resized image. |
17 | | 2 | height | 32 | int | The height of the resized image. |
18 |
19 | ## Example
20 | ```php
21 | use Rubix\ML\Transformers\ImageResizer;
22 |
23 | $transformer = new ImageResizer(28, 28);
24 | ```
25 |
26 | ## Additional Methods
27 | This transformer does not have any additional methods.
28 |
--------------------------------------------------------------------------------
/tests/DeferredTest.php:
--------------------------------------------------------------------------------
1 | deferred = new Deferred(function ($a, $b) {
25 | return $a + $b;
26 | }, [1, 2]);
27 | }
28 |
29 | /**
30 | * @test
31 | */
32 | public function build() : void
33 | {
34 | $this->assertInstanceOf(Deferred::class, $this->deferred);
35 | $this->assertIsCallable($this->deferred);
36 | }
37 |
38 | /**
39 | * @test
40 | */
41 | public function compute() : void
42 | {
43 | $this->assertEquals(3, $this->deferred->compute());
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/docs/clusterers/seeders/plus-plus.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Plus Plus
4 | This seeder attempts to maximize the chances of seeding distant clusters while still remaining random. It does so by sequentially selecting random samples weighted by their distance from the previous seed.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | kernel | Euclidean | Distance | The distance kernel used to compute the distance between samples. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\Clusterers\Seeders\PlusPlus;
14 | use Rubix\ML\Kernels\Distance\Minkowski;
15 |
16 | $seeder = new PlusPlus(new Minkowski(5.0));
17 | ```
18 |
19 | ## References
20 | [^1]: D. Arthur et al. (2006). k-means++: The Advantages of Careful Seeding.
21 | [^2]: A. Stetco et al. (2015). Fuzzy C-means++: Fuzzy C-means with effective seeding initialization.
--------------------------------------------------------------------------------
/docs/cross-validation/reports/contingency-table.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Contingency Table
4 | A Contingency Table is used to display the frequency distribution of class labels among a clustering. It is similar to a [Confusion Matrix](confusion-matrix.md) but uses the labels to establish ground-truth for a clustering problem instead.
5 |
6 | **Estimator Compatibility:** Clusterer
7 |
8 | ## Parameters
9 | This report does not have any parameters.
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\CrossValidation\Reports\ContingencyTable;
14 |
15 | $report = new ContingencyTable();
16 |
17 | $result = $report->generate($predictions, $labels);
18 |
19 | echo $result;
20 | ```
21 |
22 | ```json
23 | [
24 | {
25 | "lamb": 11,
26 | "wolf": 2
27 | },
28 | {
29 | "lamb": 1,
30 | "wolf": 5
31 | }
32 | ]
33 | ```
34 |
--------------------------------------------------------------------------------
/docs/neural-network/optimizers/adamax.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # AdaMax
4 | A version of the [Adam](adam.md) optimizer that replaces the RMS property with the infinity norm of the past gradients. As such, AdaMax is generally more suitable for sparse parameter updates and noisy gradients.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | rate | 0.001 | float | The learning rate that controls the global step size. |
10 | | 2 | momentumDecay | 0.1 | float | The decay rate of the accumulated velocity. |
11 | | 3 | normDecay | 0.001 | float | The decay rate of the infinity norm. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\Optimizers\AdaMax;
16 |
17 | $optimizer = new AdaMax(0.0001, 0.1, 0.001);
18 | ```
19 |
20 | ## References
21 | [^1]: D. P. Kingma et al. (2014). Adam: A Method for Stochastic Optimization.
--------------------------------------------------------------------------------
/src/constants.php:
--------------------------------------------------------------------------------
1 | test($estimator, $dataset, new Accuracy());
20 |
21 | echo $score;
22 | ```
23 |
24 | ```
25 | 0.75
26 | ```
--------------------------------------------------------------------------------
/docs/datasets/unlabeled.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Unlabeled
4 | Unlabeled datasets are used to train unsupervised learners and for feeding unknown samples into an estimator to make predictions. As their name implies, they do not require a corresponding label for each sample.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | samples | | array | A 2-dimensional array consisting of rows of samples and columns with feature values. |
10 | | 2 | verify | true | bool | Should we verify the data? |
11 |
12 | ## Example
13 |
14 | ```php
15 | use Rubix\ML\Datasets\Unlabeled;
16 |
17 | $samples = [
18 | [0.1, 20, 'furry'],
19 | [2.0, -5, 'rough'],
20 | [0.001, -10, 'rough'],
21 | ];
22 |
23 | $dataset = new Unlabeled($samples);
24 | ```
25 |
26 | ## Additional Methods
27 | This dataset does not have any additional methods.
28 |
--------------------------------------------------------------------------------
/docs/neural-network/activation-functions/thresholded-relu.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Thresholded ReLU
4 | A version of the [ReLU](relu.md) function that activates only if the input is above some user-specified threshold level.
5 |
6 | $$
7 | {\displaystyle ThresholdedReLU = {\begin{aligned}&{\begin{cases}0&{\text{if }}x\leq \theta \\x&{\text{if }}x>\theta\end{cases}}\end{aligned}}}
8 | $$
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | threshold | 1.0 | float | The threshold at which the neuron is activated. |
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\NeuralNet\ActivationFunctions\ThresholdedReLU;
18 |
19 | $activationFunction = new ThresholdedReLU(0.5);
20 | ```
21 |
22 | ## References
23 | [^1]: K. Konda et al. (2015). Zero-bias autoencoders and the benefits of co-adapting features.
24 |
--------------------------------------------------------------------------------
/src/Backends/Backend.php:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Swish
4 | Swish is a parametric activation layer that utilizes smooth rectified activation functions. The trainable *beta* parameter allows each activation function in the layer to tailor its output to the training set by interpolating between the linear function and ReLU.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | initializer | Constant | Initializer | The initializer of the beta parameter. |
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\NeuralNet\Layers\Swish;
14 | use Rubix\ML\NeuralNet\Initializers\Constant;
15 |
16 | $layer = new Swish(new Constant(1.0));
17 | ```
18 |
19 | ## References
20 | [^1]: P. Ramachandran et al. (2017). Swish: A Self-gated Activation Function.
21 | [^2]: P. Ramachandran et al. (2017). Searching for Activation Functions.
22 |
--------------------------------------------------------------------------------
/docs/transformers/numeric-string-converter.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Numeric String Converter
4 | Convert all numeric strings to their equivalent integer and floating point types. Useful for when extracting from a source that only recognizes data as string types such as CSV.
5 |
6 | !!! note
7 | The string representations of the PHP constants `NAN` and `INF` are the string literals 'NAN' and 'INF' respectively.
8 |
9 | **Interfaces:** [Transformer](api.md#transformer), [Reversible](api.md#reversible)
10 |
11 | **Data Type Compatibility:** Categorical
12 |
13 | ## Parameters
14 | This transformer does not have any parameters.
15 |
16 | ## Example
17 | ```php
18 | use Rubix\ML\Transformers\NumericStringConverter;
19 |
20 | $transformer = new NumericStringConverter();
21 | ```
22 |
23 | ## Additional Methods
24 | This transformer does not have any additional methods.
25 |
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/informedness.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Informedness
4 | Informedness is a multiclass generalization of Youden's J Statistic and can be interpreted as the probability that an estimator will make an informed prediction. Its value ranges from -1 through 1 and has a value of 0 when the test yields no useful information.
5 |
6 | $$
7 | {\displaystyle Informedness = {\frac {\text{TP}}{{\text{TP}}+{\text{FN}}}}+{\frac {\text{TN}}{{\text{TN}}+{\text{FP}}}}-1}
8 | $$
9 |
10 | **Estimator Compatibility:** Classifier, Anomaly Detector
11 |
12 | **Score Range:** -1 to 1
13 |
14 | ## Parameters
15 | This metric does not have any parameters.
16 |
17 | ## Example
18 | ```php
19 | use Rubix\ML\CrossValidation\Metrics\Informedness;
20 |
21 | $metric = new Informedness();
22 | ```
23 |
24 | ## References
25 | [^1]: W. J. Youden. (1950). Index for Rating Diagnostic Tests.
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/mean-squared-error.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Mean Squared Error
4 | A scale-dependent regression metric that gives greater weight to error scores the worse they are. Formally, Mean Squared Error (MSE) is the average of the squared differences between a set of predictions and their target labels.
5 |
6 | $$
7 | {\displaystyle \operatorname {MSE} = {\frac {1}{n}}\sum _{i=1}^{n}(Y_{i}-{\hat {Y_{i}}})^{2}}
8 | $$
9 |
10 | !!! note
11 | In order to maintain the convention of *maximizing* validation scores, this metric outputs the negative of the original score.
12 |
13 | **Estimator Compatibility:** Regressor
14 |
15 | **Score Range:** -∞ to 0
16 |
17 | ## Parameters
18 | This metric does not have any parameters.
19 |
20 | ## Example
21 | ```php
22 | use Rubix\ML\CrossValidation\Metrics\MeanSquaredError;
23 |
24 | $metric = new MeanSquaredError();
25 | ```
--------------------------------------------------------------------------------
/src/Clusterers/Seeders/Random.php:
--------------------------------------------------------------------------------
1 | >
26 | */
27 | public function seed(Dataset $dataset, int $k) : array
28 | {
29 | return $dataset->randomSubset($k)->samples();
30 | }
31 |
32 | /**
33 | * Return the string representation of the object.
34 | *
35 | * @internal
36 | *
37 | * @return string
38 | */
39 | public function __toString() : string
40 | {
41 | return 'Random';
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/mean-absolute-error.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Mean Absolute Error
4 | A scale-dependent metric that measures the average absolute error between a set of predictions and their ground-truth labels. One of the nice properties of MAE is that it has the same units of measurement as the labels being estimated.
5 |
6 | $$
7 | {\displaystyle \mathrm {MAE} = {\frac {1}{n}}{\sum _{i=1}^{n}\left |Y_{i}-\hat {Y_{i}}\right|}}
8 | $$
9 |
10 | !!! note
11 | In order to maintain the convention of *maximizing* validation scores, this metric outputs the negative of the original score.
12 |
13 | **Estimator Compatibility:** Regressor
14 |
15 | **Score Range:** -∞ to 0
16 |
17 | ## Parameters
18 | This metric does not have any parameters.
19 |
20 | ## Example
21 | ```php
22 | use Rubix\ML\CrossValidation\Metrics\MeanAbsoluteError;
23 |
24 | $metric = new MeanAbsoluteError();
25 | ```
--------------------------------------------------------------------------------
/docs/learner.md:
--------------------------------------------------------------------------------
1 | # Learner
2 | Most estimators have the ability to be trained with data. These estimators are called *Learners* and require training before they can make predictions. Training is the process of feeding data to the learner so that it can form a generalized representation or *model* of the dataset.
3 |
4 | ### Train a Learner
5 | To train a learner pass a training dataset as argument to the `train()` method:
6 | ```php
7 | public train(Dataset $training) : void
8 | ```
9 |
10 | ```php
11 | $estimator->train($dataset);
12 | ```
13 |
14 | !!! note
15 | Calling the `train()` method on an already trained learner will erase its previous training. If you would like to train a model incrementally, you can do so with learners implementing the [Online](online.md) interface.
16 |
17 | ### Is the Learner Trained?
18 | Return whether or not the learner has been trained:
19 | ```php
20 | public trained() : bool
21 | ```
22 |
23 | ```php
24 | var_dump($estimator->trained());
25 | ```
26 |
27 | ```
28 | bool(true)
29 | ```
30 |
--------------------------------------------------------------------------------
/docs/transformers/interval-discretizer.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Interval Discretizer
4 | Assigns continuous features to ordered categories using variable width per-feature histograms with a fixed user-specified number of bins.
5 |
6 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful), [Persistable](../persistable.md)
7 |
8 | **Data Type Compatibility:** Continuous
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | bins | 5 | int | The number of bins per histogram. |
14 | | 2 | equiWidth | false | bool | Should the bins be equal width? |
15 |
16 | ## Example
17 | ```php
18 | use Rubix\ML\Transformers\IntervalDiscretizer;
19 |
20 | $transformer = new IntervalDiscretizer(8, false);
21 | ```
22 |
23 | ## Additional Methods
24 | Return the bin intervals of the fitted data:
25 | ```php
26 | public intervals() : array
27 | ```
28 |
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/median-absolute-error.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Median Absolute Error
4 | Median Absolute Error (MAD) is a robust measure of error, similar to [MAE](mean-absolute-error.md), that ignores highly erroneous predictions. Since MAD is a robust statistic, it works well even when used to measure non-normal distributions.
5 |
6 | $$
7 | {\displaystyle \operatorname {MAD} = \operatorname {median} (|Y_{i}-{\hat {Y_{i}}}|)}
8 | $$
9 |
10 | !!! note
11 | In order to maintain the convention of *maximizing* validation scores, this metric outputs the negative of the original score.
12 |
13 | **Estimator Compatibility:** Regressor
14 |
15 | **Score Range:** -∞ to 0
16 |
17 | ## Parameters
18 | This metric does not have any parameters.
19 |
20 | ## Example
21 | ```php
22 | use Rubix\ML\CrossValidation\Metrics\MedianAbsoluteError;
23 |
24 | $metric = new MedianAbsoluteError();
25 | ```
26 |
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/rmse.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # RMSE
4 | The Root Mean Squared Error (RMSE) is equivalent to the standard deviation of the error residuals in a regression problem. Since RMSE is just the square root of the [MSE](mean-squared-error.md), RMSE is also sensitive to outliers because larger errors have a disproportionately large effect on the score.
5 |
6 | $$
7 | {\displaystyle \operatorname {RMSE} = {\sqrt{ \frac {1}{n} \sum _{i=1}^{n}(Y_{i}-{\hat {Y_{i}}})^{2}}}}
8 | $$
9 |
10 | !!! note
11 | In order to maintain the convention of *maximizing* validation scores, this metric outputs the negative of the original score.
12 |
13 | **Estimator Compatibility:** Regressor
14 |
15 | **Score Range:** -∞ to 0
16 |
17 | ## Parameters
18 | This metric does not have any parameters.
19 |
20 | ## Example
21 | ```php
22 | use Rubix\ML\CrossValidation\Metrics\RMSE;
23 |
24 | $metric = new RMSE();
25 | ```
--------------------------------------------------------------------------------
/src/Traits/LoggerAware.php:
--------------------------------------------------------------------------------
1 | logger = $logger;
33 | }
34 |
35 | /**
36 | * Return the PSR-3 logger instance.
37 | *
38 | * @return LoggerInterface|null
39 | */
40 | public function logger() : ?LoggerInterface
41 | {
42 | return $this->logger;
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/tests/Kernels/SVM/LinearTest.php:
--------------------------------------------------------------------------------
1 | kernel = new Linear();
27 | }
28 |
29 | /**
30 | * @test
31 | */
32 | public function build() : void
33 | {
34 | $this->assertInstanceOf(Linear::class, $this->kernel);
35 | $this->assertInstanceOf(Kernel::class, $this->kernel);
36 | }
37 |
38 | /**
39 | * @test
40 | */
41 | public function options() : void
42 | {
43 | $expected = [102 => 0];
44 |
45 | $this->assertEquals($expected, $this->kernel->options());
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/docs/transformers/one-hot-encoder.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # One Hot Encoder
4 | The One Hot Encoder takes a categorical feature column and produces an n-dimensional continuous representation where *n* is equal to the number of unique categories present in that column. A `0` in any location indicates that the category represented by that column is not present in the sample, whereas a `1` indicates that a category is present.
5 |
6 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful), [Persistable](../persistable.md)
7 |
8 | **Data Type Compatibility:** Categorical
9 |
10 | ## Parameters
11 | This transformer does not have any parameters.
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\Transformers\OneHotEncoder;
16 |
17 | $transformer = new OneHotEncoder();
18 | ```
19 |
20 | ## Additional Methods
21 | Return the categories computed during fitting indexed by feature column:
22 | ```php
23 | public categories() : ?array
24 | ```
25 |
--------------------------------------------------------------------------------
/src/Strategies/Strategy.php:
--------------------------------------------------------------------------------
1 | $values
39 | */
40 | public function fit(array $values) : void;
41 |
42 | /**
43 | * Make a guess.
44 | *
45 | * @internal
46 | *
47 | * @return string|int|float
48 | */
49 | public function guess();
50 | }
51 |
--------------------------------------------------------------------------------
/docs/backends/amp.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Amp
4 | [Amp Parallel](https://amphp.org/parallel/) is a multiprocessing subsystem that requires no extensions. It uses a non-blocking concurrency framework that implements coroutines using PHP generator functions under the hood.
5 |
6 | !!! note
7 | The optimal number of workers will depend on the system specifications of the computer. Fewer workers than CPU cores may not achieve full processing potential but more workers than cores can cause excess overhead.
8 |
9 | ## Parameters
10 | | # | Name | Default | Type | Description |
11 | |---|---|---|---|---|
12 | | 1 | workers | Auto | int | The maximum number of workers in the worker pool. If null then tries to autodetect CPU core count. |
13 |
14 | ## Example
15 | ```php
16 | use Rubix\ML\Backends\Amp;
17 |
18 | $backend = new Amp(16);
19 | ```
20 |
21 | ## Additional Methods
22 | Return the maximum number of workers in the worker pool:
23 | ```php
24 | public workers() : int
25 | ```
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/v-measure.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # V Measure
4 | V Measure is an entropy-based clustering metric that balances [Homogeneity](homogeneity.md) and [Completeness](completeness.md). It has the additional property of being symmetric in that the predictions and ground-truth can be swapped without changing the score.
5 |
6 | $$
7 | {\displaystyle V_{\beta} = \frac{(1+\beta)hc}{\beta h + c}}
8 | $$
9 |
10 | **Estimator Compatibility:** Clusterer
11 |
12 | **Score Range:** 0 to 1
13 |
14 | ## Parameters
15 | | # | Name | Default | Type | Description |
16 | |---|---|---|---|---|
17 | | 1 | beta | 1.0 | float | The ratio of weight given to homogeneity over completeness. |
18 |
19 | ## Example
20 | ```php
21 | use Rubix\ML\CrossValidation\Metrics\VMeasure;
22 |
23 | $metric = new VMeasure(1.0);
24 | ```
25 |
26 | ## References
27 | [^1]: A. Rosenberg et al. (2007). V-Measure: A conditional entropy-based external cluster evaluation measure.
--------------------------------------------------------------------------------
/docs/datasets/generators/circle.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Circle
4 | Creates a dataset of points forming a circle in 2 dimensions. The label of each sample is the random value used to generate the projection measured in degrees.
5 |
6 | **Data Types:** Continuous
7 |
8 | **Label Type:** Continuous
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | x | 0.0 | float | The *x* coordinate of the center of the circle. |
14 | | 2 | y | 0.0 | float | The *y* coordinate of the center of the circle. |
15 | | 3 | scale | 1.0 | float | The scaling factor of the circle. |
16 | | 4 | noise | 0.1 | float | The amount of Gaussian noise to add to each data point as a ratio of the scaling factor. |
17 |
18 | ## Example
19 | ```php
20 | use Rubix\ML\Datasets\Generators\Circle;
21 |
22 | $generator = new Circle(0.0, 0.0, 100, 0.1);
23 | ```
24 |
25 | ## Additional Methods
26 | This generator does not have any additional methods.
27 |
--------------------------------------------------------------------------------
/src/Backends/Tasks/TrainLearner.php:
--------------------------------------------------------------------------------
1 | train($dataset);
31 |
32 | return $estimator;
33 | }
34 |
35 | /**
36 | * @param Learner $estimator
37 | * @param Dataset $dataset
38 | */
39 | public function __construct(Learner $estimator, Dataset $dataset)
40 | {
41 | parent::__construct([self::class, 'train'], [$estimator, $dataset]);
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/docs/extractors/column-picker.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Column Picker
4 | An extractor that wraps another iterator and selects and reorders the columns of the data table according to the keys specified by the user. The key of a column may either be a string or a column number (integer) depending on the way the columns are indexed in the base iterator.
5 |
6 | **Interfaces:** [Extractor](api.md)
7 |
8 | ## Parameters
9 | | # | Name | Default | Type | Description |
10 | |---|---|---|---|---|
11 | | 1 | iterator | | Traversable | The base iterator. |
12 | | 2 | keys | | array | The string and/or integer keys of the columns to pick and reorder from the table. |
13 |
14 | ## Example
15 | ```php
16 | use Rubix\ML\Extractors\ColumnPicker;
17 | use Rubix\ML\Extractors\CSV;
18 |
19 | $extractor = new ColumnPicker(new CSV('example.csv', true), [
20 | 'attitude', 'texture', 'class', 'rating',
21 | ]);
22 | ```
23 |
24 | ## Additional Methods
25 | This extractor does not have any additional methods.
26 |
--------------------------------------------------------------------------------
/docs/neural-network/activation-functions/elu.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # ELU
4 | *Exponential Linear Units* are a type of rectifier that soften the transition from non-activated to activated using the exponential function. As such, ELU produces smoother gradients than the piecewise linear [ReLU](relu.md) function.
5 |
6 | $$
7 | {\displaystyle ELU = {\begin{cases}\alpha \left(e^{x}-1\right)&{\text{if }}x\leq 0\\x&{\text{if }}x>0\end{cases}}}
8 | $$
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | alpha | 1.0 | float | The value at which leakage will begin to saturate. Ex. alpha = 1.0 means that the output will never be less than -1.0 when inactivated. |
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\NeuralNet\ActivationFunctions\ELU;
18 |
19 | $activationFunction = new ELU(2.5);
20 | ```
21 |
22 | ## References
23 | [^1]: D. A. Clevert et al. (2016). Fast and Accurate Deep Network Learning by Exponential Linear Units.
24 |
--------------------------------------------------------------------------------
/src/Backends/Tasks/Predict.php:
--------------------------------------------------------------------------------
1 |
27 | */
28 | public static function predict(Estimator $estimator, Dataset $dataset) : array
29 | {
30 | return $estimator->predict($dataset);
31 | }
32 |
33 | /**
34 | * @param Estimator $estimator
35 | * @param Dataset $dataset
36 | */
37 | public function __construct(Estimator $estimator, Dataset $dataset)
38 | {
39 | parent::__construct([self::class, 'predict'], [$estimator, $dataset]);
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/f-beta.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # F-Beta
4 | A weighted harmonic mean of precision and recall, F-Beta is both a versatile and balanced metric. The beta parameter controls the weight of precision in the combined score. As beta goes to infinity the score only considers recall, whereas when it goes to 0 it only considers precision. When beta is equal to 1, this metric is called an F1 score.
5 |
6 | $$
7 | {\displaystyle F_\beta = (1 + \beta^2) \cdot \frac{\mathrm{precision} \cdot \mathrm{recall}}{(\beta^2 \cdot \mathrm{precision}) + \mathrm{recall}}}
8 | $$
9 |
10 | **Estimator Compatibility:** Classifier, Anomaly Detector
11 |
12 | **Score Range:** 0 to 1
13 |
14 | ## Parameters
15 | | # | Name | Default | Type | Description |
16 | |---|---|---|---|---|
17 | | 1 | beta | 1.0 | float | The ratio of weight given to recall over precision. |
18 |
19 | ## Example
20 | ```php
21 | use Rubix\ML\CrossValidation\Metrics\FBeta;
22 |
23 | $metric = new FBeta(0.7);
24 | ```
--------------------------------------------------------------------------------
/docs/transformers/polynomial-expander.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Polynomial Expander
4 | This transformer will generate polynomials up to and including the specified *degree* of each continuous feature. Polynomial expansion is sometimes used to fit data that is non-linear using a linear estimator such as [Ridge](../regressors/ridge.md), [Logistic Regression](../classifiers/logistic-regression.md), or [Softmax Classifier](../classifiers/softmax-classifier.md).
5 |
6 | **Interfaces:** [Transformer](api.md#transformer)
7 |
8 | **Data Type Compatibility:** Continuous only
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | degree | 2 | int | The degree of the polynomials to generate for each feature. |
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\Transformers\PolynomialExpander;
18 |
19 | $transformer = new PolynomialExpander(3);
20 | ```
21 |
22 | ## Additional Methods
23 | This transformer does not have any additional methods.
24 |
--------------------------------------------------------------------------------
/tests/Kernels/SVM/RBFTest.php:
--------------------------------------------------------------------------------
1 | kernel = new RBF(1e-3);
27 | }
28 |
29 | /**
30 | * @test
31 | */
32 | public function build() : void
33 | {
34 | $this->assertInstanceOf(RBF::class, $this->kernel);
35 | $this->assertInstanceOf(Kernel::class, $this->kernel);
36 | }
37 |
38 | /**
39 | * @test
40 | */
41 | public function options() : void
42 | {
43 | $options = [
44 | 102 => 2,
45 | 201 => 1e-3,
46 | ];
47 |
48 | $this->assertEquals($options, $this->kernel->options());
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/docs/cross-validation/reports/confusion-matrix.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Confusion Matrix
4 | A Confusion Matrix is a square matrix (table) that visualizes the true positives, false positives, true negatives, and false negatives of a set of predictions and their corresponding labels.
5 |
6 | **Estimator Compatibility:** Classifier, Anomaly Detector
7 |
8 | ## Parameters
9 | This report does not have any parameters.
10 |
11 | ## Example
12 | ```php
13 | use Rubix\ML\CrossValidation\Reports\ConfusionMatrix;
14 |
15 | $report = new ConfusionMatrix();
16 |
17 | $result = $report->generate($predictions, $labels);
18 |
19 | echo $result;
20 | ```
21 |
22 | ```json
23 | {
24 | "dog": {
25 | "dog": 12,
26 | "cat": 3,
27 | "turtle": 0
28 | },
29 | "cat": {
30 | "dog": 2,
31 | "cat": 9,
32 | "turtle": 1
33 | },
34 | "turtle": {
35 | "dog": 1,
36 | "cat": 0,
37 | "turtle": 11
38 | }
39 | }
40 | ```
41 |
--------------------------------------------------------------------------------
/docs/neural-network/hidden-layers/prelu.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # PReLU
4 | Parametric Rectified Linear Units are leaky rectifiers whose *leakage* coefficient is learned during training. Unlike standard [Leaky ReLUs](../activation-functions/leaky-relu.md) whose leakage remains constant, PReLU layers can adjust the leakage to better suit the model on a per node basis.
5 |
6 | $$
7 | {\displaystyle PReLU = {\begin{cases}\alpha x&{\text{if }}x<0\\x&{\text{if }}x\geq 0\end{cases}}}
8 | $$
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | initializer | Constant | Initializer | The initializer of the leakage parameter. |
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\NeuralNet\Layers\PReLU;
18 | use Rubix\ML\NeuralNet\Initializers\Normal;
19 |
20 | $layer = new PReLU(new Normal(0.5));
21 | ```
22 |
23 | ## References
24 | [^1]: K. He et al. (2015). Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification.
25 |
--------------------------------------------------------------------------------
/src/Backends/Tasks/Proba.php:
--------------------------------------------------------------------------------
1 |
27 | */
28 | public static function proba(Probabilistic $estimator, Dataset $dataset) : array
29 | {
30 | return $estimator->proba($dataset);
31 | }
32 |
33 | /**
34 | * @param Probabilistic $estimator
35 | * @param Dataset $dataset
36 | */
37 | public function __construct(Probabilistic $estimator, Dataset $dataset)
38 | {
39 | parent::__construct([self::class, 'proba'], [$estimator, $dataset]);
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/Specifications/Specification.php:
--------------------------------------------------------------------------------
1 | check();
34 |
35 | return true;
36 | } catch (Exception $exception) {
37 | return false;
38 | }
39 | }
40 |
41 | /**
42 | * Does the specification fail?
43 | *
44 | * @return bool
45 | */
46 | public function fails() : bool
47 | {
48 | return !$this->passes();
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: "Code Checks"
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | Build:
7 | name: PHP ${{ matrix.php-versions }} on ${{ matrix.operating-system }}
8 | runs-on: ${{ matrix.operating-system }}
9 | strategy:
10 | matrix:
11 | operating-system: [ubuntu-latest, macos-latest]
12 | php-versions: ['8.0', '8.1', '8.2']
13 |
14 | steps:
15 | - name: Checkout
16 | uses: actions/checkout@v3
17 |
18 | - name: Setup PHP
19 | uses: shivammathur/setup-php@v2
20 | with:
21 | php-version: ${{ matrix.php-versions }}
22 | tools: composer, pecl
23 | extensions: svm, mbstring, gd, fileinfo
24 | ini-values: memory_limit=-1
25 |
26 | - name: Validate composer.json
27 | run: composer validate
28 |
29 | - name: Install Dependencies
30 | run: composer install
31 |
32 | - name: Static Analysis
33 | run: composer analyze
34 |
35 | - name: Unit Tests
36 | run: composer test
37 |
38 | - name: Check Coding Style
39 | run: composer check
40 |
--------------------------------------------------------------------------------
/tests/Strategies/PercentileTest.php:
--------------------------------------------------------------------------------
1 | strategy = new Percentile(50.0);
26 | }
27 |
28 | /**
29 | * @test
30 | */
31 | public function build() : void
32 | {
33 | $this->assertInstanceOf(Percentile::class, $this->strategy);
34 | $this->assertInstanceOf(Strategy::class, $this->strategy);
35 | }
36 |
37 | /**
38 | * @test
39 | */
40 | public function fitGuess() : void
41 | {
42 | $this->strategy->fit([1, 2, 3, 4, 5]);
43 |
44 | $guess = $this->strategy->guess();
45 |
46 | $this->assertEquals(3, $guess);
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/docs/transformers/image-vectorizer.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Image Vectorizer
4 | Image Vectorizer takes images of the same size and converts them into flat feature vectors of raw color channel intensities. Intensities range from 0 to 255 and can either be read from 1 channel (grayscale) or 3 channels (RGB color) per pixel.
5 |
6 | !!! note
7 | Note that the [GD extension](https://php.net/manual/en/book.image.php) is required to use this transformer.
8 |
9 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful)
10 |
11 | **Data Type Compatibility:** Image
12 |
13 | ## Parameters
14 | | # | Name | Default | Type | Description |
15 | |---|---|---|---|---|
16 | | 1 | grayscale | false | bool | Should we encode the image in grayscale instead of color? |
17 |
18 | ## Example
19 | ```php
20 | use Rubix\ML\Transformers\ImageVectorizer;
21 |
22 | $transformer = new ImageVectorizer(true);
23 | ```
24 |
25 | ## Additional Methods
26 | This transformer does not have any additional methods.
27 |
--------------------------------------------------------------------------------
/tests/Backends/Tasks/ProbaTest.php:
--------------------------------------------------------------------------------
1 | new Blob([69.2, 195.7, 40.0], [1.0, 3.0, 0.3]),
26 | 'female' => new Blob([63.7, 168.5, 38.1], [0.8, 2.5, 0.4]),
27 | ], [0.45, 0.55]);
28 |
29 | $training = $generator->generate(50);
30 |
31 | $estimator->train($training);
32 |
33 | $testing = $generator->generate(15);
34 |
35 | $task = new Proba($estimator, $testing);
36 |
37 | $result = $task->compute();
38 |
39 | $this->assertCount(15, $result);
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/tests/Loggers/ScreenTest.php:
--------------------------------------------------------------------------------
1 | logger = new Screen('default');
25 | }
26 |
27 | /**
28 | * @test
29 | */
30 | public function build() : void
31 | {
32 | $this->assertInstanceOf(Screen::class, $this->logger);
33 | $this->assertInstanceOf(Logger::class, $this->logger);
34 | $this->assertInstanceOf(LoggerInterface::class, $this->logger);
35 | }
36 |
37 | /**
38 | * @test
39 | */
40 | public function log() : void
41 | {
42 | $this->expectOutputRegex('/\b(default.INFO: test)\b/');
43 |
44 | $this->logger->log(LogLevel::INFO, 'test');
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/docs/cross-validation/monte-carlo.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Monte Carlo
4 | Monte Carlo cross validation (or *repeated random subsampling*) is a technique that averages the validation score of a learner over a user-defined number of simulations where the learner is trained and tested on random splits of the dataset. The estimated validation score approaches the actual validation score as the number of simulations goes to infinity, however, only a tiny fraction of all possible simulations are needed to produce a pretty good approximation.
5 |
6 | **Interfaces:** [Validator](api.md#validator), [Parallel](#parallel)
7 |
8 | ## Parameters
9 | | # | Name | Default | Type | Description |
10 | |---|---|---|---|---|
11 | | 1 | simulations | 10 | int | The number of simulations i.e. random subsamplings of the dataset. |
12 | | 2 | ratio | 0.2 | float | The ratio of samples to hold out for testing. |
13 |
14 | ## Example
15 | ```php
16 | use Rubix\ML\CrossValidation\MonteCarlo;
17 |
18 | $validator = new MonteCarlo(30, 0.1);
19 | ```
--------------------------------------------------------------------------------
/tests/Backends/Tasks/TrainLearnerTest.php:
--------------------------------------------------------------------------------
1 | new Blob([69.2, 195.7, 40.0], [1.0, 3.0, 0.3]),
26 | 'female' => new Blob([63.7, 168.5, 38.1], [0.8, 2.5, 0.4]),
27 | ], [0.45, 0.55]);
28 |
29 | $dataset = $generator->generate(50);
30 |
31 | $task = new TrainLearner($estimator, $dataset);
32 |
33 | $result = $task->compute();
34 |
35 | $this->assertInstanceOf(GaussianNB::class, $result);
36 | $this->assertTrue($result->trained());
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/tests/Backends/Tasks/PredictTest.php:
--------------------------------------------------------------------------------
1 | new Blob([69.2, 195.7, 40.0], [1.0, 3.0, 0.3]),
26 | 'female' => new Blob([63.7, 168.5, 38.1], [0.8, 2.5, 0.4]),
27 | ], [0.45, 0.55]);
28 |
29 | $training = $generator->generate(50);
30 |
31 | $estimator->train($training);
32 |
33 | $testing = $generator->generate(15);
34 |
35 | $task = new Predict($estimator, $testing);
36 |
37 | $result = $task->compute();
38 |
39 | $this->assertCount(15, $result);
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/Tokenizers/Word.php:
--------------------------------------------------------------------------------
1 |
30 | */
31 | public function tokenize(string $text) : array
32 | {
33 | $tokens = [];
34 |
35 | preg_match_all(self::WORD_REGEX, $text, $tokens);
36 |
37 | return $tokens[0];
38 | }
39 |
40 | /**
41 | * Return the string representation of the object.
42 | *
43 | * @internal
44 | *
45 | * @return string
46 | */
47 | public function __toString() : string
48 | {
49 | return 'Word';
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/docs/neural-network/cost-functions/huber-loss.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Huber Loss
4 | The pseudo Huber Loss function transitions between L1 and L2 loss at a given pivot point (defined by *delta*) such that the function becomes more quadratic as the loss decreases. The combination of L1 and L2 losses makes Huber more robust to outliers while maintaining smoothness near the minimum.
5 |
6 | $$
7 | L_{\delta}=
8 | \left\{\begin{matrix}
9 | \frac{1}{2}(y - \hat{y})^{2} & if \left | (y - \hat{y}) \right | < \delta\\
10 | \delta (\left | y - \hat{y} \right | - \frac{1}{2} \delta) & otherwise
11 | \end{matrix}\right.
12 | $$
13 |
14 | ## Parameters
15 | | # | Name | Default | Type | Description |
16 | |---|---|---|---|---|
17 | | 1 | delta | 1.0 | float | The pivot point, i.e. the point above which errors are evaluated with an L1 loss and below which they are evaluated with an L2 loss. |
18 |
19 | ## Example
20 | ```php
21 | use Rubix\ML\NeuralNet\CostFunctions\HuberLoss;
22 |
23 | $costFunction = new HuberLoss(0.5);
24 | ```
--------------------------------------------------------------------------------
/docs/regressors/ridge.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Ridge
4 | L2 regularized linear regression solved using a closed-form solution. The addition of regularization, controlled by the *l2Penalty* hyper-parameter, makes Ridge less likely to overfit the training data than ordinary least squares (OLS).
5 |
6 | **Interfaces:** [Estimator](../estimator.md), [Learner](../learner.md), [Ranks Features](../ranks-features.md), [Persistable](../persistable.md)
7 |
8 | **Data Type Compatibility:** Continuous
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | l2Penalty | 1.0 | float | The strength of the L2 regularization penalty. |
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\Regressors\Ridge;
18 |
19 | $estimator = new Ridge(2.0);
20 | ```
21 |
22 | ## Additional Methods
23 | Return the weights of features in the decision function.
24 | ```php
25 | public coefficients() : array|null
26 | ```
27 |
28 | Return the bias added to the decision function.
29 | ```php
30 | public bias() : float|null
31 | ```
32 |
--------------------------------------------------------------------------------
/tests/Specifications/ExtensionIsLoadedTest.php:
--------------------------------------------------------------------------------
1 | assertSame($expected, $specification->passes());
26 | }
27 |
28 | /**
29 | * @return Generator
30 | */
31 | public function passesProvider() : Generator
32 | {
33 | yield [
34 | ExtensionIsLoaded::with('json'),
35 | true,
36 | ];
37 |
38 | yield [
39 | ExtensionIsLoaded::with("I be trappin' where I go"),
40 | false,
41 | ];
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/docs/neural-network/optimizers/cyclical.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Cyclical
4 | The Cyclical optimizer uses a global learning rate that cycles between the lower and upper bound over a designated period while also decaying the upper bound by a factor at each step. Cyclical learning rates have been shown to help escape bad local minima and saddle points of the gradient.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | lower | 0.001 | float | The lower bound on the learning rate. |
10 | | 2 | upper | 0.006 | float | The upper bound on the learning rate. |
11 | | 3 | steps | 100 | int | The number of steps in every half cycle. |
12 | | 4 | decay | 0.99994 | float | The exponential decay factor to decrease the learning rate by every step. |
13 |
14 | ## Example
15 | ```php
16 | use Rubix\ML\NeuralNet\Optimizers\Cyclical;
17 |
18 | $optimizer = new Cyclical(0.001, 0.005, 1000);
19 | ```
20 |
21 | ## References
22 | [^1]: L. N. Smith. (2017). Cyclical Learning Rates for Training Neural Networks.
--------------------------------------------------------------------------------
/benchmarks/Transformers/ImageVectorizerBench.php:
--------------------------------------------------------------------------------
1 | dataset = Unlabeled::build($samples);
35 |
36 | $this->transformer = new ImageVectorizer();
37 | }
38 |
39 | /**
40 | * @Subject
41 | * @Iterations(5)
42 | * @OutputTimeUnit("seconds", precision=3)
43 | */
44 | public function apply() : void
45 | {
46 | $this->dataset->apply($this->transformer);
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/docs/datasets/generators/half-moon.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Half Moon
4 | Generates a dataset consisting of 2-d samples that form the shape of a half moon when plotted on a scatter plot chart.
5 |
6 | **Data Types:** Continuous
7 |
8 | **Label Type:** Continuous
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | x | 0.0 | float | The *x* coordinate of the center of the half moon. |
14 | | 2 | y | 0.0 | float | The *y* coordinate of the center of the half moon. |
15 | | 3 | scale | 1.0 | float | The scaling factor of the half moon. |
16 | | 4 | rotate | 90.0 | float | The amount in degrees to rotate the half moon counterclockwise. |
17 | | 5 | noise | 0.1 | float | The amount of Gaussian noise to add to each data point as a percentage of the scaling factor. |
18 |
19 | ## Example
20 | ```php
21 | use Rubix\ML\Datasets\Generators\HalfMoon;
22 |
23 | $generator = new HalfMoon(4.0, 0.0, 6, 180.0, 0.2);
24 | ```
25 |
26 | ## Additional Methods
27 | This generator does not have any additional methods.
28 |
--------------------------------------------------------------------------------
/tests/Kernels/SVM/SigmoidalTest.php:
--------------------------------------------------------------------------------
1 | kernel = new Sigmoidal(1e-3);
27 | }
28 |
29 | /**
30 | * @test
31 | */
32 | public function build() : void
33 | {
34 | $this->assertInstanceOf(Sigmoidal::class, $this->kernel);
35 | $this->assertInstanceOf(Kernel::class, $this->kernel);
36 | }
37 |
38 | /**
39 | * @test
40 | */
41 | public function options() : void
42 | {
43 | $options = [
44 | 102 => 3,
45 | 201 => 1e-3,
46 | 205 => 0.0,
47 | ];
48 |
49 | $this->assertEquals($options, $this->kernel->options());
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/src/Estimator.php:
--------------------------------------------------------------------------------
1 |
32 | */
33 | public function compatibility() : array;
34 |
35 | /**
36 | * Return the settings of the hyper-parameters in an associative array.
37 | *
38 | * @internal
39 | *
40 | * @return mixed[]
41 | */
42 | public function params() : array;
43 |
44 | /**
45 | * Make predictions from a dataset.
46 | *
47 | * @param Dataset $dataset
48 | * @return list
49 | */
50 | public function predict(Dataset $dataset) : array;
51 | }
52 |
--------------------------------------------------------------------------------
/src/Specifications/DatasetIsNotEmpty.php:
--------------------------------------------------------------------------------
1 | dataset = $dataset;
37 | }
38 |
39 | /**
40 | * Perform a check of the specification and throw an exception if invalid.
41 | *
42 | * @throws EmptyDataset
43 | */
44 | public function check() : void
45 | {
46 | if ($this->dataset->empty()) {
47 | throw new EmptyDataset();
48 | }
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/tests/NeuralNet/Initializers/HeTest.php:
--------------------------------------------------------------------------------
1 | initializer = new He();
27 | }
28 |
29 | /**
30 | * @test
31 | */
32 | public function build() : void
33 | {
34 | $this->assertInstanceOf(He::class, $this->initializer);
35 | $this->assertInstanceOf(Initializer::class, $this->initializer);
36 | }
37 |
38 | /**
39 | * @test
40 | */
41 | public function initialize() : void
42 | {
43 | $w = $this->initializer->initialize(4, 3);
44 |
45 | $this->assertInstanceOf(Matrix::class, $w);
46 | $this->assertEquals([3, 4], $w->shape());
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/benchmarks/Tokenizers/WordBench.php:
--------------------------------------------------------------------------------
1 | tokenizer = new Word();
23 | }
24 |
25 | /**
26 | * @Subject
27 | * @revs(1000)
28 | * @Iterations(5)
29 | * @OutputTimeUnit("milliseconds", precision=3)
30 | */
31 | public function tokenize() : void
32 | {
33 | $this->tokenizer->tokenize(self::SAMPLE_TEXT);
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/docs/neural-network/optimizers/adam.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Adam
4 | Short for *Adaptive Moment Estimation*, the Adam Optimizer combines both Momentum and RMS properties. In addition to storing an exponentially decaying average of past squared gradients like [RMSprop](rms-prop.md), Adam also keeps an exponentially decaying average of past gradients, similar to [Momentum](momentum.md). Whereas Momentum can be seen as a ball running down a slope, Adam behaves like a heavy ball with friction.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | rate | 0.001 | float | The learning rate that controls the global step size. |
10 | | 2 | momentumDecay | 0.1 | float | The decay rate of the accumulated velocity. |
11 | | 3 | normDecay | 0.001 | float | The decay rate of the rms property. |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\Optimizers\Adam;
16 |
17 | $optimizer = new Adam(0.0001, 0.1, 0.001);
18 | ```
19 |
20 | ## References
21 | [^1]: D. P. Kingma et al. (2014). Adam: A Method for Stochastic Optimization.
--------------------------------------------------------------------------------
/docs/transformers/linear-discriminant-analysis.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Linear Discriminant Analysis
4 | Linear Discriminant Analysis (LDA) is a supervised dimensionality reduction technique that selects the most informative features using information in the class labels. More formally, LDA finds a linear combination of features that characterizes or best *discriminates* two or more classes.
5 |
6 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful), [Persistable](../persistable.md)
7 |
8 | **Data Type Compatibility:** Continuous only
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | dimensions | | int | The target number of dimensions to project onto. |
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\Transformers\LinearDiscriminantAnalysis;
18 |
19 | $transformer = new LinearDiscriminantAnalysis(20);
20 | ```
21 |
22 | ## Additional Methods
23 | Return the proportion of information lost due to the transformation:
24 | ```php
25 | public lossiness() : ?float
26 | ```
27 |
--------------------------------------------------------------------------------
/docs/transformers/multibyte-text-normalizer.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Multibyte Text Normalizer
4 | This transformer converts the characters in all [multibyte strings](https://www.php.net/manual/en/intro.mbstring.php) to the same case. Multibyte strings contain characters such as accented letters (é, è, à), emojis (😀, 😉), or characters from non-Roman alphabets such as Chinese and Cyrillic.
5 |
6 | !!! note
7 | ⚠️ We recommend you install the [mbstring extension](https://www.php.net/manual/en/book.mbstring.php) for best performance.
8 |
9 | **Interfaces:** [Transformer](api.md#transformer)
10 |
11 | **Data Type Compatibility:** Categorical
12 |
13 | ## Parameters
14 | | # | Name | Default | Type | Description |
15 | |---|---|---|---|---|
16 | | 1 | uppercase | false | bool | Should the text be converted to uppercase? |
17 |
18 | ## Example
19 | ```php
20 | use Rubix\ML\Transformers\MultibyteTextNormalizer;
21 |
22 | $transformer = new MultibyteTextNormalizer(false);
23 | ```
24 |
25 | ## Additional Methods
26 | This transformer does not have any additional methods.
27 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 The Rubix ML Community
4 | Copyright (c) 2023 Andrew DalPino
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/benchmarks/Tokenizers/NGramBench.php:
--------------------------------------------------------------------------------
1 | tokenizer = new NGram(1, 2);
23 | }
24 |
25 | /**
26 | * @Subject
27 | * @revs(1000)
28 | * @Iterations(5)
29 | * @OutputTimeUnit("milliseconds", precision=3)
30 | */
31 | public function tokenize() : void
32 | {
33 | $this->tokenizer->tokenize(self::SAMPLE_TEXT);
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/Exceptions/ClassRevisionMismatch.php:
--------------------------------------------------------------------------------
1 | = 0 ? 'up' : 'down';
24 |
25 | parent::__construct('Object incompatible with class revision,'
26 | . " {$direction}grade to version $createdWithVersion.");
27 |
28 | $this->createdWithVersion = $createdWithVersion;
29 | }
30 |
31 | /**
32 | * Return the version number of the library that the incompatible object was created with.
33 | *
34 | * @return string
35 | */
36 | public function createdWithVersion() : string
37 | {
38 | return $this->createdWithVersion;
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/benchmarks/Datasets/SplittingBench.php:
--------------------------------------------------------------------------------
1 | new Blob([5.0, 3.42, 1.46, 0.24], [0.35, 0.38, 0.17, 0.1]),
25 | 'Iris-versicolor' => new Blob([5.94, 2.77, 4.26, 1.33], [0.51, 0.31, 0.47, 0.2]),
26 | 'Iris-virginica' => new Blob([6.59, 2.97, 5.55, 2.03], [0.63, 0.32, 0.55, 0.27]),
27 | ]);
28 |
29 | $this->dataset = $generator->generate(self::DATASET_SIZE);
30 | }
31 |
32 | /**
33 | * @Subject
34 | * @Iterations(5)
35 | * @OutputTimeUnit("milliseconds", precision=3)
36 | */
37 | public function splitByFeature() : void
38 | {
39 | $this->dataset->splitByFeature(2, 3.0);
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/docs/neural-network/activation-functions/leaky-relu.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Leaky ReLU
4 | Leaky Rectified Linear Units are activation functions that output `x` when `x` is greater than or equal to 0, or `x` scaled by a small *leakage* coefficient when the input is less than 0. Leaky rectifiers have the benefit of allowing a small gradient to flow through during backpropagation even though they might not have activated during the forward pass.
5 |
6 | $$
7 | {\displaystyle LeakyReLU = {\begin{cases}\lambda x&{\text{if }}x<0\\x&{\text{if }}x\geq 0\end{cases}}}
8 | $$
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | leakage | 0.1 | float | The amount of leakage as a proportion of the input value to allow to pass through when the unit is not activated. |
14 |
15 | ## Example
16 | ```php
17 | use Rubix\ML\NeuralNet\ActivationFunctions\LeakyReLU;
18 |
19 | $activationFunction = new LeakyReLU(0.3);
20 | ```
21 |
22 | ## References
23 | [^1]: A. L. Maas et al. (2013). Rectifier Nonlinearities Improve Neural Network Acoustic Models.
24 |
--------------------------------------------------------------------------------
/src/NeuralNet/ActivationFunctions/Softmax.php:
--------------------------------------------------------------------------------
1 | exp()->transpose();
32 |
33 | $total = $zHat->sum()->clipLower(EPSILON);
34 |
35 | return $zHat->divide($total)->transpose();
36 | }
37 |
38 | /**
39 | * Return the string representation of the object.
40 | *
41 | * @internal
42 | *
43 | * @return string
44 | */
45 | public function __toString() : string
46 | {
47 | return 'Softmax';
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/tests/NeuralNet/Initializers/LeCunTest.php:
--------------------------------------------------------------------------------
1 | initializer = new LeCun();
27 | }
28 |
29 | /**
30 | * @test
31 | */
32 | public function build() : void
33 | {
34 | $this->assertInstanceOf(LeCun::class, $this->initializer);
35 | $this->assertInstanceOf(Initializer::class, $this->initializer);
36 | }
37 |
38 | /**
39 | * @test
40 | */
41 | public function initialize() : void
42 | {
43 | $w = $this->initializer->initialize(4, 3);
44 |
45 | $this->assertInstanceOf(Matrix::class, $w);
46 | $this->assertEquals([3, 4], $w->shape());
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/benchmarks/Tokenizers/SentenceBench.php:
--------------------------------------------------------------------------------
1 | tokenizer = new Sentence();
23 | }
24 |
25 | /**
26 | * @Subject
27 | * @revs(1000)
28 | * @Iterations(5)
29 | * @OutputTimeUnit("milliseconds", precision=3)
30 | */
31 | public function tokenize() : void
32 | {
33 | $this->tokenizer->tokenize(self::SAMPLE_TEXT);
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/docs/cross-validation/metrics/rand-index.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Rand Index
4 | The Adjusted Rand Index is a measure of similarity between a clustering and some ground-truth that is adjusted for chance. It considers all pairs of samples that are assigned to the same or different clusters in the predicted and empirical clusterings.
5 |
6 | $$
7 | {\displaystyle ARI = {\frac {\left.\sum _{ij}{\binom {n_{ij}}{2}}-\left[\sum _{i}{\binom {a_{i}}{2}}\sum _{j}{\binom {b_{j}}{2}}\right]\right/{\binom {n}{2}}}{\left.{\frac {1}{2}}\left[\sum _{i}{\binom {a_{i}}{2}}+\sum _{j}{\binom {b_{j}}{2}}\right]-\left[\sum _{i}{\binom {a_{i}}{2}}\sum _{j}{\binom {b_{j}}{2}}\right]\right/{\binom {n}{2}}}}}
8 | $$
9 |
10 | **Estimator Compatibility:** Clusterer
11 |
12 | **Score Range:** -1 to 1
13 |
14 | ## Parameters
15 | This metric does not have any parameters.
16 |
17 | ## Example
18 | ```php
19 | use Rubix\ML\CrossValidation\Metrics\RandIndex;
20 |
21 | $metric = new RandIndex();
22 | ```
23 |
24 | ## References
25 | [^1]: W. M. Rand. (1971). Objective Criteria for the Evaluation of Clustering Methods.
--------------------------------------------------------------------------------
/docs/neural-network/optimizers/momentum.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Momentum
4 | Momentum accelerates each update step by accumulating velocity from past updates and adding a factor of the previous velocity to the current step. Momentum can help speed up training and escape bad local minima when compared with [Stochastic](stochastic.md) Gradient Descent.
5 |
6 | ## Parameters
7 | | # | Name | Default | Type | Description |
8 | |---|---|---|---|---|
9 | | 1 | rate | 0.001 | float | The learning rate that controls the global step size. |
10 | | 2 | decay | 0.1 | float | The decay rate of the accumulated velocity. |
11 | | 3 | lookahead | false | bool | Should we employ Nesterov's lookahead (NAG) when updating the parameters? |
12 |
13 | ## Example
14 | ```php
15 | use Rubix\ML\NeuralNet\Optimizers\Momentum;
16 |
17 | $optimizer = new Momentum(0.01, 0.1, true);
18 | ```
19 |
20 | ## References
21 | [^1]: D. E. Rumelhart et al. (1988). Learning representations by back-propagating errors.
22 | [^2]: I. Sutskever et al. (2013). On the importance of initialization and momentum in deep learning.
23 |
--------------------------------------------------------------------------------
/src/Extractors/Concatenator.php:
--------------------------------------------------------------------------------
1 | >
23 | */
24 | protected iterable $iterators;
25 |
26 | /**
27 | * @param iterable> $iterators
28 | */
29 | public function __construct(iterable $iterators)
30 | {
31 | $this->iterators = $iterators;
32 | }
33 |
34 | /**
35 | * Return an iterator for the rows of a data table.
36 | *
37 | * @return \Generator
38 | */
39 | public function getIterator() : Traversable
40 | {
41 | foreach ($this->iterators as $iterator) {
42 | foreach ($iterator as $record) {
43 | yield $record;
44 | }
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/docs/transformers/min-max-normalizer.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Min Max Normalizer
4 | The *Min Max* Normalizer scales the input features to values within a user-specified range (*default* 0 to 1).
5 |
6 | **Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful), [Elastic](api.md#elastic), [Reversible](api.md#reversible), [Persistable](../persistable.md)
7 |
8 | **Data Type Compatibility:** Continuous
9 |
10 | ## Parameters
11 | | # | Name | Default | Type | Description |
12 | |---|---|---|---|---|
13 | | 1 | min | 0.0 | float | The minimum value of the transformed features. |
14 | | 2 | max | 1.0 | float | The maximum value of the transformed features. |
15 |
16 | ## Example
17 | ```php
18 | use Rubix\ML\Transformers\MinMaxNormalizer;
19 |
20 | $transformer = new MinMaxNormalizer(-5.0, 5.0);
21 | ```
22 |
23 | ## Additional Methods
24 | Return the minimum values for each fitted feature column:
25 | ```php
26 | public minimums() : ?array
27 | ```
28 |
29 | Return the maximum values for each fitted feature column:
30 | ```php
31 | public maximums() : ?array
32 | ```
33 |
--------------------------------------------------------------------------------
/src/Tokenizers/Sentence.php:
--------------------------------------------------------------------------------
1 |
30 | */
31 | public function tokenize(string $text) : array
32 | {
33 | return preg_split(self::SENTENCE_REGEX, $text, -1, PREG_SPLIT_NO_EMPTY) ?: [];
34 | }
35 |
36 | /**
37 | * Return the string representation of the object.
38 | *
39 | * @internal
40 | *
41 | * @return string
42 | */
43 | public function __toString() : string
44 | {
45 | return 'Sentence';
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/tests/NeuralNet/Initializers/NormalTest.php:
--------------------------------------------------------------------------------
1 | initializer = new Normal(0.05);
27 | }
28 |
29 | /**
30 | * @test
31 | */
32 | public function build() : void
33 | {
34 | $this->assertInstanceOf(Normal::class, $this->initializer);
35 | $this->assertInstanceOf(Initializer::class, $this->initializer);
36 | }
37 |
38 | /**
39 | * @test
40 | */
41 | public function initialize() : void
42 | {
43 | $w = $this->initializer->initialize(4, 3);
44 |
45 | $this->assertInstanceOf(Matrix::class, $w);
46 | $this->assertEquals([3, 4], $w->shape());
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/benchmarks/Tokenizers/KSkipNGramBench.php:
--------------------------------------------------------------------------------
1 | tokenizer = new KSkipNGram(2, 3);
23 | }
24 |
25 | /**
26 | * @Subject
27 | * @revs(1000)
28 | * @Iterations(5)
29 | * @OutputTimeUnit("milliseconds", precision=3)
30 | */
31 | public function tokenize() : void
32 | {
33 | $this->tokenizer->tokenize(self::SAMPLE_TEXT);
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/benchmarks/Tokenizers/WhitespaceBench.php:
--------------------------------------------------------------------------------
1 | tokenizer = new Whitespace();
23 | }
24 |
25 | /**
26 | * @Subject
27 | * @revs(1000)
28 | * @Iterations(5)
29 | * @OutputTimeUnit("milliseconds", precision=3)
30 | */
31 | public function tokenize() : void
32 | {
33 | $this->tokenizer->tokenize(self::SAMPLE_TEXT);
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/Traits/Multiprocessing.php:
--------------------------------------------------------------------------------
1 | **Note**: The optimal number of workers will depend on the system
16 | * specifications of the computer. Fewer workers than CPU cores can result in
17 | * slower performance but too many workers can cause excess overhead.
18 | *
19 | * @category Machine Learning
20 | * @package Rubix/ML
21 | * @author Andrew DalPino
22 | */
23 | trait Multiprocessing
24 | {
25 | /**
26 | * The parallel processing backend.
27 | *
28 | * @var Backend
29 | */
30 | protected Backend $backend;
31 |
32 | /**
33 | * Set the parallel processing backend.
34 | *
35 | * @param Backend $backend
36 | */
37 | public function setBackend(Backend $backend) : void
38 | {
39 | $this->backend = $backend;
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/tests/Kernels/SVM/PolynomialTest.php:
--------------------------------------------------------------------------------
1 | kernel = new Polynomial(3, 1e-3);
27 | }
28 |
29 | /**
30 | * @test
31 | */
32 | public function build() : void
33 | {
34 | $this->assertInstanceOf(Polynomial::class, $this->kernel);
35 | $this->assertInstanceOf(Kernel::class, $this->kernel);
36 | }
37 |
38 | /**
39 | * @test
40 | */
41 | public function options() : void
42 | {
43 | $expected = [
44 | 102 => 1,
45 | 201 => 1e-3,
46 | 103 => 3,
47 | 205 => 0.0,
48 | ];
49 |
50 | $this->assertEquals($expected, $this->kernel->options());
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/tests/NeuralNet/Initializers/Xavier1Test.php:
--------------------------------------------------------------------------------
1 | initializer = new Xavier1();
27 | }
28 |
29 | /**
30 | * @test
31 | */
32 | public function build() : void
33 | {
34 | $this->assertInstanceOf(Xavier1::class, $this->initializer);
35 | $this->assertInstanceOf(Initializer::class, $this->initializer);
36 | }
37 |
38 | /**
39 | * @test
40 | */
41 | public function initialize() : void
42 | {
43 | $w = $this->initializer->initialize(4, 3);
44 |
45 | $this->assertInstanceOf(Matrix::class, $w);
46 | $this->assertEquals([3, 4], $w->shape());
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/tests/NeuralNet/Initializers/Xavier2Test.php:
--------------------------------------------------------------------------------
1 | initializer = new Xavier2();
27 | }
28 |
29 | /**
30 | * @test
31 | */
32 | public function build() : void
33 | {
34 | $this->assertInstanceOf(Xavier2::class, $this->initializer);
35 | $this->assertInstanceOf(Initializer::class, $this->initializer);
36 | }
37 |
38 | /**
39 | * @test
40 | */
41 | public function initialize() : void
42 | {
43 | $w = $this->initializer->initialize(4, 3);
44 |
45 | $this->assertInstanceOf(Matrix::class, $w);
46 | $this->assertEquals([3, 4], $w->shape());
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/benchmarks/Tokenizers/WordStemmerBench.php:
--------------------------------------------------------------------------------
1 | tokenizer = new WordStemmer('english');
23 | }
24 |
25 | /**
26 | * @Subject
27 | * @revs(1000)
28 | * @Iterations(5)
29 | * @OutputTimeUnit("milliseconds", precision=3)
30 | */
31 | public function tokenize() : void
32 | {
33 | $this->tokenizer->tokenize(self::SAMPLE_TEXT);
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/docs/transformers/boolean-converter.md:
--------------------------------------------------------------------------------
1 | [source]
2 |
3 | # Boolean Converter
4 | This transformer is used to convert boolean values to a compatible continuous or categorical datatype. Strings should be
5 | used when the boolean should be treated as a categorical value. Ints or floats when the boolean should be treated as a
6 | continuous value.
7 |
8 | **Interfaces:** [Transformer](api.md#transformer)
9 |
10 | **Data Type Compatibility:** Categorical, Continuous
11 |
12 | ## Parameters
13 | | # | Name | Default | Type | Description |
14 | |---|---|---|---|---|
15 | | 1 | trueValue | 'true' | string, int, float | The value to convert `true` to. |
16 | | 2 | falseValue | 'false' | string, int, float | The value to convert `false` to. |
17 |
18 | ## Example
19 | ```php
20 | use Rubix\ML\Transformers\BooleanConverter;
21 |
22 | $transformer = new BooleanConverter('true', 'false');
23 |
24 | $transformer = new BooleanConverter('tall', 'not tall');
25 |
26 | $transformer = new BooleanConverter(1, 0);
27 | ```
28 |
29 | ## Additional Methods
30 | This transformer does not have any additional methods.
31 |
--------------------------------------------------------------------------------
/src/NeuralNet/Layers/Layer.php:
--------------------------------------------------------------------------------
1 | name = $name;
38 | }
39 |
40 | /**
41 | * Perform a check of the specification and throw an exception if invalid.
42 | *
43 | * @throws MissingExtension
44 | */
45 | public function check() : void
46 | {
47 | if (!extension_loaded($this->name)) {
48 | throw new MissingExtension($this->name);
49 | }
50 | }
51 | }
52 |
--------------------------------------------------------------------------------