PDF rausgenommen

This commit is contained in:
aschwarz
2023-01-23 11:03:31 +01:00
parent 82d562a322
commit a6523903eb
28078 changed files with 4247552 additions and 2 deletions

View File

@ -0,0 +1,9 @@
root = true
[*]
end_of_line = lf
charset = utf-8
max_line_length = 80
indent_style = space
indent_size = 4
insert_final_newline = true

21
msd2/myoos/vendor/php-ai/php-ml/LICENSE vendored Normal file
View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2016-2018 Arkadiusz Kondas <arkadiusz.kondas[at]gmail>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,146 @@
# PHP-ML - Machine Learning library for PHP
[![Minimum PHP Version](https://img.shields.io/badge/php-%3E%3D%207.1-8892BF.svg)](https://php.net/)
[![Latest Stable Version](https://img.shields.io/packagist/v/php-ai/php-ml.svg)](https://packagist.org/packages/php-ai/php-ml)
[![Build Status](https://travis-ci.org/php-ai/php-ml.svg?branch=master)](https://travis-ci.org/php-ai/php-ml)
[![Documentation Status](https://readthedocs.org/projects/php-ml/badge/?version=master)](http://php-ml.readthedocs.org/)
[![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml)
[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml)
[![Coverage Status](https://coveralls.io/repos/github/php-ai/php-ml/badge.svg?branch=master)](https://coveralls.io/github/php-ai/php-ml?branch=master)
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=master)
<p align="center">
<img src="https://github.com/php-ai/php-ml/raw/master/docs/assets/php-ml-logo.png" />
</p>
Fresh approach to Machine Learning in PHP. Algorithms, Cross Validation, Neural Network, Preprocessing, Feature Extraction and much more in one library.
PHP-ML requires PHP >= 7.1.
Simple example of classification:
```php
require_once __DIR__ . '/vendor/autoload.php';
use Phpml\Classification\KNearestNeighbors;
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
$labels = ['a', 'a', 'a', 'b', 'b', 'b'];
$classifier = new KNearestNeighbors();
$classifier->train($samples, $labels);
echo $classifier->predict([3, 2]);
// return 'b'
```
## Awards
<a href="http://www.yegor256.com/2016/10/23/award-2017.html">
<img src="http://www.yegor256.com/images/award/2017/winner-itcraftsmanpl.png" width="400"/></a>
## Documentation
To find out how to use PHP-ML, see the [documentation](http://php-ml.readthedocs.org/).
## Installation
This library is still under development, but you can install it with Composer:
```
composer require php-ai/php-ml
```
## Examples
Example scripts are available in a separate repository [php-ai/php-ml-examples](https://github.com/php-ai/php-ml-examples).
## Datasets
Public datasets are available in a separate repository [php-ai/php-ml-datasets](https://github.com/php-ai/php-ml-datasets).
## Features
* Association rule learning
* [Apriori](http://php-ml.readthedocs.io/en/latest/machine-learning/association/apriori/)
* Classification
* [SVC](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/svc/)
* [k-Nearest Neighbors](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/k-nearest-neighbors/)
* [Naive Bayes](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/naive-bayes/)
* Decision Tree (CART)
* Ensemble Algorithms
* Bagging (Bootstrap Aggregating)
* Random Forest
* AdaBoost
* Linear
* Adaline
* Decision Stump
* Perceptron
* LogisticRegression
* Regression
* [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/)
* [SVR](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/svr/)
* Clustering
* [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means/)
* [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan/)
* Fuzzy C-Means
* Metric
* [Accuracy](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/accuracy/)
* [Confusion Matrix](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/confusion-matrix/)
* [Classification Report](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/classification-report/)
* Workflow
* [Pipeline](http://php-ml.readthedocs.io/en/latest/machine-learning/workflow/pipeline)
* Neural Network
* [Multilayer Perceptron Classifier](http://php-ml.readthedocs.io/en/latest/machine-learning/neural-network/multilayer-perceptron-classifier/)
* Cross Validation
* [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split/)
* [Stratified Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/stratified-random-split/)
* Feature Selection
* [Variance Threshold](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-selection/variance-threshold/)
* [SelectKBest](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-selection/selectkbest/)
* Preprocessing
* [Normalization](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/normalization/)
* [Imputation of missing values](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/imputation-missing-values/)
* Feature Extraction
* [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/)
* NGramTokenizer
* WhitespaceTokenizer
* WordTokenizer
* [Tf-idf Transformer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/tf-idf-transformer/)
* Dimensionality Reduction
* PCA (Principal Component Analysis)
* Kernel PCA
* LDA (Linear Discriminant Analysis)
* Datasets
* [Array](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/array-dataset/)
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset/)
* [Files](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/files-dataset/)
* [SVM](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/svm-dataset/)
* [MNIST](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/mnist-dataset.md)
* Ready to use:
* [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/)
* [Wine](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/wine/)
* [Glass](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/glass/)
* Models management
* [Persistency](http://php-ml.readthedocs.io/en/latest/machine-learning/model-manager/persistency/)
* Math
* [Distance](http://php-ml.readthedocs.io/en/latest/math/distance/)
* [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/)
* [Set](http://php-ml.readthedocs.io/en/latest/math/set/)
* [Statistic](http://php-ml.readthedocs.io/en/latest/math/statistic/)
* Linear Algebra
## Contribute
- [Guide: CONTRIBUTING.md](https://github.com/php-ai/php-ml/blob/master/CONTRIBUTING.md)
- [Issue Tracker: github.com/php-ai/php-ml](https://github.com/php-ai/php-ml/issues)
- [Source Code: github.com/php-ai/php-ml](https://github.com/php-ai/php-ml)
You can find out more about contributing in [CONTRIBUTING.md](CONTRIBUTING.md).
## License
PHP-ML is released under the MIT License. See the bundled LICENSE file for details.
## Author
Arkadiusz Kondas (@ArkadiuszKondas)

View File

@ -0,0 +1,4 @@
#!/bin/bash
# Runs the PHPUnit test suite with HTML code-coverage output and opens the
# resulting report in Google Chrome.
# All paths below are relative, so the script has to be started from the
# directory that contains bin/phpunit (presumably the project root - confirm).
echo "Run PHPUnit with code coverage"
# Write the HTML coverage report into the .coverage directory.
bin/phpunit --coverage-html .coverage
# Open the report's entry page. No `set -e` is in effect, so this line runs
# even when the PHPUnit step above exits with a non-zero status.
google-chrome .coverage/index.html

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,53 @@
{
"name": "php-ai/php-ml",
"type": "library",
"description": "PHP-ML - Machine Learning library for PHP",
"keywords": [
"machine learning",
"pattern recognition",
"neural network",
"computational learning theory",
"artificial intelligence",
"data science",
"feature extraction"
],
"homepage": "https://github.com/php-ai/php-ml",
"license": "MIT",
"authors": [
{
"name": "Arkadiusz Kondas",
"email": "arkadiusz.kondas@gmail.com"
}
],
"require": {
"php": "^7.1"
},
"require-dev": {
"phpbench/phpbench": "^0.14.0",
"phpstan/phpstan-phpunit": "^0.10",
"phpstan/phpstan-shim": "^0.10",
"phpstan/phpstan-strict-rules": "^0.10",
"phpunit/phpunit": "^7.0.0",
"symplify/coding-standard": "^5.1",
"symplify/easy-coding-standard": "^5.1"
},
"config": {
"preferred-install": "dist",
"sort-packages": true
},
"autoload": {
"psr-4": {
"Phpml\\": "src/"
}
},
"autoload-dev": {
"psr-4": {
"Phpml\\Tests\\": "tests/"
}
},
"scripts": {
"check-cs": "vendor/bin/ecs check src tests bin",
"fix-cs": "vendor/bin/ecs check src tests bin --fix",
"phpstan": "vendor/bin/phpstan.phar analyse src tests bin --level max --configuration phpstan.neon"
}
}

4617
msd2/myoos/vendor/php-ai/php-ml/composer.lock generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,215 @@
RI: refractive index,Na: Sodium,Mg: Magnesium,Al: Aluminum,Si: Silicon,K: Potassium,Ca: Calcium,Ba: Barium,Fe: Iron,type of glass
1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.00,building_windows_float_processed
1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.00,building_windows_float_processed
1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.00,building_windows_float_processed
1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.00,building_windows_float_processed
1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.00,building_windows_float_processed
1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.00,0.26,building_windows_float_processed
1.51743,13.30,3.60,1.14,73.09,0.58,8.17,0.00,0.00,building_windows_float_processed
1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.00,0.00,building_windows_float_processed
1.51918,14.04,3.58,1.37,72.08,0.56,8.30,0.00,0.00,building_windows_float_processed
1.51755,13.00,3.60,1.36,72.99,0.57,8.40,0.00,0.11,building_windows_float_processed
1.51571,12.72,3.46,1.56,73.20,0.67,8.09,0.00,0.24,building_windows_float_processed
1.51763,12.80,3.66,1.27,73.01,0.60,8.56,0.00,0.00,building_windows_float_processed
1.51589,12.88,3.43,1.40,73.28,0.69,8.05,0.00,0.24,building_windows_float_processed
1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.00,0.17,building_windows_float_processed
1.51763,12.61,3.59,1.31,73.29,0.58,8.50,0.00,0.00,building_windows_float_processed
1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0.00,0.00,building_windows_float_processed
1.51784,12.68,3.67,1.16,73.11,0.61,8.70,0.00,0.00,building_windows_float_processed
1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0.00,0.00,building_windows_float_processed
1.51911,13.90,3.73,1.18,72.12,0.06,8.89,0.00,0.00,building_windows_float_processed
1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0.00,0.07,building_windows_float_processed
1.51750,12.82,3.55,1.49,72.75,0.54,8.52,0.00,0.19,building_windows_float_processed
1.51966,14.77,3.75,0.29,72.02,0.03,9.00,0.00,0.00,building_windows_float_processed
1.51736,12.78,3.62,1.29,72.79,0.59,8.70,0.00,0.00,building_windows_float_processed
1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0.00,0.00,building_windows_float_processed
1.51720,13.38,3.50,1.15,72.85,0.50,8.43,0.00,0.00,building_windows_float_processed
1.51764,12.98,3.54,1.21,73.00,0.65,8.53,0.00,0.00,building_windows_float_processed
1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.00,0.00,building_windows_float_processed
1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0.00,0.00,building_windows_float_processed
1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.00,0.00,building_windows_float_processed
1.51784,13.08,3.49,1.28,72.86,0.60,8.49,0.00,0.00,building_windows_float_processed
1.51768,12.65,3.56,1.30,73.08,0.61,8.69,0.00,0.14,building_windows_float_processed
1.51747,12.84,3.50,1.14,73.27,0.56,8.55,0.00,0.00,building_windows_float_processed
1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,building_windows_float_processed
1.51753,12.57,3.47,1.38,73.39,0.60,8.55,0.00,0.06,building_windows_float_processed
1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0.00,0.00,building_windows_float_processed
1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.00,0.00,building_windows_float_processed
1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0.00,building_windows_float_processed
1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0.00,0.00,building_windows_float_processed
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,building_windows_float_processed
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,building_windows_float_processed
1.51793,12.79,3.50,1.12,73.03,0.64,8.77,0.00,0.00,building_windows_float_processed
1.51755,12.71,3.42,1.20,73.20,0.59,8.64,0.00,0.00,building_windows_float_processed
1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.00,0.00,building_windows_float_processed
1.52210,13.73,3.84,0.72,71.76,0.17,9.74,0.00,0.00,building_windows_float_processed
1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0.00,0.30,building_windows_float_processed
1.51900,13.49,3.48,1.35,71.95,0.55,9.00,0.00,0.00,building_windows_float_processed
1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0.00,0.16,building_windows_float_processed
1.52667,13.99,3.70,0.71,71.57,0.02,9.82,0.00,0.10,building_windows_float_processed
1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0.00,0.00,building_windows_float_processed
1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0.00,0.00,building_windows_float_processed
1.52320,13.72,3.72,0.51,71.75,0.09,10.06,0.00,0.16,building_windows_float_processed
1.51926,13.20,3.33,1.28,72.36,0.60,9.14,0.00,0.11,building_windows_float_processed
1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0.00,0.00,building_windows_float_processed
1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0.00,0.00,building_windows_float_processed
1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.00,0.09,building_windows_float_processed
1.51769,12.45,2.71,1.29,73.70,0.56,9.06,0.00,0.24,building_windows_float_processed
1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0.00,0.31,building_windows_float_processed
1.51824,12.87,3.48,1.29,72.95,0.60,8.43,0.00,0.00,building_windows_float_processed
1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0.00,0.00,building_windows_float_processed
1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0.00,0.11,building_windows_float_processed
1.51905,13.60,3.62,1.11,72.64,0.14,8.76,0.00,0.00,building_windows_float_processed
1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.00,building_windows_float_processed
1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0.00,0.11,building_windows_float_processed
1.52227,14.17,3.81,0.78,71.35,0.00,9.69,0.00,0.00,building_windows_float_processed
1.52172,13.48,3.74,0.90,72.01,0.18,9.61,0.00,0.07,building_windows_float_processed
1.52099,13.69,3.59,1.12,71.96,0.09,9.40,0.00,0.00,building_windows_float_processed
1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0.00,0.17,building_windows_float_processed
1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0.00,0.17,building_windows_float_processed
1.52152,13.12,3.58,0.90,72.20,0.23,9.82,0.00,0.16,building_windows_float_processed
1.52300,13.31,3.58,0.82,71.99,0.12,10.17,0.00,0.03,building_windows_float_processed
1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0.00,0.12,building_windows_non_float_processed
1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0.00,0.32,building_windows_non_float_processed
1.51593,13.09,3.59,1.52,73.10,0.67,7.83,0.00,0.00,building_windows_non_float_processed
1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0.00,0.00,building_windows_non_float_processed
1.51596,13.02,3.56,1.54,73.11,0.72,7.90,0.00,0.00,building_windows_non_float_processed
1.51590,13.02,3.58,1.51,73.12,0.69,7.96,0.00,0.00,building_windows_non_float_processed
1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0.00,0.00,building_windows_non_float_processed
1.51627,13.00,3.58,1.54,72.83,0.61,8.04,0.00,0.00,building_windows_non_float_processed
1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.00,0.14,building_windows_non_float_processed
1.51590,12.82,3.52,1.90,72.86,0.69,7.97,0.00,0.00,building_windows_non_float_processed
1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0.00,0.00,building_windows_non_float_processed
1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.00,0.00,building_windows_non_float_processed
1.51646,13.41,3.55,1.25,72.81,0.68,8.10,0.00,0.00,building_windows_non_float_processed
1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0.00,0.09,building_windows_non_float_processed
1.51409,14.25,3.09,2.08,72.28,1.10,7.08,0.00,0.00,building_windows_non_float_processed
1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0.00,0.00,building_windows_non_float_processed
1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0.00,0.00,building_windows_non_float_processed
1.51645,13.40,3.49,1.52,72.65,0.67,8.08,0.00,0.10,building_windows_non_float_processed
1.51618,13.01,3.50,1.48,72.89,0.60,8.12,0.00,0.00,building_windows_non_float_processed
1.51640,12.55,3.48,1.87,73.23,0.63,8.08,0.00,0.09,building_windows_non_float_processed
1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0.00,0.22,building_windows_non_float_processed
1.51605,12.90,3.44,1.45,73.06,0.44,8.27,0.00,0.00,building_windows_non_float_processed
1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0.00,0.19,building_windows_non_float_processed
1.51590,13.24,3.34,1.47,73.10,0.39,8.22,0.00,0.00,building_windows_non_float_processed
1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0.00,0.00,building_windows_non_float_processed
1.51860,13.36,3.43,1.43,72.26,0.51,8.60,0.00,0.00,building_windows_non_float_processed
1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0.00,0.15,building_windows_non_float_processed
1.51743,12.20,3.25,1.16,73.55,0.62,8.90,0.00,0.24,building_windows_non_float_processed
1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.00,0.00,building_windows_non_float_processed
1.51811,12.96,2.96,1.43,72.92,0.60,8.79,0.14,0.00,building_windows_non_float_processed
1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,building_windows_non_float_processed
1.51730,12.35,2.72,1.63,72.87,0.70,9.23,0.00,0.00,building_windows_non_float_processed
1.51820,12.62,2.76,0.83,73.81,0.35,9.42,0.00,0.20,building_windows_non_float_processed
1.52725,13.80,3.15,0.66,70.57,0.08,11.64,0.00,0.00,building_windows_non_float_processed
1.52410,13.83,2.90,1.17,71.15,0.08,10.79,0.00,0.00,building_windows_non_float_processed
1.52475,11.45,0.00,1.88,72.19,0.81,13.24,0.00,0.34,building_windows_non_float_processed
1.53125,10.73,0.00,2.10,69.81,0.58,13.30,3.15,0.28,building_windows_non_float_processed
1.53393,12.30,0.00,1.00,70.16,0.12,16.19,0.00,0.24,building_windows_non_float_processed
1.52222,14.43,0.00,1.00,72.67,0.10,11.52,0.00,0.08,building_windows_non_float_processed
1.51818,13.72,0.00,0.56,74.45,0.00,10.99,0.00,0.00,building_windows_non_float_processed
1.52664,11.23,0.00,0.77,73.21,0.00,14.68,0.00,0.00,building_windows_non_float_processed
1.52739,11.02,0.00,0.75,73.08,0.00,14.96,0.00,0.00,building_windows_non_float_processed
1.52777,12.64,0.00,0.67,72.02,0.06,14.40,0.00,0.00,building_windows_non_float_processed
1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0.00,0.14,building_windows_non_float_processed
1.51847,13.10,3.97,1.19,72.44,0.60,8.43,0.00,0.00,building_windows_non_float_processed
1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0.00,0.00,building_windows_non_float_processed
1.51829,13.24,3.90,1.41,72.33,0.55,8.31,0.00,0.10,building_windows_non_float_processed
1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0.00,0.00,building_windows_non_float_processed
1.51673,13.30,3.64,1.53,72.53,0.65,8.03,0.00,0.29,building_windows_non_float_processed
1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0.00,0.00,building_windows_non_float_processed
1.51844,13.25,3.76,1.32,72.40,0.58,8.42,0.00,0.00,building_windows_non_float_processed
1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0.00,0.21,building_windows_non_float_processed
1.51687,13.23,3.54,1.48,72.84,0.56,8.10,0.00,0.00,building_windows_non_float_processed
1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0.00,0.00,building_windows_non_float_processed
1.52177,13.20,3.68,1.15,72.75,0.54,8.52,0.00,0.00,building_windows_non_float_processed
1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0.00,0.12,building_windows_non_float_processed
1.51667,12.94,3.61,1.26,72.75,0.56,8.60,0.00,0.00,building_windows_non_float_processed
1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0.00,0.17,building_windows_non_float_processed
1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,building_windows_non_float_processed
1.52020,13.98,1.35,1.63,71.76,0.39,10.56,0.00,0.18,building_windows_non_float_processed
1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0.00,0.00,building_windows_non_float_processed
1.52614,13.70,0.00,1.36,71.24,0.19,13.44,0.00,0.10,building_windows_non_float_processed
1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0.00,0.00,building_windows_non_float_processed
1.51800,13.71,3.93,1.54,71.81,0.54,8.21,0.00,0.15,building_windows_non_float_processed
1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0.00,0.00,building_windows_non_float_processed
1.51789,13.19,3.90,1.30,72.33,0.55,8.44,0.00,0.28,building_windows_non_float_processed
1.51806,13.00,3.80,1.08,73.07,0.56,8.38,0.00,0.12,building_windows_non_float_processed
1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0.00,0.00,building_windows_non_float_processed
1.51674,12.79,3.52,1.54,73.36,0.66,7.90,0.00,0.00,building_windows_non_float_processed
1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0.00,0.00,building_windows_non_float_processed
1.51690,13.33,3.54,1.61,72.54,0.68,8.11,0.00,0.00,building_windows_non_float_processed
1.51851,13.20,3.63,1.07,72.83,0.57,8.41,0.09,0.17,building_windows_non_float_processed
1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,building_windows_non_float_processed
1.51709,13.00,3.47,1.79,72.72,0.66,8.18,0.00,0.00,building_windows_non_float_processed
1.51660,12.99,3.18,1.23,72.97,0.58,8.81,0.00,0.24,building_windows_non_float_processed
1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0.00,0.35,building_windows_non_float_processed
1.51769,13.65,3.66,1.11,72.77,0.11,8.60,0.00,0.00,vehicle_windows_float_processed
1.51610,13.33,3.53,1.34,72.67,0.56,8.33,0.00,0.00,vehicle_windows_float_processed
1.51670,13.24,3.57,1.38,72.70,0.56,8.44,0.00,0.10,vehicle_windows_float_processed
1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.00,0.00,vehicle_windows_float_processed
1.51665,13.14,3.45,1.76,72.48,0.60,8.38,0.00,0.17,vehicle_windows_float_processed
1.52127,14.32,3.90,0.83,71.50,0.00,9.49,0.00,0.00,vehicle_windows_float_processed
1.51779,13.64,3.65,0.65,73.00,0.06,8.93,0.00,0.00,vehicle_windows_float_processed
1.51610,13.42,3.40,1.22,72.69,0.59,8.32,0.00,0.00,vehicle_windows_float_processed
1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0.00,0.00,vehicle_windows_float_processed
1.51646,13.04,3.40,1.26,73.01,0.52,8.58,0.00,0.00,vehicle_windows_float_processed
1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0.00,0.00,vehicle_windows_float_processed
1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.00,0.00,vehicle_windows_float_processed
1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0.00,0.00,vehicle_windows_float_processed
1.51796,13.50,3.36,1.63,71.94,0.57,8.81,0.00,0.09,vehicle_windows_float_processed
1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0.00,0.00,vehicle_windows_float_processed
1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,vehicle_windows_float_processed
1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.00,0.37,vehicle_windows_float_processed
1.51514,14.01,2.68,3.50,69.89,1.68,5.87,2.20,0.00,containers
1.51915,12.73,1.85,1.86,72.69,0.60,10.09,0.00,0.00,containers
1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0.00,0.00,containers
1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0.00,0.00,containers
1.51969,12.64,0.00,1.65,73.75,0.38,11.53,0.00,0.00,containers
1.51666,12.86,0.00,1.83,73.88,0.97,10.17,0.00,0.00,containers
1.51994,13.27,0.00,1.76,73.03,0.47,11.32,0.00,0.00,containers
1.52369,13.44,0.00,1.58,72.22,0.32,12.24,0.00,0.00,containers
1.51316,13.02,0.00,3.04,70.48,6.21,6.96,0.00,0.00,containers
1.51321,13.00,0.00,3.02,70.70,6.21,6.93,0.00,0.00,containers
1.52043,13.38,0.00,1.40,72.25,0.33,12.50,0.00,0.00,containers
1.52058,12.85,1.61,2.17,72.18,0.76,9.70,0.24,0.51,containers
1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.00,0.28,containers
1.51905,14.00,2.39,1.56,72.37,0.00,9.57,0.00,0.00,tableware
1.51937,13.79,2.41,1.19,72.76,0.00,9.77,0.00,0.00,tableware
1.51829,14.46,2.24,1.62,72.38,0.00,9.26,0.00,0.00,tableware
1.51852,14.09,2.19,1.66,72.67,0.00,9.32,0.00,0.00,tableware
1.51299,14.40,1.74,1.54,74.55,0.00,7.59,0.00,0.00,tableware
1.51888,14.99,0.78,1.74,72.50,0.00,9.95,0.00,0.00,tableware
1.51916,14.15,0.00,2.09,72.74,0.00,10.88,0.00,0.00,tableware
1.51969,14.56,0.00,0.56,73.48,0.00,11.22,0.00,0.00,tableware
1.51115,17.38,0.00,0.34,75.41,0.00,6.65,0.00,0.00,tableware
1.51131,13.69,3.20,1.81,72.81,1.76,5.43,1.19,0.00,headlamps
1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0.00,headlamps
1.52315,13.44,3.34,1.23,72.38,0.60,8.83,0.00,0.00,headlamps
1.52247,14.86,2.20,2.06,70.26,0.76,9.76,0.00,0.00,headlamps
1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0.00,headlamps
1.51613,13.88,1.78,1.79,73.10,0.00,8.67,0.76,0.00,headlamps
1.51602,14.85,0.00,2.38,73.28,0.00,8.76,0.64,0.09,headlamps
1.51623,14.20,0.00,2.79,73.46,0.04,9.04,0.40,0.09,headlamps
1.51719,14.75,0.00,2.00,73.02,0.00,8.53,1.59,0.08,headlamps
1.51683,14.56,0.00,1.98,73.29,0.00,8.52,1.57,0.07,headlamps
1.51545,14.14,0.00,2.68,73.39,0.08,9.07,0.61,0.05,headlamps
1.51556,13.87,0.00,2.54,73.23,0.14,9.41,0.81,0.01,headlamps
1.51727,14.70,0.00,2.34,73.28,0.00,8.95,0.66,0.00,headlamps
1.51531,14.38,0.00,2.66,73.10,0.04,9.08,0.64,0.00,headlamps
1.51609,15.01,0.00,2.51,73.05,0.05,8.83,0.53,0.00,headlamps
1.51508,15.15,0.00,2.25,73.50,0.00,8.34,0.63,0.00,headlamps
1.51653,11.95,0.00,1.19,75.18,2.70,8.93,0.00,0.00,headlamps
1.51514,14.85,0.00,2.42,73.72,0.00,8.39,0.56,0.00,headlamps
1.51658,14.80,0.00,1.99,73.11,0.00,8.28,1.71,0.00,headlamps
1.51617,14.95,0.00,2.27,73.30,0.00,8.71,0.67,0.00,headlamps
1.51732,14.95,0.00,1.80,72.99,0.00,8.61,1.55,0.00,headlamps
1.51645,14.94,0.00,1.87,73.11,0.00,8.67,1.38,0.00,headlamps
1.51831,14.39,0.00,1.82,72.86,1.41,6.47,2.88,0.00,headlamps
1.51640,14.37,0.00,2.74,72.85,0.00,9.45,0.54,0.00,headlamps
1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.00,headlamps
1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.00,headlamps
1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.00,headlamps
1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.00,headlamps
1.51711,14.23,0.00,2.08,73.36,0.00,8.62,1.67,0.00,headlamps
1 RI: refractive index Na: Sodium Mg: Magnesium Al: Aluminum Si: Silicon K: Potassium Ca: Calcium Ba: Barium Fe: Iron type of glass
2 1.52101 13.64 4.49 1.10 71.78 0.06 8.75 0.00 0.00 building_windows_float_processed
3 1.51761 13.89 3.60 1.36 72.73 0.48 7.83 0.00 0.00 building_windows_float_processed
4 1.51618 13.53 3.55 1.54 72.99 0.39 7.78 0.00 0.00 building_windows_float_processed
5 1.51766 13.21 3.69 1.29 72.61 0.57 8.22 0.00 0.00 building_windows_float_processed
6 1.51742 13.27 3.62 1.24 73.08 0.55 8.07 0.00 0.00 building_windows_float_processed
7 1.51596 12.79 3.61 1.62 72.97 0.64 8.07 0.00 0.26 building_windows_float_processed
8 1.51743 13.30 3.60 1.14 73.09 0.58 8.17 0.00 0.00 building_windows_float_processed
9 1.51756 13.15 3.61 1.05 73.24 0.57 8.24 0.00 0.00 building_windows_float_processed
10 1.51918 14.04 3.58 1.37 72.08 0.56 8.30 0.00 0.00 building_windows_float_processed
11 1.51755 13.00 3.60 1.36 72.99 0.57 8.40 0.00 0.11 building_windows_float_processed
12 1.51571 12.72 3.46 1.56 73.20 0.67 8.09 0.00 0.24 building_windows_float_processed
13 1.51763 12.80 3.66 1.27 73.01 0.60 8.56 0.00 0.00 building_windows_float_processed
14 1.51589 12.88 3.43 1.40 73.28 0.69 8.05 0.00 0.24 building_windows_float_processed
15 1.51748 12.86 3.56 1.27 73.21 0.54 8.38 0.00 0.17 building_windows_float_processed
16 1.51763 12.61 3.59 1.31 73.29 0.58 8.50 0.00 0.00 building_windows_float_processed
17 1.51761 12.81 3.54 1.23 73.24 0.58 8.39 0.00 0.00 building_windows_float_processed
18 1.51784 12.68 3.67 1.16 73.11 0.61 8.70 0.00 0.00 building_windows_float_processed
19 1.52196 14.36 3.85 0.89 71.36 0.15 9.15 0.00 0.00 building_windows_float_processed
20 1.51911 13.90 3.73 1.18 72.12 0.06 8.89 0.00 0.00 building_windows_float_processed
21 1.51735 13.02 3.54 1.69 72.73 0.54 8.44 0.00 0.07 building_windows_float_processed
22 1.51750 12.82 3.55 1.49 72.75 0.54 8.52 0.00 0.19 building_windows_float_processed
23 1.51966 14.77 3.75 0.29 72.02 0.03 9.00 0.00 0.00 building_windows_float_processed
24 1.51736 12.78 3.62 1.29 72.79 0.59 8.70 0.00 0.00 building_windows_float_processed
25 1.51751 12.81 3.57 1.35 73.02 0.62 8.59 0.00 0.00 building_windows_float_processed
26 1.51720 13.38 3.50 1.15 72.85 0.50 8.43 0.00 0.00 building_windows_float_processed
27 1.51764 12.98 3.54 1.21 73.00 0.65 8.53 0.00 0.00 building_windows_float_processed
28 1.51793 13.21 3.48 1.41 72.64 0.59 8.43 0.00 0.00 building_windows_float_processed
29 1.51721 12.87 3.48 1.33 73.04 0.56 8.43 0.00 0.00 building_windows_float_processed
30 1.51768 12.56 3.52 1.43 73.15 0.57 8.54 0.00 0.00 building_windows_float_processed
31 1.51784 13.08 3.49 1.28 72.86 0.60 8.49 0.00 0.00 building_windows_float_processed
32 1.51768 12.65 3.56 1.30 73.08 0.61 8.69 0.00 0.14 building_windows_float_processed
33 1.51747 12.84 3.50 1.14 73.27 0.56 8.55 0.00 0.00 building_windows_float_processed
34 1.51775 12.85 3.48 1.23 72.97 0.61 8.56 0.09 0.22 building_windows_float_processed
35 1.51753 12.57 3.47 1.38 73.39 0.60 8.55 0.00 0.06 building_windows_float_processed
36 1.51783 12.69 3.54 1.34 72.95 0.57 8.75 0.00 0.00 building_windows_float_processed
37 1.51567 13.29 3.45 1.21 72.74 0.56 8.57 0.00 0.00 building_windows_float_processed
38 1.51909 13.89 3.53 1.32 71.81 0.51 8.78 0.11 0.00 building_windows_float_processed
39 1.51797 12.74 3.48 1.35 72.96 0.64 8.68 0.00 0.00 building_windows_float_processed
40 1.52213 14.21 3.82 0.47 71.77 0.11 9.57 0.00 0.00 building_windows_float_processed
41 1.52213 14.21 3.82 0.47 71.77 0.11 9.57 0.00 0.00 building_windows_float_processed
42 1.51793 12.79 3.50 1.12 73.03 0.64 8.77 0.00 0.00 building_windows_float_processed
43 1.51755 12.71 3.42 1.20 73.20 0.59 8.64 0.00 0.00 building_windows_float_processed
44 1.51779 13.21 3.39 1.33 72.76 0.59 8.59 0.00 0.00 building_windows_float_processed
45 1.52210 13.73 3.84 0.72 71.76 0.17 9.74 0.00 0.00 building_windows_float_processed
46 1.51786 12.73 3.43 1.19 72.95 0.62 8.76 0.00 0.30 building_windows_float_processed
47 1.51900 13.49 3.48 1.35 71.95 0.55 9.00 0.00 0.00 building_windows_float_processed
48 1.51869 13.19 3.37 1.18 72.72 0.57 8.83 0.00 0.16 building_windows_float_processed
49 1.52667 13.99 3.70 0.71 71.57 0.02 9.82 0.00 0.10 building_windows_float_processed
50 1.52223 13.21 3.77 0.79 71.99 0.13 10.02 0.00 0.00 building_windows_float_processed
51 1.51898 13.58 3.35 1.23 72.08 0.59 8.91 0.00 0.00 building_windows_float_processed
52 1.52320 13.72 3.72 0.51 71.75 0.09 10.06 0.00 0.16 building_windows_float_processed
53 1.51926 13.20 3.33 1.28 72.36 0.60 9.14 0.00 0.11 building_windows_float_processed
54 1.51808 13.43 2.87 1.19 72.84 0.55 9.03 0.00 0.00 building_windows_float_processed
55 1.51837 13.14 2.84 1.28 72.85 0.55 9.07 0.00 0.00 building_windows_float_processed
56 1.51778 13.21 2.81 1.29 72.98 0.51 9.02 0.00 0.09 building_windows_float_processed
57 1.51769 12.45 2.71 1.29 73.70 0.56 9.06 0.00 0.24 building_windows_float_processed
58 1.51215 12.99 3.47 1.12 72.98 0.62 8.35 0.00 0.31 building_windows_float_processed
59 1.51824 12.87 3.48 1.29 72.95 0.60 8.43 0.00 0.00 building_windows_float_processed
60 1.51754 13.48 3.74 1.17 72.99 0.59 8.03 0.00 0.00 building_windows_float_processed
61 1.51754 13.39 3.66 1.19 72.79 0.57 8.27 0.00 0.11 building_windows_float_processed
62 1.51905 13.60 3.62 1.11 72.64 0.14 8.76 0.00 0.00 building_windows_float_processed
63 1.51977 13.81 3.58 1.32 71.72 0.12 8.67 0.69 0.00 building_windows_float_processed
64 1.52172 13.51 3.86 0.88 71.79 0.23 9.54 0.00 0.11 building_windows_float_processed
65 1.52227 14.17 3.81 0.78 71.35 0.00 9.69 0.00 0.00 building_windows_float_processed
66 1.52172 13.48 3.74 0.90 72.01 0.18 9.61 0.00 0.07 building_windows_float_processed
67 1.52099 13.69 3.59 1.12 71.96 0.09 9.40 0.00 0.00 building_windows_float_processed
68 1.52152 13.05 3.65 0.87 72.22 0.19 9.85 0.00 0.17 building_windows_float_processed
69 1.52152 13.05 3.65 0.87 72.32 0.19 9.85 0.00 0.17 building_windows_float_processed
70 1.52152 13.12 3.58 0.90 72.20 0.23 9.82 0.00 0.16 building_windows_float_processed
71 1.52300 13.31 3.58 0.82 71.99 0.12 10.17 0.00 0.03 building_windows_float_processed
72 1.51574 14.86 3.67 1.74 71.87 0.16 7.36 0.00 0.12 building_windows_non_float_processed
73 1.51848 13.64 3.87 1.27 71.96 0.54 8.32 0.00 0.32 building_windows_non_float_processed
74 1.51593 13.09 3.59 1.52 73.10 0.67 7.83 0.00 0.00 building_windows_non_float_processed
75 1.51631 13.34 3.57 1.57 72.87 0.61 7.89 0.00 0.00 building_windows_non_float_processed
76 1.51596 13.02 3.56 1.54 73.11 0.72 7.90 0.00 0.00 building_windows_non_float_processed
77 1.51590 13.02 3.58 1.51 73.12 0.69 7.96 0.00 0.00 building_windows_non_float_processed
78 1.51645 13.44 3.61 1.54 72.39 0.66 8.03 0.00 0.00 building_windows_non_float_processed
79 1.51627 13.00 3.58 1.54 72.83 0.61 8.04 0.00 0.00 building_windows_non_float_processed
80 1.51613 13.92 3.52 1.25 72.88 0.37 7.94 0.00 0.14 building_windows_non_float_processed
81 1.51590 12.82 3.52 1.90 72.86 0.69 7.97 0.00 0.00 building_windows_non_float_processed
82 1.51592 12.86 3.52 2.12 72.66 0.69 7.97 0.00 0.00 building_windows_non_float_processed
83 1.51593 13.25 3.45 1.43 73.17 0.61 7.86 0.00 0.00 building_windows_non_float_processed
84 1.51646 13.41 3.55 1.25 72.81 0.68 8.10 0.00 0.00 building_windows_non_float_processed
85 1.51594 13.09 3.52 1.55 72.87 0.68 8.05 0.00 0.09 building_windows_non_float_processed
86 1.51409 14.25 3.09 2.08 72.28 1.10 7.08 0.00 0.00 building_windows_non_float_processed
87 1.51625 13.36 3.58 1.49 72.72 0.45 8.21 0.00 0.00 building_windows_non_float_processed
88 1.51569 13.24 3.49 1.47 73.25 0.38 8.03 0.00 0.00 building_windows_non_float_processed
89 1.51645 13.40 3.49 1.52 72.65 0.67 8.08 0.00 0.10 building_windows_non_float_processed
90 1.51618 13.01 3.50 1.48 72.89 0.60 8.12 0.00 0.00 building_windows_non_float_processed
91 1.51640 12.55 3.48 1.87 73.23 0.63 8.08 0.00 0.09 building_windows_non_float_processed
92 1.51841 12.93 3.74 1.11 72.28 0.64 8.96 0.00 0.22 building_windows_non_float_processed
93 1.51605 12.90 3.44 1.45 73.06 0.44 8.27 0.00 0.00 building_windows_non_float_processed
94 1.51588 13.12 3.41 1.58 73.26 0.07 8.39 0.00 0.19 building_windows_non_float_processed
95 1.51590 13.24 3.34 1.47 73.10 0.39 8.22 0.00 0.00 building_windows_non_float_processed
96 1.51629 12.71 3.33 1.49 73.28 0.67 8.24 0.00 0.00 building_windows_non_float_processed
97 1.51860 13.36 3.43 1.43 72.26 0.51 8.60 0.00 0.00 building_windows_non_float_processed
98 1.51841 13.02 3.62 1.06 72.34 0.64 9.13 0.00 0.15 building_windows_non_float_processed
99 1.51743 12.20 3.25 1.16 73.55 0.62 8.90 0.00 0.24 building_windows_non_float_processed
100 1.51689 12.67 2.88 1.71 73.21 0.73 8.54 0.00 0.00 building_windows_non_float_processed
101 1.51811 12.96 2.96 1.43 72.92 0.60 8.79 0.14 0.00 building_windows_non_float_processed
102 1.51655 12.75 2.85 1.44 73.27 0.57 8.79 0.11 0.22 building_windows_non_float_processed
103 1.51730 12.35 2.72 1.63 72.87 0.70 9.23 0.00 0.00 building_windows_non_float_processed
104 1.51820 12.62 2.76 0.83 73.81 0.35 9.42 0.00 0.20 building_windows_non_float_processed
105 1.52725 13.80 3.15 0.66 70.57 0.08 11.64 0.00 0.00 building_windows_non_float_processed
106 1.52410 13.83 2.90 1.17 71.15 0.08 10.79 0.00 0.00 building_windows_non_float_processed
107 1.52475 11.45 0.00 1.88 72.19 0.81 13.24 0.00 0.34 building_windows_non_float_processed
108 1.53125 10.73 0.00 2.10 69.81 0.58 13.30 3.15 0.28 building_windows_non_float_processed
109 1.53393 12.30 0.00 1.00 70.16 0.12 16.19 0.00 0.24 building_windows_non_float_processed
110 1.52222 14.43 0.00 1.00 72.67 0.10 11.52 0.00 0.08 building_windows_non_float_processed
111 1.51818 13.72 0.00 0.56 74.45 0.00 10.99 0.00 0.00 building_windows_non_float_processed
112 1.52664 11.23 0.00 0.77 73.21 0.00 14.68 0.00 0.00 building_windows_non_float_processed
113 1.52739 11.02 0.00 0.75 73.08 0.00 14.96 0.00 0.00 building_windows_non_float_processed
114 1.52777 12.64 0.00 0.67 72.02 0.06 14.40 0.00 0.00 building_windows_non_float_processed
115 1.51892 13.46 3.83 1.26 72.55 0.57 8.21 0.00 0.14 building_windows_non_float_processed
116 1.51847 13.10 3.97 1.19 72.44 0.60 8.43 0.00 0.00 building_windows_non_float_processed
117 1.51846 13.41 3.89 1.33 72.38 0.51 8.28 0.00 0.00 building_windows_non_float_processed
118 1.51829 13.24 3.90 1.41 72.33 0.55 8.31 0.00 0.10 building_windows_non_float_processed
119 1.51708 13.72 3.68 1.81 72.06 0.64 7.88 0.00 0.00 building_windows_non_float_processed
120 1.51673 13.30 3.64 1.53 72.53 0.65 8.03 0.00 0.29 building_windows_non_float_processed
121 1.51652 13.56 3.57 1.47 72.45 0.64 7.96 0.00 0.00 building_windows_non_float_processed
122 1.51844 13.25 3.76 1.32 72.40 0.58 8.42 0.00 0.00 building_windows_non_float_processed
123 1.51663 12.93 3.54 1.62 72.96 0.64 8.03 0.00 0.21 building_windows_non_float_processed
124 1.51687 13.23 3.54 1.48 72.84 0.56 8.10 0.00 0.00 building_windows_non_float_processed
125 1.51707 13.48 3.48 1.71 72.52 0.62 7.99 0.00 0.00 building_windows_non_float_processed
126 1.52177 13.20 3.68 1.15 72.75 0.54 8.52 0.00 0.00 building_windows_non_float_processed
127 1.51872 12.93 3.66 1.56 72.51 0.58 8.55 0.00 0.12 building_windows_non_float_processed
128 1.51667 12.94 3.61 1.26 72.75 0.56 8.60 0.00 0.00 building_windows_non_float_processed
129 1.52081 13.78 2.28 1.43 71.99 0.49 9.85 0.00 0.17 building_windows_non_float_processed
130 1.52068 13.55 2.09 1.67 72.18 0.53 9.57 0.27 0.17 building_windows_non_float_processed
131 1.52020 13.98 1.35 1.63 71.76 0.39 10.56 0.00 0.18 building_windows_non_float_processed
132 1.52177 13.75 1.01 1.36 72.19 0.33 11.14 0.00 0.00 building_windows_non_float_processed
133 1.52614 13.70 0.00 1.36 71.24 0.19 13.44 0.00 0.10 building_windows_non_float_processed
134 1.51813 13.43 3.98 1.18 72.49 0.58 8.15 0.00 0.00 building_windows_non_float_processed
135 1.51800 13.71 3.93 1.54 71.81 0.54 8.21 0.00 0.15 building_windows_non_float_processed
136 1.51811 13.33 3.85 1.25 72.78 0.52 8.12 0.00 0.00 building_windows_non_float_processed
137 1.51789 13.19 3.90 1.30 72.33 0.55 8.44 0.00 0.28 building_windows_non_float_processed
138 1.51806 13.00 3.80 1.08 73.07 0.56 8.38 0.00 0.12 building_windows_non_float_processed
139 1.51711 12.89 3.62 1.57 72.96 0.61 8.11 0.00 0.00 building_windows_non_float_processed
140 1.51674 12.79 3.52 1.54 73.36 0.66 7.90 0.00 0.00 building_windows_non_float_processed
141 1.51674 12.87 3.56 1.64 73.14 0.65 7.99 0.00 0.00 building_windows_non_float_processed
142 1.51690 13.33 3.54 1.61 72.54 0.68 8.11 0.00 0.00 building_windows_non_float_processed
143 1.51851 13.20 3.63 1.07 72.83 0.57 8.41 0.09 0.17 building_windows_non_float_processed
144 1.51662 12.85 3.51 1.44 73.01 0.68 8.23 0.06 0.25 building_windows_non_float_processed
145 1.51709 13.00 3.47 1.79 72.72 0.66 8.18 0.00 0.00 building_windows_non_float_processed
146 1.51660 12.99 3.18 1.23 72.97 0.58 8.81 0.00 0.24 building_windows_non_float_processed
147 1.51839 12.85 3.67 1.24 72.57 0.62 8.68 0.00 0.35 building_windows_non_float_processed
148 1.51769 13.65 3.66 1.11 72.77 0.11 8.60 0.00 0.00 vehicle_windows_float_processed
149 1.51610 13.33 3.53 1.34 72.67 0.56 8.33 0.00 0.00 vehicle_windows_float_processed
150 1.51670 13.24 3.57 1.38 72.70 0.56 8.44 0.00 0.10 vehicle_windows_float_processed
151 1.51643 12.16 3.52 1.35 72.89 0.57 8.53 0.00 0.00 vehicle_windows_float_processed
152 1.51665 13.14 3.45 1.76 72.48 0.60 8.38 0.00 0.17 vehicle_windows_float_processed
153 1.52127 14.32 3.90 0.83 71.50 0.00 9.49 0.00 0.00 vehicle_windows_float_processed
154 1.51779 13.64 3.65 0.65 73.00 0.06 8.93 0.00 0.00 vehicle_windows_float_processed
155 1.51610 13.42 3.40 1.22 72.69 0.59 8.32 0.00 0.00 vehicle_windows_float_processed
156 1.51694 12.86 3.58 1.31 72.61 0.61 8.79 0.00 0.00 vehicle_windows_float_processed
157 1.51646 13.04 3.40 1.26 73.01 0.52 8.58 0.00 0.00 vehicle_windows_float_processed
158 1.51655 13.41 3.39 1.28 72.64 0.52 8.65 0.00 0.00 vehicle_windows_float_processed
159 1.52121 14.03 3.76 0.58 71.79 0.11 9.65 0.00 0.00 vehicle_windows_float_processed
160 1.51776 13.53 3.41 1.52 72.04 0.58 8.79 0.00 0.00 vehicle_windows_float_processed
161 1.51796 13.50 3.36 1.63 71.94 0.57 8.81 0.00 0.09 vehicle_windows_float_processed
162 1.51832 13.33 3.34 1.54 72.14 0.56 8.99 0.00 0.00 vehicle_windows_float_processed
163 1.51934 13.64 3.54 0.75 72.65 0.16 8.89 0.15 0.24 vehicle_windows_float_processed
164 1.52211 14.19 3.78 0.91 71.36 0.23 9.14 0.00 0.37 vehicle_windows_float_processed
165 1.51514 14.01 2.68 3.50 69.89 1.68 5.87 2.20 0.00 containers
166 1.51915 12.73 1.85 1.86 72.69 0.60 10.09 0.00 0.00 containers
167 1.52171 11.56 1.88 1.56 72.86 0.47 11.41 0.00 0.00 containers
168 1.52151 11.03 1.71 1.56 73.44 0.58 11.62 0.00 0.00 containers
169 1.51969 12.64 0.00 1.65 73.75 0.38 11.53 0.00 0.00 containers
170 1.51666 12.86 0.00 1.83 73.88 0.97 10.17 0.00 0.00 containers
171 1.51994 13.27 0.00 1.76 73.03 0.47 11.32 0.00 0.00 containers
172 1.52369 13.44 0.00 1.58 72.22 0.32 12.24 0.00 0.00 containers
173 1.51316 13.02 0.00 3.04 70.48 6.21 6.96 0.00 0.00 containers
174 1.51321 13.00 0.00 3.02 70.70 6.21 6.93 0.00 0.00 containers
175 1.52043 13.38 0.00 1.40 72.25 0.33 12.50 0.00 0.00 containers
176 1.52058 12.85 1.61 2.17 72.18 0.76 9.70 0.24 0.51 containers
177 1.52119 12.97 0.33 1.51 73.39 0.13 11.27 0.00 0.28 containers
178 1.51905 14.00 2.39 1.56 72.37 0.00 9.57 0.00 0.00 tableware
179 1.51937 13.79 2.41 1.19 72.76 0.00 9.77 0.00 0.00 tableware
180 1.51829 14.46 2.24 1.62 72.38 0.00 9.26 0.00 0.00 tableware
181 1.51852 14.09 2.19 1.66 72.67 0.00 9.32 0.00 0.00 tableware
182 1.51299 14.40 1.74 1.54 74.55 0.00 7.59 0.00 0.00 tableware
183 1.51888 14.99 0.78 1.74 72.50 0.00 9.95 0.00 0.00 tableware
184 1.51916 14.15 0.00 2.09 72.74 0.00 10.88 0.00 0.00 tableware
185 1.51969 14.56 0.00 0.56 73.48 0.00 11.22 0.00 0.00 tableware
186 1.51115 17.38 0.00 0.34 75.41 0.00 6.65 0.00 0.00 tableware
187 1.51131 13.69 3.20 1.81 72.81 1.76 5.43 1.19 0.00 headlamps
188 1.51838 14.32 3.26 2.22 71.25 1.46 5.79 1.63 0.00 headlamps
189 1.52315 13.44 3.34 1.23 72.38 0.60 8.83 0.00 0.00 headlamps
190 1.52247 14.86 2.20 2.06 70.26 0.76 9.76 0.00 0.00 headlamps
191 1.52365 15.79 1.83 1.31 70.43 0.31 8.61 1.68 0.00 headlamps
192 1.51613 13.88 1.78 1.79 73.10 0.00 8.67 0.76 0.00 headlamps
193 1.51602 14.85 0.00 2.38 73.28 0.00 8.76 0.64 0.09 headlamps
194 1.51623 14.20 0.00 2.79 73.46 0.04 9.04 0.40 0.09 headlamps
195 1.51719 14.75 0.00 2.00 73.02 0.00 8.53 1.59 0.08 headlamps
196 1.51683 14.56 0.00 1.98 73.29 0.00 8.52 1.57 0.07 headlamps
197 1.51545 14.14 0.00 2.68 73.39 0.08 9.07 0.61 0.05 headlamps
198 1.51556 13.87 0.00 2.54 73.23 0.14 9.41 0.81 0.01 headlamps
199 1.51727 14.70 0.00 2.34 73.28 0.00 8.95 0.66 0.00 headlamps
200 1.51531 14.38 0.00 2.66 73.10 0.04 9.08 0.64 0.00 headlamps
201 1.51609 15.01 0.00 2.51 73.05 0.05 8.83 0.53 0.00 headlamps
202 1.51508 15.15 0.00 2.25 73.50 0.00 8.34 0.63 0.00 headlamps
203 1.51653 11.95 0.00 1.19 75.18 2.70 8.93 0.00 0.00 headlamps
204 1.51514 14.85 0.00 2.42 73.72 0.00 8.39 0.56 0.00 headlamps
205 1.51658 14.80 0.00 1.99 73.11 0.00 8.28 1.71 0.00 headlamps
206 1.51617 14.95 0.00 2.27 73.30 0.00 8.71 0.67 0.00 headlamps
207 1.51732 14.95 0.00 1.80 72.99 0.00 8.61 1.55 0.00 headlamps
208 1.51645 14.94 0.00 1.87 73.11 0.00 8.67 1.38 0.00 headlamps
209 1.51831 14.39 0.00 1.82 72.86 1.41 6.47 2.88 0.00 headlamps
210 1.51640 14.37 0.00 2.74 72.85 0.00 9.45 0.54 0.00 headlamps
211 1.51623 14.14 0.00 2.88 72.61 0.08 9.18 1.06 0.00 headlamps
212 1.51685 14.92 0.00 1.99 73.06 0.00 8.40 1.59 0.00 headlamps
213 1.52065 14.36 0.00 2.02 73.42 0.00 8.44 1.64 0.00 headlamps
214 1.51651 14.38 0.00 1.94 73.61 0.00 8.48 1.57 0.00 headlamps
215 1.51711 14.23 0.00 2.08 73.36 0.00 8.62 1.67 0.00 headlamps

View File

@ -0,0 +1,151 @@
sepal_length,sepal_width,petal_length,petal_width,species
5.1,3.5,1.4,0.2,setosa
4.9,3,1.4,0.2,setosa
4.7,3.2,1.3,0.2,setosa
4.6,3.1,1.5,0.2,setosa
5,3.6,1.4,0.2,setosa
5.4,3.9,1.7,0.4,setosa
4.6,3.4,1.4,0.3,setosa
5,3.4,1.5,0.2,setosa
4.4,2.9,1.4,0.2,setosa
4.9,3.1,1.5,0.1,setosa
5.4,3.7,1.5,0.2,setosa
4.8,3.4,1.6,0.2,setosa
4.8,3,1.4,0.1,setosa
4.3,3,1.1,0.1,setosa
5.8,4,1.2,0.2,setosa
5.7,4.4,1.5,0.4,setosa
5.4,3.9,1.3,0.4,setosa
5.1,3.5,1.4,0.3,setosa
5.7,3.8,1.7,0.3,setosa
5.1,3.8,1.5,0.3,setosa
5.4,3.4,1.7,0.2,setosa
5.1,3.7,1.5,0.4,setosa
4.6,3.6,1,0.2,setosa
5.1,3.3,1.7,0.5,setosa
4.8,3.4,1.9,0.2,setosa
5,3,1.6,0.2,setosa
5,3.4,1.6,0.4,setosa
5.2,3.5,1.5,0.2,setosa
5.2,3.4,1.4,0.2,setosa
4.7,3.2,1.6,0.2,setosa
4.8,3.1,1.6,0.2,setosa
5.4,3.4,1.5,0.4,setosa
5.2,4.1,1.5,0.1,setosa
5.5,4.2,1.4,0.2,setosa
4.9,3.1,1.5,0.1,setosa
5,3.2,1.2,0.2,setosa
5.5,3.5,1.3,0.2,setosa
4.9,3.1,1.5,0.1,setosa
4.4,3,1.3,0.2,setosa
5.1,3.4,1.5,0.2,setosa
5,3.5,1.3,0.3,setosa
4.5,2.3,1.3,0.3,setosa
4.4,3.2,1.3,0.2,setosa
5,3.5,1.6,0.6,setosa
5.1,3.8,1.9,0.4,setosa
4.8,3,1.4,0.3,setosa
5.1,3.8,1.6,0.2,setosa
4.6,3.2,1.4,0.2,setosa
5.3,3.7,1.5,0.2,setosa
5,3.3,1.4,0.2,setosa
7,3.2,4.7,1.4,versicolor
6.4,3.2,4.5,1.5,versicolor
6.9,3.1,4.9,1.5,versicolor
5.5,2.3,4,1.3,versicolor
6.5,2.8,4.6,1.5,versicolor
5.7,2.8,4.5,1.3,versicolor
6.3,3.3,4.7,1.6,versicolor
4.9,2.4,3.3,1,versicolor
6.6,2.9,4.6,1.3,versicolor
5.2,2.7,3.9,1.4,versicolor
5,2,3.5,1,versicolor
5.9,3,4.2,1.5,versicolor
6,2.2,4,1,versicolor
6.1,2.9,4.7,1.4,versicolor
5.6,2.9,3.6,1.3,versicolor
6.7,3.1,4.4,1.4,versicolor
5.6,3,4.5,1.5,versicolor
5.8,2.7,4.1,1,versicolor
6.2,2.2,4.5,1.5,versicolor
5.6,2.5,3.9,1.1,versicolor
5.9,3.2,4.8,1.8,versicolor
6.1,2.8,4,1.3,versicolor
6.3,2.5,4.9,1.5,versicolor
6.1,2.8,4.7,1.2,versicolor
6.4,2.9,4.3,1.3,versicolor
6.6,3,4.4,1.4,versicolor
6.8,2.8,4.8,1.4,versicolor
6.7,3,5,1.7,versicolor
6,2.9,4.5,1.5,versicolor
5.7,2.6,3.5,1,versicolor
5.5,2.4,3.8,1.1,versicolor
5.5,2.4,3.7,1,versicolor
5.8,2.7,3.9,1.2,versicolor
6,2.7,5.1,1.6,versicolor
5.4,3,4.5,1.5,versicolor
6,3.4,4.5,1.6,versicolor
6.7,3.1,4.7,1.5,versicolor
6.3,2.3,4.4,1.3,versicolor
5.6,3,4.1,1.3,versicolor
5.5,2.5,4,1.3,versicolor
5.5,2.6,4.4,1.2,versicolor
6.1,3,4.6,1.4,versicolor
5.8,2.6,4,1.2,versicolor
5,2.3,3.3,1,versicolor
5.6,2.7,4.2,1.3,versicolor
5.7,3,4.2,1.2,versicolor
5.7,2.9,4.2,1.3,versicolor
6.2,2.9,4.3,1.3,versicolor
5.1,2.5,3,1.1,versicolor
5.7,2.8,4.1,1.3,versicolor
6.3,3.3,6,2.5,virginica
5.8,2.7,5.1,1.9,virginica
7.1,3,5.9,2.1,virginica
6.3,2.9,5.6,1.8,virginica
6.5,3,5.8,2.2,virginica
7.6,3,6.6,2.1,virginica
4.9,2.5,4.5,1.7,virginica
7.3,2.9,6.3,1.8,virginica
6.7,2.5,5.8,1.8,virginica
7.2,3.6,6.1,2.5,virginica
6.5,3.2,5.1,2,virginica
6.4,2.7,5.3,1.9,virginica
6.8,3,5.5,2.1,virginica
5.7,2.5,5,2,virginica
5.8,2.8,5.1,2.4,virginica
6.4,3.2,5.3,2.3,virginica
6.5,3,5.5,1.8,virginica
7.7,3.8,6.7,2.2,virginica
7.7,2.6,6.9,2.3,virginica
6,2.2,5,1.5,virginica
6.9,3.2,5.7,2.3,virginica
5.6,2.8,4.9,2,virginica
7.7,2.8,6.7,2,virginica
6.3,2.7,4.9,1.8,virginica
6.7,3.3,5.7,2.1,virginica
7.2,3.2,6,1.8,virginica
6.2,2.8,4.8,1.8,virginica
6.1,3,4.9,1.8,virginica
6.4,2.8,5.6,2.1,virginica
7.2,3,5.8,1.6,virginica
7.4,2.8,6.1,1.9,virginica
7.9,3.8,6.4,2,virginica
6.4,2.8,5.6,2.2,virginica
6.3,2.8,5.1,1.5,virginica
6.1,2.6,5.6,1.4,virginica
7.7,3,6.1,2.3,virginica
6.3,3.4,5.6,2.4,virginica
6.4,3.1,5.5,1.8,virginica
6,3,4.8,1.8,virginica
6.9,3.1,5.4,2.1,virginica
6.7,3.1,5.6,2.4,virginica
6.9,3.1,5.1,2.3,virginica
5.8,2.7,5.1,1.9,virginica
6.8,3.2,5.9,2.3,virginica
6.7,3.3,5.7,2.5,virginica
6.7,3,5.2,2.3,virginica
6.3,2.5,5,1.9,virginica
6.5,3,5.2,2,virginica
6.2,3.4,5.4,2.3,virginica
5.9,3,5.1,1.8,virginica
1 sepal_length sepal_width petal_length petal_width species
2 5.1 3.5 1.4 0.2 setosa
3 4.9 3 1.4 0.2 setosa
4 4.7 3.2 1.3 0.2 setosa
5 4.6 3.1 1.5 0.2 setosa
6 5 3.6 1.4 0.2 setosa
7 5.4 3.9 1.7 0.4 setosa
8 4.6 3.4 1.4 0.3 setosa
9 5 3.4 1.5 0.2 setosa
10 4.4 2.9 1.4 0.2 setosa
11 4.9 3.1 1.5 0.1 setosa
12 5.4 3.7 1.5 0.2 setosa
13 4.8 3.4 1.6 0.2 setosa
14 4.8 3 1.4 0.1 setosa
15 4.3 3 1.1 0.1 setosa
16 5.8 4 1.2 0.2 setosa
17 5.7 4.4 1.5 0.4 setosa
18 5.4 3.9 1.3 0.4 setosa
19 5.1 3.5 1.4 0.3 setosa
20 5.7 3.8 1.7 0.3 setosa
21 5.1 3.8 1.5 0.3 setosa
22 5.4 3.4 1.7 0.2 setosa
23 5.1 3.7 1.5 0.4 setosa
24 4.6 3.6 1 0.2 setosa
25 5.1 3.3 1.7 0.5 setosa
26 4.8 3.4 1.9 0.2 setosa
27 5 3 1.6 0.2 setosa
28 5 3.4 1.6 0.4 setosa
29 5.2 3.5 1.5 0.2 setosa
30 5.2 3.4 1.4 0.2 setosa
31 4.7 3.2 1.6 0.2 setosa
32 4.8 3.1 1.6 0.2 setosa
33 5.4 3.4 1.5 0.4 setosa
34 5.2 4.1 1.5 0.1 setosa
35 5.5 4.2 1.4 0.2 setosa
36 4.9 3.1 1.5 0.1 setosa
37 5 3.2 1.2 0.2 setosa
38 5.5 3.5 1.3 0.2 setosa
39 4.9 3.1 1.5 0.1 setosa
40 4.4 3 1.3 0.2 setosa
41 5.1 3.4 1.5 0.2 setosa
42 5 3.5 1.3 0.3 setosa
43 4.5 2.3 1.3 0.3 setosa
44 4.4 3.2 1.3 0.2 setosa
45 5 3.5 1.6 0.6 setosa
46 5.1 3.8 1.9 0.4 setosa
47 4.8 3 1.4 0.3 setosa
48 5.1 3.8 1.6 0.2 setosa
49 4.6 3.2 1.4 0.2 setosa
50 5.3 3.7 1.5 0.2 setosa
51 5 3.3 1.4 0.2 setosa
52 7 3.2 4.7 1.4 versicolor
53 6.4 3.2 4.5 1.5 versicolor
54 6.9 3.1 4.9 1.5 versicolor
55 5.5 2.3 4 1.3 versicolor
56 6.5 2.8 4.6 1.5 versicolor
57 5.7 2.8 4.5 1.3 versicolor
58 6.3 3.3 4.7 1.6 versicolor
59 4.9 2.4 3.3 1 versicolor
60 6.6 2.9 4.6 1.3 versicolor
61 5.2 2.7 3.9 1.4 versicolor
62 5 2 3.5 1 versicolor
63 5.9 3 4.2 1.5 versicolor
64 6 2.2 4 1 versicolor
65 6.1 2.9 4.7 1.4 versicolor
66 5.6 2.9 3.6 1.3 versicolor
67 6.7 3.1 4.4 1.4 versicolor
68 5.6 3 4.5 1.5 versicolor
69 5.8 2.7 4.1 1 versicolor
70 6.2 2.2 4.5 1.5 versicolor
71 5.6 2.5 3.9 1.1 versicolor
72 5.9 3.2 4.8 1.8 versicolor
73 6.1 2.8 4 1.3 versicolor
74 6.3 2.5 4.9 1.5 versicolor
75 6.1 2.8 4.7 1.2 versicolor
76 6.4 2.9 4.3 1.3 versicolor
77 6.6 3 4.4 1.4 versicolor
78 6.8 2.8 4.8 1.4 versicolor
79 6.7 3 5 1.7 versicolor
80 6 2.9 4.5 1.5 versicolor
81 5.7 2.6 3.5 1 versicolor
82 5.5 2.4 3.8 1.1 versicolor
83 5.5 2.4 3.7 1 versicolor
84 5.8 2.7 3.9 1.2 versicolor
85 6 2.7 5.1 1.6 versicolor
86 5.4 3 4.5 1.5 versicolor
87 6 3.4 4.5 1.6 versicolor
88 6.7 3.1 4.7 1.5 versicolor
89 6.3 2.3 4.4 1.3 versicolor
90 5.6 3 4.1 1.3 versicolor
91 5.5 2.5 4 1.3 versicolor
92 5.5 2.6 4.4 1.2 versicolor
93 6.1 3 4.6 1.4 versicolor
94 5.8 2.6 4 1.2 versicolor
95 5 2.3 3.3 1 versicolor
96 5.6 2.7 4.2 1.3 versicolor
97 5.7 3 4.2 1.2 versicolor
98 5.7 2.9 4.2 1.3 versicolor
99 6.2 2.9 4.3 1.3 versicolor
100 5.1 2.5 3 1.1 versicolor
101 5.7 2.8 4.1 1.3 versicolor
102 6.3 3.3 6 2.5 virginica
103 5.8 2.7 5.1 1.9 virginica
104 7.1 3 5.9 2.1 virginica
105 6.3 2.9 5.6 1.8 virginica
106 6.5 3 5.8 2.2 virginica
107 7.6 3 6.6 2.1 virginica
108 4.9 2.5 4.5 1.7 virginica
109 7.3 2.9 6.3 1.8 virginica
110 6.7 2.5 5.8 1.8 virginica
111 7.2 3.6 6.1 2.5 virginica
112 6.5 3.2 5.1 2 virginica
113 6.4 2.7 5.3 1.9 virginica
114 6.8 3 5.5 2.1 virginica
115 5.7 2.5 5 2 virginica
116 5.8 2.8 5.1 2.4 virginica
117 6.4 3.2 5.3 2.3 virginica
118 6.5 3 5.5 1.8 virginica
119 7.7 3.8 6.7 2.2 virginica
120 7.7 2.6 6.9 2.3 virginica
121 6 2.2 5 1.5 virginica
122 6.9 3.2 5.7 2.3 virginica
123 5.6 2.8 4.9 2 virginica
124 7.7 2.8 6.7 2 virginica
125 6.3 2.7 4.9 1.8 virginica
126 6.7 3.3 5.7 2.1 virginica
127 7.2 3.2 6 1.8 virginica
128 6.2 2.8 4.8 1.8 virginica
129 6.1 3 4.9 1.8 virginica
130 6.4 2.8 5.6 2.1 virginica
131 7.2 3 5.8 1.6 virginica
132 7.4 2.8 6.1 1.9 virginica
133 7.9 3.8 6.4 2 virginica
134 6.4 2.8 5.6 2.2 virginica
135 6.3 2.8 5.1 1.5 virginica
136 6.1 2.6 5.6 1.4 virginica
137 7.7 3 6.1 2.3 virginica
138 6.3 3.4 5.6 2.4 virginica
139 6.4 3.1 5.5 1.8 virginica
140 6 3 4.8 1.8 virginica
141 6.9 3.1 5.4 2.1 virginica
142 6.7 3.1 5.6 2.4 virginica
143 6.9 3.1 5.1 2.3 virginica
144 5.8 2.7 5.1 1.9 virginica
145 6.8 3.2 5.9 2.3 virginica
146 6.7 3.3 5.7 2.5 virginica
147 6.7 3 5.2 2.3 virginica
148 6.3 2.5 5 1.9 virginica
149 6.5 3 5.2 2 virginica
150 6.2 3.4 5.4 2.3 virginica
151 5.9 3 5.1 1.8 virginica

View File

@ -0,0 +1,179 @@
alcohol,malic acid,ash,alcalinity of ash,magnesium,total phenols,flavanoids,nonflavanoid phenols,proanthocyanins,color intensity,hue,OD280/OD315 of diluted wines,proline,class
14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065,1
13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050,1
13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185,1
14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480,1
13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735,1
14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450,1
14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290,1
14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295,1
14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045,1
13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045,1
14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510,1
14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280,1
13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320,1
14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150,1
14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547,1
13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310,1
14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280,1
13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130,1
14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680,1
13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845,1
14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780,1
12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770,1
13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035,1
12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015,1
13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845,1
13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830,1
13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195,1
13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285,1
13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915,1
14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035,1
13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285,1
13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515,1
13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990,1
13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235,1
13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095,1
13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920,1
13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880,1
13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105,1
13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020,1
14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760,1
13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795,1
13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035,1
13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095,1
13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680,1
13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885,1
14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080,1
14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065,1
13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985,1
14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060,1
13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260,1
13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150,1
13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265,1
13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190,1
13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375,1
13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060,1
13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120,1
14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970,1
13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270,1
13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285,1
12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520,2
12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680,2
12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450,2
13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630,2
12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420,2
12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355,2
12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678,2
13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502,2
12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510,2
13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750,2
12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718,2
12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870,2
13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410,2
13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472,2
12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985,2
11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886,2
11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428,2
13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392,2
11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500,2
12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750,2
12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463,2
12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278,2
12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714,2
12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630,2
13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515,2
11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520,2
12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450,2
12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495,2
11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562,2
11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680,2
12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625,2
12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480,2
12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450,2
12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495,2
12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290,2
11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345,2
12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937,2
11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625,2
12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428,2
12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660,2
12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406,2
12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710,2
12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562,2
12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438,2
11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415,2
12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672,2
12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315,2
12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510,2
12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488,2
12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312,2
11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680,2
11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562,2
12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325,2
11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607,2
11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434,2
12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385,2
11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407,2
11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495,2
12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345,2
12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372,2
12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564,2
11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625,2
11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465,2
12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365,2
13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380,2
11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380,2
12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378,2
12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352,2
11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466,2
12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342,2
12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580,2
12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630,3
12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530,3
12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560,3
12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600,3
12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650,3
12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695,3
12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720,3
12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515,3
13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580,3
12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590,3
12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600,3
13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780,3
13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520,3
13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550,3
12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855,3
13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830,3
13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415,3
12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625,3
13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650,3
13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550,3
13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500,3
12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480,3
13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425,3
13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675,3
12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640,3
13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725,3
13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480,3
12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880,3
14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660,3
13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620,3
12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520,3
13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680,3
12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570,3
12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675,3
13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615,3
13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520,3
13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695,3
12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685,3
13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750,3
13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630,3
12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510,3
12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470,3
14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660,3
13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740,3
13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750,3
13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835,3
13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840,3
14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560,3
1 alcohol malic acid ash alcalinity of ash magnesium total phenols flavanoids nonflavanoid phenols proanthocyanins color intensity hue OD280/OD315 of diluted wines proline class
2 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 3.92 1065 1
3 13.2 1.78 2.14 11.2 100 2.65 2.76 .26 1.28 4.38 1.05 3.4 1050 1
4 13.16 2.36 2.67 18.6 101 2.8 3.24 .3 2.81 5.68 1.03 3.17 1185 1
5 14.37 1.95 2.5 16.8 113 3.85 3.49 .24 2.18 7.8 .86 3.45 1480 1
6 13.24 2.59 2.87 21 118 2.8 2.69 .39 1.82 4.32 1.04 2.93 735 1
7 14.2 1.76 2.45 15.2 112 3.27 3.39 .34 1.97 6.75 1.05 2.85 1450 1
8 14.39 1.87 2.45 14.6 96 2.5 2.52 .3 1.98 5.25 1.02 3.58 1290 1
9 14.06 2.15 2.61 17.6 121 2.6 2.51 .31 1.25 5.05 1.06 3.58 1295 1
10 14.83 1.64 2.17 14 97 2.8 2.98 .29 1.98 5.2 1.08 2.85 1045 1
11 13.86 1.35 2.27 16 98 2.98 3.15 .22 1.85 7.22 1.01 3.55 1045 1
12 14.1 2.16 2.3 18 105 2.95 3.32 .22 2.38 5.75 1.25 3.17 1510 1
13 14.12 1.48 2.32 16.8 95 2.2 2.43 .26 1.57 5 1.17 2.82 1280 1
14 13.75 1.73 2.41 16 89 2.6 2.76 .29 1.81 5.6 1.15 2.9 1320 1
15 14.75 1.73 2.39 11.4 91 3.1 3.69 .43 2.81 5.4 1.25 2.73 1150 1
16 14.38 1.87 2.38 12 102 3.3 3.64 .29 2.96 7.5 1.2 3 1547 1
17 13.63 1.81 2.7 17.2 112 2.85 2.91 .3 1.46 7.3 1.28 2.88 1310 1
18 14.3 1.92 2.72 20 120 2.8 3.14 .33 1.97 6.2 1.07 2.65 1280 1
19 13.83 1.57 2.62 20 115 2.95 3.4 .4 1.72 6.6 1.13 2.57 1130 1
20 14.19 1.59 2.48 16.5 108 3.3 3.93 .32 1.86 8.7 1.23 2.82 1680 1
21 13.64 3.1 2.56 15.2 116 2.7 3.03 .17 1.66 5.1 .96 3.36 845 1
22 14.06 1.63 2.28 16 126 3 3.17 .24 2.1 5.65 1.09 3.71 780 1
23 12.93 3.8 2.65 18.6 102 2.41 2.41 .25 1.98 4.5 1.03 3.52 770 1
24 13.71 1.86 2.36 16.6 101 2.61 2.88 .27 1.69 3.8 1.11 4 1035 1
25 12.85 1.6 2.52 17.8 95 2.48 2.37 .26 1.46 3.93 1.09 3.63 1015 1
26 13.5 1.81 2.61 20 96 2.53 2.61 .28 1.66 3.52 1.12 3.82 845 1
27 13.05 2.05 3.22 25 124 2.63 2.68 .47 1.92 3.58 1.13 3.2 830 1
28 13.39 1.77 2.62 16.1 93 2.85 2.94 .34 1.45 4.8 .92 3.22 1195 1
29 13.3 1.72 2.14 17 94 2.4 2.19 .27 1.35 3.95 1.02 2.77 1285 1
30 13.87 1.9 2.8 19.4 107 2.95 2.97 .37 1.76 4.5 1.25 3.4 915 1
31 14.02 1.68 2.21 16 96 2.65 2.33 .26 1.98 4.7 1.04 3.59 1035 1
32 13.73 1.5 2.7 22.5 101 3 3.25 .29 2.38 5.7 1.19 2.71 1285 1
33 13.58 1.66 2.36 19.1 106 2.86 3.19 .22 1.95 6.9 1.09 2.88 1515 1
34 13.68 1.83 2.36 17.2 104 2.42 2.69 .42 1.97 3.84 1.23 2.87 990 1
35 13.76 1.53 2.7 19.5 132 2.95 2.74 .5 1.35 5.4 1.25 3 1235 1
36 13.51 1.8 2.65 19 110 2.35 2.53 .29 1.54 4.2 1.1 2.87 1095 1
37 13.48 1.81 2.41 20.5 100 2.7 2.98 .26 1.86 5.1 1.04 3.47 920 1
38 13.28 1.64 2.84 15.5 110 2.6 2.68 .34 1.36 4.6 1.09 2.78 880 1
39 13.05 1.65 2.55 18 98 2.45 2.43 .29 1.44 4.25 1.12 2.51 1105 1
40 13.07 1.5 2.1 15.5 98 2.4 2.64 .28 1.37 3.7 1.18 2.69 1020 1
41 14.22 3.99 2.51 13.2 128 3 3.04 .2 2.08 5.1 .89 3.53 760 1
42 13.56 1.71 2.31 16.2 117 3.15 3.29 .34 2.34 6.13 .95 3.38 795 1
43 13.41 3.84 2.12 18.8 90 2.45 2.68 .27 1.48 4.28 .91 3 1035 1
44 13.88 1.89 2.59 15 101 3.25 3.56 .17 1.7 5.43 .88 3.56 1095 1
45 13.24 3.98 2.29 17.5 103 2.64 2.63 .32 1.66 4.36 .82 3 680 1
46 13.05 1.77 2.1 17 107 3 3 .28 2.03 5.04 .88 3.35 885 1
47 14.21 4.04 2.44 18.9 111 2.85 2.65 .3 1.25 5.24 .87 3.33 1080 1
48 14.38 3.59 2.28 16 102 3.25 3.17 .27 2.19 4.9 1.04 3.44 1065 1
49 13.9 1.68 2.12 16 101 3.1 3.39 .21 2.14 6.1 .91 3.33 985 1
50 14.1 2.02 2.4 18.8 103 2.75 2.92 .32 2.38 6.2 1.07 2.75 1060 1
51 13.94 1.73 2.27 17.4 108 2.88 3.54 .32 2.08 8.90 1.12 3.1 1260 1
52 13.05 1.73 2.04 12.4 92 2.72 3.27 .17 2.91 7.2 1.12 2.91 1150 1
53 13.83 1.65 2.6 17.2 94 2.45 2.99 .22 2.29 5.6 1.24 3.37 1265 1
54 13.82 1.75 2.42 14 111 3.88 3.74 .32 1.87 7.05 1.01 3.26 1190 1
55 13.77 1.9 2.68 17.1 115 3 2.79 .39 1.68 6.3 1.13 2.93 1375 1
56 13.74 1.67 2.25 16.4 118 2.6 2.9 .21 1.62 5.85 .92 3.2 1060 1
57 13.56 1.73 2.46 20.5 116 2.96 2.78 .2 2.45 6.25 .98 3.03 1120 1
58 14.22 1.7 2.3 16.3 118 3.2 3 .26 2.03 6.38 .94 3.31 970 1
59 13.29 1.97 2.68 16.8 102 3 3.23 .31 1.66 6 1.07 2.84 1270 1
60 13.72 1.43 2.5 16.7 108 3.4 3.67 .19 2.04 6.8 .89 2.87 1285 1
61 12.37 .94 1.36 10.6 88 1.98 .57 .28 .42 1.95 1.05 1.82 520 2
62 12.33 1.1 2.28 16 101 2.05 1.09 .63 .41 3.27 1.25 1.67 680 2
63 12.64 1.36 2.02 16.8 100 2.02 1.41 .53 .62 5.75 .98 1.59 450 2
64 13.67 1.25 1.92 18 94 2.1 1.79 .32 .73 3.8 1.23 2.46 630 2
65 12.37 1.13 2.16 19 87 3.5 3.1 .19 1.87 4.45 1.22 2.87 420 2
66 12.17 1.45 2.53 19 104 1.89 1.75 .45 1.03 2.95 1.45 2.23 355 2
67 12.37 1.21 2.56 18.1 98 2.42 2.65 .37 2.08 4.6 1.19 2.3 678 2
68 13.11 1.01 1.7 15 78 2.98 3.18 .26 2.28 5.3 1.12 3.18 502 2
69 12.37 1.17 1.92 19.6 78 2.11 2 .27 1.04 4.68 1.12 3.48 510 2
70 13.34 .94 2.36 17 110 2.53 1.3 .55 .42 3.17 1.02 1.93 750 2
71 12.21 1.19 1.75 16.8 151 1.85 1.28 .14 2.5 2.85 1.28 3.07 718 2
72 12.29 1.61 2.21 20.4 103 1.1 1.02 .37 1.46 3.05 .906 1.82 870 2
73 13.86 1.51 2.67 25 86 2.95 2.86 .21 1.87 3.38 1.36 3.16 410 2
74 13.49 1.66 2.24 24 87 1.88 1.84 .27 1.03 3.74 .98 2.78 472 2
75 12.99 1.67 2.6 30 139 3.3 2.89 .21 1.96 3.35 1.31 3.5 985 2
76 11.96 1.09 2.3 21 101 3.38 2.14 .13 1.65 3.21 .99 3.13 886 2
77 11.66 1.88 1.92 16 97 1.61 1.57 .34 1.15 3.8 1.23 2.14 428 2
78 13.03 .9 1.71 16 86 1.95 2.03 .24 1.46 4.6 1.19 2.48 392 2
79 11.84 2.89 2.23 18 112 1.72 1.32 .43 .95 2.65 .96 2.52 500 2
80 12.33 .99 1.95 14.8 136 1.9 1.85 .35 2.76 3.4 1.06 2.31 750 2
81 12.7 3.87 2.4 23 101 2.83 2.55 .43 1.95 2.57 1.19 3.13 463 2
82 12 .92 2 19 86 2.42 2.26 .3 1.43 2.5 1.38 3.12 278 2
83 12.72 1.81 2.2 18.8 86 2.2 2.53 .26 1.77 3.9 1.16 3.14 714 2
84 12.08 1.13 2.51 24 78 2 1.58 .4 1.4 2.2 1.31 2.72 630 2
85 13.05 3.86 2.32 22.5 85 1.65 1.59 .61 1.62 4.8 .84 2.01 515 2
86 11.84 .89 2.58 18 94 2.2 2.21 .22 2.35 3.05 .79 3.08 520 2
87 12.67 .98 2.24 18 99 2.2 1.94 .3 1.46 2.62 1.23 3.16 450 2
88 12.16 1.61 2.31 22.8 90 1.78 1.69 .43 1.56 2.45 1.33 2.26 495 2
89 11.65 1.67 2.62 26 88 1.92 1.61 .4 1.34 2.6 1.36 3.21 562 2
90 11.64 2.06 2.46 21.6 84 1.95 1.69 .48 1.35 2.8 1 2.75 680 2
91 12.08 1.33 2.3 23.6 70 2.2 1.59 .42 1.38 1.74 1.07 3.21 625 2
92 12.08 1.83 2.32 18.5 81 1.6 1.5 .52 1.64 2.4 1.08 2.27 480 2
93 12 1.51 2.42 22 86 1.45 1.25 .5 1.63 3.6 1.05 2.65 450 2
94 12.69 1.53 2.26 20.7 80 1.38 1.46 .58 1.62 3.05 .96 2.06 495 2
95 12.29 2.83 2.22 18 88 2.45 2.25 .25 1.99 2.15 1.15 3.3 290 2
96 11.62 1.99 2.28 18 98 3.02 2.26 .17 1.35 3.25 1.16 2.96 345 2
97 12.47 1.52 2.2 19 162 2.5 2.27 .32 3.28 2.6 1.16 2.63 937 2
98 11.81 2.12 2.74 21.5 134 1.6 .99 .14 1.56 2.5 .95 2.26 625 2
99 12.29 1.41 1.98 16 85 2.55 2.5 .29 1.77 2.9 1.23 2.74 428 2
100 12.37 1.07 2.1 18.5 88 3.52 3.75 .24 1.95 4.5 1.04 2.77 660 2
101 12.29 3.17 2.21 18 88 2.85 2.99 .45 2.81 2.3 1.42 2.83 406 2
102 12.08 2.08 1.7 17.5 97 2.23 2.17 .26 1.4 3.3 1.27 2.96 710 2
103 12.6 1.34 1.9 18.5 88 1.45 1.36 .29 1.35 2.45 1.04 2.77 562 2
104 12.34 2.45 2.46 21 98 2.56 2.11 .34 1.31 2.8 .8 3.38 438 2
105 11.82 1.72 1.88 19.5 86 2.5 1.64 .37 1.42 2.06 .94 2.44 415 2
106 12.51 1.73 1.98 20.5 85 2.2 1.92 .32 1.48 2.94 1.04 3.57 672 2
107 12.42 2.55 2.27 22 90 1.68 1.84 .66 1.42 2.7 .86 3.3 315 2
108 12.25 1.73 2.12 19 80 1.65 2.03 .37 1.63 3.4 1 3.17 510 2
109 12.72 1.75 2.28 22.5 84 1.38 1.76 .48 1.63 3.3 .88 2.42 488 2
110 12.22 1.29 1.94 19 92 2.36 2.04 .39 2.08 2.7 .86 3.02 312 2
111 11.61 1.35 2.7 20 94 2.74 2.92 .29 2.49 2.65 .96 3.26 680 2
112 11.46 3.74 1.82 19.5 107 3.18 2.58 .24 3.58 2.9 .75 2.81 562 2
113 12.52 2.43 2.17 21 88 2.55 2.27 .26 1.22 2 .9 2.78 325 2
114 11.76 2.68 2.92 20 103 1.75 2.03 .6 1.05 3.8 1.23 2.5 607 2
115 11.41 .74 2.5 21 88 2.48 2.01 .42 1.44 3.08 1.1 2.31 434 2
116 12.08 1.39 2.5 22.5 84 2.56 2.29 .43 1.04 2.9 .93 3.19 385 2
117 11.03 1.51 2.2 21.5 85 2.46 2.17 .52 2.01 1.9 1.71 2.87 407 2
118 11.82 1.47 1.99 20.8 86 1.98 1.6 .3 1.53 1.95 .95 3.33 495 2
119 12.42 1.61 2.19 22.5 108 2 2.09 .34 1.61 2.06 1.06 2.96 345 2
120 12.77 3.43 1.98 16 80 1.63 1.25 .43 .83 3.4 .7 2.12 372 2
121 12 3.43 2 19 87 2 1.64 .37 1.87 1.28 .93 3.05 564 2
122 11.45 2.4 2.42 20 96 2.9 2.79 .32 1.83 3.25 .8 3.39 625 2
123 11.56 2.05 3.23 28.5 119 3.18 5.08 .47 1.87 6 .93 3.69 465 2
124 12.42 4.43 2.73 26.5 102 2.2 2.13 .43 1.71 2.08 .92 3.12 365 2
125 13.05 5.8 2.13 21.5 86 2.62 2.65 .3 2.01 2.6 .73 3.1 380 2
126 11.87 4.31 2.39 21 82 2.86 3.03 .21 2.91 2.8 .75 3.64 380 2
127 12.07 2.16 2.17 21 85 2.6 2.65 .37 1.35 2.76 .86 3.28 378 2
128 12.43 1.53 2.29 21.5 86 2.74 3.15 .39 1.77 3.94 .69 2.84 352 2
129 11.79 2.13 2.78 28.5 92 2.13 2.24 .58 1.76 3 .97 2.44 466 2
130 12.37 1.63 2.3 24.5 88 2.22 2.45 .4 1.9 2.12 .89 2.78 342 2
131 12.04 4.3 2.38 22 80 2.1 1.75 .42 1.35 2.6 .79 2.57 580 2
132 12.86 1.35 2.32 18 122 1.51 1.25 .21 .94 4.1 .76 1.29 630 3
133 12.88 2.99 2.4 20 104 1.3 1.22 .24 .83 5.4 .74 1.42 530 3
134 12.81 2.31 2.4 24 98 1.15 1.09 .27 .83 5.7 .66 1.36 560 3
135 12.7 3.55 2.36 21.5 106 1.7 1.2 .17 .84 5 .78 1.29 600 3
136 12.51 1.24 2.25 17.5 85 2 .58 .6 1.25 5.45 .75 1.51 650 3
137 12.6 2.46 2.2 18.5 94 1.62 .66 .63 .94 7.1 .73 1.58 695 3
138 12.25 4.72 2.54 21 89 1.38 .47 .53 .8 3.85 .75 1.27 720 3
139 12.53 5.51 2.64 25 96 1.79 .6 .63 1.1 5 .82 1.69 515 3
140 13.49 3.59 2.19 19.5 88 1.62 .48 .58 .88 5.7 .81 1.82 580 3
141 12.84 2.96 2.61 24 101 2.32 .6 .53 .81 4.92 .89 2.15 590 3
142 12.93 2.81 2.7 21 96 1.54 .5 .53 .75 4.6 .77 2.31 600 3
143 13.36 2.56 2.35 20 89 1.4 .5 .37 .64 5.6 .7 2.47 780 3
144 13.52 3.17 2.72 23.5 97 1.55 .52 .5 .55 4.35 .89 2.06 520 3
145 13.62 4.95 2.35 20 92 2 .8 .47 1.02 4.4 .91 2.05 550 3
146 12.25 3.88 2.2 18.5 112 1.38 .78 .29 1.14 8.21 .65 2 855 3
147 13.16 3.57 2.15 21 102 1.5 .55 .43 1.3 4 .6 1.68 830 3
148 13.88 5.04 2.23 20 80 .98 .34 .4 .68 4.9 .58 1.33 415 3
149 12.87 4.61 2.48 21.5 86 1.7 .65 .47 .86 7.65 .54 1.86 625 3
150 13.32 3.24 2.38 21.5 92 1.93 .76 .45 1.25 8.42 .55 1.62 650 3
151 13.08 3.9 2.36 21.5 113 1.41 1.39 .34 1.14 9.40 .57 1.33 550 3
152 13.5 3.12 2.62 24 123 1.4 1.57 .22 1.25 8.60 .59 1.3 500 3
153 12.79 2.67 2.48 22 112 1.48 1.36 .24 1.26 10.8 .48 1.47 480 3
154 13.11 1.9 2.75 25.5 116 2.2 1.28 .26 1.56 7.1 .61 1.33 425 3
155 13.23 3.3 2.28 18.5 98 1.8 .83 .61 1.87 10.52 .56 1.51 675 3
156 12.58 1.29 2.1 20 103 1.48 .58 .53 1.4 7.6 .58 1.55 640 3
157 13.17 5.19 2.32 22 93 1.74 .63 .61 1.55 7.9 .6 1.48 725 3
158 13.84 4.12 2.38 19.5 89 1.8 .83 .48 1.56 9.01 .57 1.64 480 3
159 12.45 3.03 2.64 27 97 1.9 .58 .63 1.14 7.5 .67 1.73 880 3
160 14.34 1.68 2.7 25 98 2.8 1.31 .53 2.7 13 .57 1.96 660 3
161 13.48 1.67 2.64 22.5 89 2.6 1.1 .52 2.29 11.75 .57 1.78 620 3
162 12.36 3.83 2.38 21 88 2.3 .92 .5 1.04 7.65 .56 1.58 520 3
163 13.69 3.26 2.54 20 107 1.83 .56 .5 .8 5.88 .96 1.82 680 3
164 12.85 3.27 2.58 22 106 1.65 .6 .6 .96 5.58 .87 2.11 570 3
165 12.96 3.45 2.35 18.5 106 1.39 .7 .4 .94 5.28 .68 1.75 675 3
166 13.78 2.76 2.3 22 90 1.35 .68 .41 1.03 9.58 .7 1.68 615 3
167 13.73 4.36 2.26 22.5 88 1.28 .47 .52 1.15 6.62 .78 1.75 520 3
168 13.45 3.7 2.6 23 111 1.7 .92 .43 1.46 10.68 .85 1.56 695 3
169 12.82 3.37 2.3 19.5 88 1.48 .66 .4 .97 10.26 .72 1.75 685 3
170 13.58 2.58 2.69 24.5 105 1.55 .84 .39 1.54 8.66 .74 1.8 750 3
171 13.4 4.6 2.86 25 112 1.98 .96 .27 1.11 8.5 .67 1.92 630 3
172 12.2 3.03 2.32 19 96 1.25 .49 .4 .73 5.5 .66 1.83 510 3
173 12.77 2.39 2.28 19.5 86 1.39 .51 .48 .64 9.899999 .57 1.63 470 3
174 14.16 2.51 2.48 20 91 1.68 .7 .44 1.24 9.7 .62 1.71 660 3
175 13.71 5.65 2.45 20.5 95 1.68 .61 .52 1.06 7.7 .64 1.74 740 3
176 13.4 3.91 2.48 23 102 1.8 .75 .43 1.41 7.3 .7 1.56 750 3
177 13.27 4.28 2.26 20 120 1.59 .69 .43 1.35 10.2 .59 1.56 835 3
178 13.17 2.59 2.37 20 120 1.65 .68 .53 1.46 9.3 .6 1.62 840 3
179 14.13 4.1 2.74 24.5 96 2.05 .76 .56 1.35 9.2 .61 1.6 560 3

View File

@ -0,0 +1,332 @@
<?php
declare(strict_types=1);
namespace Phpml\Association;
use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
/**
 * Apriori association-rule learner.
 *
 * Frequent item sets are mined level by level from the trained samples and
 * turned into rules of the form antecedent => consequent. Item comparison is
 * delegated to array_diff()/array_intersect(), i.e. items are compared by
 * their string representation.
 */
class Apriori implements Associator
{
    use Trainable;
    use Predictable;

    public const ARRAY_KEY_ANTECEDENT = 'antecedent';

    public const ARRAY_KEY_CONFIDENCE = 'confidence';

    public const ARRAY_KEY_CONSEQUENT = 'consequent';

    public const ARRAY_KEY_SUPPORT = 'support';

    /**
     * Minimum confidence a rule must reach to be kept.
     *
     * @var float
     */
    private $confidence;

    /**
     * The large set contains frequent k-length item sets, indexed by k.
     *
     * @var mixed[][][]
     */
    private $large = [];

    /**
     * Minimum relative frequency (support) an item set must reach to be kept.
     *
     * @var float
     */
    private $support;

    /**
     * The generated Apriori association rules.
     *
     * @var mixed[][]
     */
    private $rules = [];

    /**
     * Number of training samples the cached $large / $rules were built from,
     * or null when nothing has been computed yet.
     *
     * @var int|null
     */
    private $cachedSampleCount;

    /**
     * Apriori constructor.
     *
     * @param float $support    minimum support threshold
     * @param float $confidence minimum confidence threshold
     */
    public function __construct(float $support = 0.0, float $confidence = 0.0)
    {
        $this->support = $support;
        $this->confidence = $confidence;
    }

    /**
     * Get all association rules which are generated for every k-length frequent item set.
     *
     * The result is cached. The cache is rebuilt whenever the number of
     * training samples differs from the one the cache was computed for, so
     * rules never go stale after additional training. An empty rule set is
     * cached as well instead of being recomputed on every call.
     *
     * @return mixed[][] list of rules, each keyed by the ARRAY_KEY_* constants
     */
    public function getRules(): array
    {
        $sampleCount = count($this->samples);

        if ($this->cachedSampleCount !== $sampleCount) {
            $this->large = $this->apriori();
            $this->rules = [];
            $this->generateAllRules();
            $this->cachedSampleCount = $sampleCount;
        }

        return $this->rules;
    }

    /**
     * Generates frequent item sets.
     *
     * Starts with the frequent 1-item sets and keeps deriving candidates of
     * the next length until no frequent set is left.
     *
     * @return mixed[][][] frequent item sets indexed by their length k (starting at 1)
     */
    public function apriori(): array
    {
        $L = [];
        $items = $this->frequent($this->items());

        for ($k = 1; isset($items[0]); ++$k) {
            $L[$k] = $items;
            $items = $this->frequent($this->candidates($items));
        }

        return $L;
    }

    /**
     * Returns the consequents of every rule whose antecedent equals $sample.
     *
     * @param mixed[] $sample
     *
     * @return mixed[][]
     */
    protected function predictSample(array $sample): array
    {
        $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
            return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
        }));

        return array_map(static function ($rule) {
            return $rule[self::ARRAY_KEY_CONSEQUENT];
        }, $predicts);
    }

    /**
     * Generate rules for each k-length frequent item set.
     *
     * Starts at k = 2 because a rule needs a non-empty antecedent and a
     * non-empty consequent.
     */
    private function generateAllRules(): void
    {
        for ($k = 2; isset($this->large[$k]); ++$k) {
            foreach ($this->large[$k] as $frequent) {
                $this->generateRules($frequent);
            }
        }
    }

    /**
     * Generate confident rules for frequent item set.
     *
     * @param mixed[] $frequent
     */
    private function generateRules(array $frequent): void
    {
        // The support of the whole item set is the same for every rule derived from it
        $support = $this->support($frequent);

        foreach ($this->antecedents($frequent) as $antecedent) {
            $confidence = $this->confidence($frequent, $antecedent);

            if ($this->confidence <= $confidence) {
                $consequent = array_values(array_diff($frequent, $antecedent));
                $this->rules[] = [
                    self::ARRAY_KEY_ANTECEDENT => $antecedent,
                    self::ARRAY_KEY_CONSEQUENT => $consequent,
                    self::ARRAY_KEY_SUPPORT => $support,
                    self::ARRAY_KEY_CONFIDENCE => $confidence,
                ];
            }
        }
    }

    /**
     * Generates the power set for given item set $sample.
     *
     * @param mixed[] $sample
     *
     * @return mixed[][]
     */
    private function powerSet(array $sample): array
    {
        $results = [[]];

        foreach ($sample as $item) {
            // foreach iterates over a copy, so appending to $results inside the loop is safe
            foreach ($results as $combination) {
                $results[] = array_merge([$item], $combination);
            }
        }

        return $results;
    }

    /**
     * Generates all proper subsets for given set $sample without the empty set.
     *
     * The keys of the power set are preserved, so the result is not a list.
     *
     * @param mixed[] $sample
     *
     * @return mixed[][]
     */
    private function antecedents(array $sample): array
    {
        $cardinality = count($sample);
        $antecedents = $this->powerSet($sample);

        return array_filter($antecedents, static function ($antecedent) use ($cardinality) {
            return count($antecedent) !== $cardinality && $antecedent !== [];
        });
    }

    /**
     * Collects every distinct item of the trained samples as a 1-item set.
     *
     * @return mixed[][]
     */
    private function items(): array
    {
        $items = [];

        foreach ($this->samples as $sample) {
            foreach ($sample as $item) {
                if (!in_array($item, $items, true)) {
                    $items[] = $item;
                }
            }
        }

        return array_map(static function ($entry) {
            return [$entry];
        }, $items);
    }

    /**
     * Returns frequent item sets only.
     *
     * @param mixed[][] $samples
     *
     * @return mixed[][] re-indexed list of the sets whose support reaches the minimum support
     */
    private function frequent(array $samples): array
    {
        return array_values(array_filter($samples, function ($entry) {
            return $this->support($entry) >= $this->support;
        }));
    }

    /**
     * Builds the candidate item sets of the next length from the given sets.
     *
     * Two sets are joined when their symmetric difference has exactly two
     * items. A candidate is kept once, and only if at least one trained
     * sample contains it.
     *
     * @param mixed[][] $samples
     *
     * @return mixed[][]
     */
    private function candidates(array $samples): array
    {
        $candidates = [];

        foreach ($samples as $p) {
            foreach ($samples as $q) {
                if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) !== 2) {
                    continue;
                }

                $candidate = array_values(array_unique(array_merge($p, $q)));

                if ($this->contains($candidates, $candidate)) {
                    continue;
                }

                foreach ($this->samples as $sample) {
                    if ($this->subset($sample, $candidate)) {
                        $candidates[] = $candidate;

                        // Candidate accepted, move on to the next $q
                        continue 2;
                    }
                }
            }
        }

        return $candidates;
    }

    /**
     * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
     * $set.
     *
     * @param mixed[] $set
     * @param mixed[] $subset
     */
    private function confidence(array $set, array $subset): float
    {
        return $this->support($set) / $this->support($subset);
    }

    /**
     * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
     * pool.
     *
     * @see \Phpml\Association\Apriori::samples
     *
     * @param mixed[] $sample
     */
    private function support(array $sample): float
    {
        return $this->frequency($sample) / count($this->samples);
    }

    /**
     * Counts occurrences of $sample as subset in data pool.
     *
     * @see \Phpml\Association\Apriori::samples
     *
     * @param mixed[] $sample
     */
    private function frequency(array $sample): int
    {
        return count(array_filter($this->samples, function ($entry) use ($sample) {
            return $this->subset($entry, $sample);
        }));
    }

    /**
     * Returns true if set is an element of system.
     *
     * @see \Phpml\Association\Apriori::equals()
     *
     * @param mixed[][] $system
     * @param mixed[]   $set
     */
    private function contains(array $system, array $set): bool
    {
        return (bool) array_filter($system, function ($entry) use ($set) {
            return $this->equals($entry, $set);
        });
    }

    /**
     * Returns true if every item of $subset also occurs in $set (equal sets
     * included), comparing items by their string representation.
     *
     * @param mixed[] $set
     * @param mixed[] $subset
     */
    private function subset(array $set, array $subset): bool
    {
        return count(array_diff($subset, array_intersect($subset, $set))) === 0;
    }

    /**
     * Returns true if string representation of items does not differ.
     *
     * Implemented by comparing the two one-sided differences of the sets.
     *
     * @param mixed[] $set1
     * @param mixed[] $set2
     */
    private function equals(array $set1, array $set2): bool
    {
        return array_diff($set1, $set2) == array_diff($set2, $set1);
    }
}

View File

@ -0,0 +1,11 @@
<?php
declare(strict_types=1);
namespace Phpml\Association;
use Phpml\Estimator;
/**
 * Marker interface for estimators of the Association namespace.
 *
 * It declares no members of its own; everything is inherited from Estimator.
 */
interface Associator extends Estimator
{
}

View File

@ -0,0 +1,11 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification;
use Phpml\Estimator;
/**
 * Marker interface for estimators of the Classification namespace.
 *
 * It declares no members of its own; everything is inherited from Estimator.
 */
interface Classifier extends Estimator
{
}

View File

@ -0,0 +1,484 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification;
use Phpml\Classification\DecisionTree\DecisionTreeLeaf;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
use Phpml\Math\Statistic\Mean;
/**
 * Decision tree classifier built from binary splits.
 *
 * For every node the most frequent value of each candidate column is tested
 * as split value and the column with the lowest Gini index wins. Continuous
 * columns are discretised around their median before a split is evaluated.
 */
class DecisionTree implements Classifier
{
    use Trainable;
    use Predictable;

    public const CONTINUOUS = 1;

    public const NOMINAL = 2;

    /**
     * Deepest level reached while building the current tree.
     *
     * @var int
     */
    public $actualDepth = 0;

    /**
     * Type (self::CONTINUOUS or self::NOMINAL) of every column.
     *
     * @var array
     */
    protected $columnTypes = [];

    /**
     * Root node of the trained tree.
     *
     * @var DecisionTreeLeaf
     */
    protected $tree;

    /**
     * Depth at which nodes are forced to become terminal.
     *
     * @var int
     */
    protected $maxDepth;

    /**
     * Distinct target values seen during training.
     *
     * @var array
     */
    private $labels = [];

    /**
     * @var int
     */
    private $featureCount = 0;

    /**
     * Maximum number of randomly chosen columns per split (0 = all columns).
     *
     * @var int
     */
    private $numUsableFeatures = 0;

    /**
     * Explicitly selected column indexes; takes precedence over $numUsableFeatures.
     *
     * @var array
     */
    private $selectedFeatures = [];

    /**
     * Cached feature importances, null until computed for the current tree.
     *
     * @var array|null
     */
    private $featureImportances;

    /**
     * @var array
     */
    private $columnNames = [];

    public function __construct(int $maxDepth = 10)
    {
        $this->maxDepth = $maxDepth;
    }

    /**
     * Adds the given samples to the training set and rebuilds the whole tree.
     *
     * @throws InvalidArgumentException when there is no sample to train on
     */
    public function train(array $samples, array $targets): void
    {
        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);

        if ($this->samples === []) {
            throw new InvalidArgumentException('Cannot train a decision tree without samples');
        }

        $this->featureCount = count($this->samples[0]);
        $this->columnTypes = self::getColumnTypes($this->samples);
        $this->labels = array_keys(array_count_values($this->targets));

        // The tree is rebuilt from scratch, so the depth of a previous tree must not leak through
        $this->actualDepth = 0;
        $this->tree = $this->getSplitLeaf(range(0, count($this->samples) - 1));

        // Each time the tree is trained, feature importances are reset so that
        // we will have to compute it again depending on the new data
        $this->featureImportances = null;

        // If column names are given or computed before, then there is no
        // need to init it and accidentally remove the previous given names
        if ($this->columnNames === []) {
            $this->columnNames = range(0, $this->featureCount - 1);
        } elseif (count($this->columnNames) > $this->featureCount) {
            $this->columnNames = array_slice($this->columnNames, 0, $this->featureCount);
        } elseif (count($this->columnNames) < $this->featureCount) {
            $this->columnNames = array_merge(
                $this->columnNames,
                range(count($this->columnNames), $this->featureCount - 1)
            );
        }
    }

    /**
     * Detects for every column whether it is nominal or continuous.
     *
     * @return int[] one of self::NOMINAL / self::CONTINUOUS per column
     */
    public static function getColumnTypes(array $samples): array
    {
        $types = [];
        $featureCount = count($samples[0]);

        for ($i = 0; $i < $featureCount; ++$i) {
            $values = array_column($samples, $i);
            $isCategorical = self::isCategoricalColumn($values);
            $types[] = $isCategorical ? self::NOMINAL : self::CONTINUOUS;
        }

        return $types;
    }

    /**
     * Computes the size-weighted Gini impurity of splitting the column into
     * "value equals $baseValue" and "value differs from $baseValue".
     *
     * @param mixed $baseValue split value; usually an array key taken from array_count_values()
     * @param array $colValues column values, indexed like $targets
     * @param array $targets   target value per record
     */
    public function getGiniIndex($baseValue, array $colValues, array $targets): float
    {
        $countMatrix = [];
        foreach ($this->labels as $label) {
            $countMatrix[$label] = [0, 0];
        }

        foreach ($colValues as $index => $value) {
            $label = $targets[$index];

            // PHP turns numeric-string array keys into integers, so a base value
            // obtained from array_count_values() can be int 5 while the column
            // holds "5". Comparing the string forms matches exactly the values
            // that were counted under that key.
            $rowIndex = (string) $value === (string) $baseValue ? 0 : 1;
            ++$countMatrix[$label][$rowIndex];
        }

        $giniParts = [0, 0];
        for ($i = 0; $i <= 1; ++$i) {
            $part = 0;
            $sum = array_sum(array_column($countMatrix, $i));
            if ($sum > 0) {
                foreach ($this->labels as $label) {
                    $part += ($countMatrix[$label][$i] / (float) $sum) ** 2;
                }
            }

            $giniParts[$i] = (1 - $part) * $sum;
        }

        return array_sum($giniParts) / count($colValues);
    }

    /**
     * This method is used to set number of columns to be used
     * when deciding a split at an internal node of the tree. <br>
     * If the value is given 0, then all features are used (default behaviour),
     * otherwise the given value will be used as a maximum for number of columns
     * randomly selected for each split operation.
     *
     * @return $this
     *
     * @throws InvalidArgumentException
     */
    public function setNumFeatures(int $numFeatures)
    {
        if ($numFeatures < 0) {
            throw new InvalidArgumentException('Selected column count should be greater or equal to zero');
        }

        $this->numUsableFeatures = $numFeatures;

        return $this;
    }

    /**
     * A string array to represent columns. Useful when HTML output or
     * column importances are desired to be inspected.
     *
     * @return $this
     *
     * @throws InvalidArgumentException
     */
    public function setColumnNames(array $names)
    {
        if ($this->featureCount !== 0 && count($names) !== $this->featureCount) {
            throw new InvalidArgumentException(sprintf('Length of the given array should be equal to feature count %s', $this->featureCount));
        }

        $this->columnNames = $names;

        return $this;
    }

    /**
     * Returns the HTML representation of the trained tree using the column names.
     */
    public function getHtml(): string
    {
        return $this->tree->getHTML($this->columnNames);
    }

    /**
     * This will return an array including an importance value for
     * each column in the given dataset. The importance values are
     * normalized and their total makes 1.<br/>
     *
     * The result is keyed by column name and cached until the next training.
     */
    public function getFeatureImportances(): array
    {
        if ($this->featureImportances !== null) {
            return $this->featureImportances;
        }

        $sampleCount = count($this->samples);
        $this->featureImportances = [];
        foreach ($this->columnNames as $column => $columnName) {
            $nodes = $this->getSplitNodesByColumn($column, $this->tree);

            $importance = 0;
            foreach ($nodes as $node) {
                $importance += $node->getNodeImpurityDecrease($sampleCount);
            }

            $this->featureImportances[$columnName] = $importance;
        }

        // Normalize & sort the importances
        $total = array_sum($this->featureImportances);
        if ($total > 0) {
            array_walk($this->featureImportances, function (&$importance) use ($total): void {
                $importance /= $total;
            });
            arsort($this->featureImportances);
        }

        return $this->featureImportances;
    }

    /**
     * Recursively builds the (sub)tree for the given record indexes.
     *
     * @param int[] $records indexes into the trained samples
     */
    protected function getSplitLeaf(array $records, int $depth = 0): DecisionTreeLeaf
    {
        $split = $this->getBestSplit($records);
        $split->level = $depth;
        if ($this->actualDepth < $depth) {
            $this->actualDepth = $depth;
        }

        // Traverse all records to see if all records belong to the same class,
        // otherwise group the records so that we can classify the leaf
        // in case maximum depth is reached
        $leftRecords = [];
        $rightRecords = [];
        $remainingTargets = [];
        $prevRecord = null;
        $allSame = true;

        foreach ($records as $recordNo) {
            // Check if the previous record is the same with the current one
            $record = $this->samples[$recordNo];
            if ($prevRecord !== null && $prevRecord != $record) {
                $allSame = false;
            }

            $prevRecord = $record;

            // According to the split criterion, this record will
            // belong to either left or the right side in the next split
            if ($split->evaluate($record)) {
                $leftRecords[] = $recordNo;
            } else {
                $rightRecords[] = $recordNo;
            }

            // Group remaining targets
            $target = $this->targets[$recordNo];
            if (!array_key_exists($target, $remainingTargets)) {
                $remainingTargets[$target] = 1;
            } else {
                ++$remainingTargets[$target];
            }
        }

        if ($allSame || $depth >= $this->maxDepth || count($remainingTargets) === 1) {
            $split->isTerminal = true;

            // The most frequent remaining target becomes the class of the leaf
            arsort($remainingTargets);
            $split->classValue = (string) key($remainingTargets);
        } else {
            if (isset($leftRecords[0])) {
                $split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1);
            }

            if (isset($rightRecords[0])) {
                $split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1);
            }
        }

        return $split;
    }

    /**
     * Finds the column / value pair with the lowest Gini index for the given records.
     *
     * @param int[] $records indexes into the trained samples
     */
    protected function getBestSplit(array $records): DecisionTreeLeaf
    {
        $targets = array_intersect_key($this->targets, array_flip($records));
        $samples = (array) array_combine(
            $records,
            $this->preprocess(array_intersect_key($this->samples, array_flip($records)))
        );
        $bestGiniVal = 1;
        $bestSplit = null;
        $features = $this->getSelectedFeatures();
        foreach ($features as $i) {
            $colValues = [];
            foreach ($samples as $index => $row) {
                $colValues[$index] = $row[$i];
            }

            // The most frequent value of the column is used as split value
            $counts = array_count_values($colValues);
            arsort($counts);
            $baseValue = key($counts);
            if ($baseValue === null) {
                continue;
            }

            $gini = $this->getGiniIndex($baseValue, $colValues, $targets);
            if ($bestSplit === null || $bestGiniVal > $gini) {
                $split = new DecisionTreeLeaf();
                $split->value = $baseValue;
                $split->giniIndex = $gini;
                $split->columnIndex = $i;
                $split->isContinuous = $this->columnTypes[$i] === self::CONTINUOUS;
                $split->records = $records;

                // If a numeric column is to be selected, then
                // the original numeric value and the selected operator
                // will also be saved into the leaf for future access
                if ($this->columnTypes[$i] === self::CONTINUOUS) {
                    $matches = [];
                    preg_match("/^([<>=]{1,2})\s*(.*)/", (string) $split->value, $matches);
                    $split->operator = $matches[1];
                    $split->numericValue = (float) $matches[2];
                }

                $bestSplit = $split;
                $bestGiniVal = $gini;
            }
        }

        return $bestSplit;
    }

    /**
     * Returns available features/columns to the tree for the decision making
     * process. <br>
     *
     * If a number is given with setNumFeatures() method, then a random selection
     * of features up to this number is returned. <br>
     *
     * If some features are manually selected by use of setSelectedFeatures(),
     * then only these features are returned <br>
     *
     * If any of above methods were not called beforehand, then all features
     * are returned by default.
     */
    protected function getSelectedFeatures(): array
    {
        $allFeatures = range(0, $this->featureCount - 1);
        if ($this->numUsableFeatures === 0 && count($this->selectedFeatures) === 0) {
            return $allFeatures;
        }

        if (count($this->selectedFeatures) > 0) {
            return $this->selectedFeatures;
        }

        $numFeatures = $this->numUsableFeatures;
        if ($numFeatures > $this->featureCount) {
            $numFeatures = $this->featureCount;
        }

        shuffle($allFeatures);
        $selectedFeatures = array_slice($allFeatures, 0, $numFeatures);
        sort($selectedFeatures);

        return $selectedFeatures;
    }

    /**
     * Discretises the continuous columns of the given samples.
     *
     * @return array the samples as a re-indexed list of rows
     */
    protected function preprocess(array $samples): array
    {
        // Detect and convert continuous data column values into
        // discrete values by using the median as a threshold value
        $columns = [];
        for ($i = 0; $i < $this->featureCount; ++$i) {
            $values = array_column($samples, $i);
            if ($this->columnTypes[$i] === self::CONTINUOUS) {
                $median = Mean::median($values);
                foreach ($values as &$value) {
                    if ($value <= $median) {
                        $value = "<= {$median}";
                    } else {
                        $value = "> {$median}";
                    }
                }

                // Drop the reference so that later writes cannot touch the last element
                unset($value);
            }

            $columns[] = $values;
        }

        // array_map(null, $array) with a single array returns that array
        // unchanged instead of wrapping every value into a row, so a
        // single-column data set has to be transposed by hand
        if (count($columns) === 1) {
            return array_map(static function ($value): array {
                return [$value];
            }, $columns[0]);
        }

        // Below method is a strange yet very simple & efficient method
        // to get the transpose of a 2D array
        return array_map(null, ...$columns);
    }

    /**
     * Guesses whether a column holds a discrete set of values.
     */
    protected static function isCategoricalColumn(array $columnValues): bool
    {
        $count = count($columnValues);

        // There are two main indicators that *may* show whether a
        // column is composed of discrete set of values:
        // 1- Column may contain string values and non-float values
        // 2- Number of unique values in the column is only a small fraction of
        //	  all values in that column (Lower than or equal to %20 of all values)
        $numericValues = array_filter($columnValues, 'is_numeric');
        $floatValues = array_filter($columnValues, 'is_float');
        if (count($floatValues) > 0) {
            return false;
        }

        if (count($numericValues) !== $count) {
            return true;
        }

        $distinctValues = array_count_values($columnValues);

        return count($distinctValues) <= $count / 5;
    }

    /**
     * Used to set predefined features to consider while deciding which column to use for a split
     */
    protected function setSelectedFeatures(array $selectedFeatures): void
    {
        $this->selectedFeatures = $selectedFeatures;
    }

    /**
     * Collects and returns an array of internal nodes that use the given
     * column as a split criterion
     */
    protected function getSplitNodesByColumn(int $column, DecisionTreeLeaf $node): array
    {
        if ($node->isTerminal) {
            return [];
        }

        $nodes = [];
        if ($node->columnIndex === $column) {
            $nodes[] = $node;
        }

        $lNodes = [];
        $rNodes = [];
        if ($node->leftLeaf !== null) {
            $lNodes = $this->getSplitNodesByColumn($column, $node->leftLeaf);
        }

        if ($node->rightLeaf !== null) {
            $rNodes = $this->getSplitNodesByColumn($column, $node->rightLeaf);
        }

        return array_merge($nodes, $lNodes, $rNodes);
    }

    /**
     * Walks the tree from the root and returns the class of the terminal node reached.
     *
     * Falls back to the first known label when the walk ends on a missing child.
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
        $node = $this->tree;
        do {
            if ($node->isTerminal) {
                return $node->classValue;
            }

            if ($node->evaluate($sample)) {
                $node = $node->leftLeaf;
            } else {
                $node = $node->rightLeaf;
            }
        } while ($node);

        return $this->labels[0];
    }
}

View File

@ -0,0 +1,165 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification\DecisionTree;
use Phpml\Math\Comparison;
/**
 * A node of a decision tree: either an internal split node or a terminal leaf.
 */
class DecisionTreeLeaf
{
    /**
     * Split value of the node.
     *
     * @var string|int
     */
    public $value;

    /**
     * Numeric threshold used together with $operator when the node is continuous.
     *
     * @var float
     */
    public $numericValue;

    /**
     * Comparison operator used together with $numericValue when the node is continuous.
     *
     * @var string
     */
    public $operator;

    /**
     * Index of the column the node splits on.
     *
     * @var int
     */
    public $columnIndex;

    /**
     * Child taken when evaluate() returns true.
     *
     * @var DecisionTreeLeaf|null
     */
    public $leftLeaf;

    /**
     * Child taken when evaluate() returns false.
     *
     * @var DecisionTreeLeaf|null
     */
    public $rightLeaf;

    /**
     * @var array
     */
    public $records = [];

    /**
     * Class value represented by the leaf, this value is non-empty
     * only for terminal leaves
     *
     * @var string
     */
    public $classValue = '';

    /**
     * @var bool
     */
    public $isTerminal = false;

    /**
     * @var bool
     */
    public $isContinuous = false;

    /**
     * @var float
     */
    public $giniIndex = 0;

    /**
     * @var int
     */
    public $level = 0;

    /**
     * HTML representation of the tree without column names
     */
    public function __toString(): string
    {
        return $this->getHTML();
    }

    /**
     * Tells whether the given record satisfies the split condition of the node.
     *
     * @param array $record sample row, indexed by column
     */
    public function evaluate(array $record): bool
    {
        $recordField = $record[$this->columnIndex];

        if ($this->isContinuous) {
            return Comparison::compare((string) $recordField, $this->numericValue, $this->operator);
        }

        // Loose comparison on purpose: the types of field and split value may differ
        return $recordField == $this->value;
    }

    /**
     * Returns Mean Decrease Impurity (MDI) in the node.
     * For terminal nodes, this value is equal to 0
     *
     * @param int $parentRecordCount record count the decrease is normalised by
     */
    public function getNodeImpurityDecrease(int $parentRecordCount): float
    {
        if ($this->isTerminal) {
            return 0.0;
        }

        $nodeSampleCount = (float) count($this->records);
        $iT = $this->giniIndex;

        if ($this->leftLeaf !== null) {
            $pL = count($this->leftLeaf->records) / $nodeSampleCount;
            $iT -= $pL * $this->leftLeaf->giniIndex;
        }

        if ($this->rightLeaf !== null) {
            $pR = count($this->rightLeaf->records) / $nodeSampleCount;
            $iT -= $pR * $this->rightLeaf->giniIndex;
        }

        return $iT * $nodeSampleCount / $parentRecordCount;
    }

    /**
     * Returns HTML representation of the node including children nodes
     *
     * @param array|null $columnNames names indexed by column; "col_<index>" is used when null
     */
    public function getHTML(?array $columnNames = null): string
    {
        if ($this->isTerminal) {
            // Must interpolate the property: "${this}" would cast the whole
            // object to string, which calls __toString() -> getHTML() again
            // and never terminates.
            $value = "<b>{$this->classValue}</b>";
        } else {
            $value = $this->value;
            if ($columnNames !== null) {
                $col = $columnNames[$this->columnIndex];
            } else {
                $col = "col_{$this->columnIndex}";
            }

            // Values without a leading comparison operator are shown as equality tests
            if ((bool) preg_match('/^[<>=]{1,2}/', (string) $value) === false) {
                $value = "={$value}";
            }

            $value = "<b>{$col} {$value}</b><br>Gini: ".number_format($this->giniIndex, 2);
        }

        $str = "<table ><tr><td colspan=3 align=center style='border:1px solid;'>{$value}</td></tr>";

        if ($this->leftLeaf !== null || $this->rightLeaf !== null) {
            $str .= '<tr>';
            if ($this->leftLeaf !== null) {
                $str .= '<td valign=top><b>| Yes</b><br>'.$this->leftLeaf->getHTML($columnNames).'</td>';
            } else {
                $str .= '<td></td>';
            }

            $str .= '<td>&nbsp;</td>';
            if ($this->rightLeaf !== null) {
                $str .= '<td valign=top align=right><b>No |</b><br>'.$this->rightLeaf->getHTML($columnNames).'</td>';
            } else {
                $str .= '<td></td>';
            }

            $str .= '</tr>';
        }

        $str .= '</table>';

        return $str;
    }
}

View File

@ -0,0 +1,252 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification\Ensemble;
use Phpml\Classification\Classifier;
use Phpml\Classification\Linear\DecisionStump;
use Phpml\Classification\WeightedClassifier;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
use Phpml\Math\Statistic\Mean;
use Phpml\Math\Statistic\StandardDeviation;
use ReflectionClass;
/**
 * AdaBoost binary ensemble classifier.
 *
 * Trains a sequence of base classifiers on re-weighted samples and predicts
 * with the sign of the alpha-weighted sum of their outputs.
 */
class AdaBoost implements Classifier
{
    use Predictable;
    use Trainable;

    /**
     * Actual labels given in the targets array, keyed by the internal
     * target value (1 or -1) they are mapped to
     *
     * @var array
     */
    protected $labels = [];

    /**
     * @var int
     */
    protected $sampleCount;

    /**
     * @var int
     */
    protected $featureCount;

    /**
     * Number of maximum iterations to be done
     *
     * @var int
     */
    protected $maxIterations;

    /**
     * Sample weights
     *
     * @var array
     */
    protected $weights = [];

    /**
     * List of selected 'weak' classifiers
     *
     * @var array
     */
    protected $classifiers = [];

    /**
     * Base classifier weights
     *
     * @var array
     */
    protected $alpha = [];

    /**
     * Class name of the base classifier instantiated at every iteration
     *
     * @var string
     */
    protected $baseClassifier = DecisionStump::class;

    /**
     * Constructor arguments handed to the base classifier
     *
     * @var array
     */
    protected $classifierOptions = [];

    /**
     * ADAptive BOOSTing (AdaBoost) is an ensemble algorithm to
     * improve classification performance of 'weak' classifiers such as
     * DecisionStump (default base classifier of AdaBoost).
     */
    public function __construct(int $maxIterations = 50)
    {
        $this->maxIterations = $maxIterations;
    }

    /**
     * Sets the base classifier that will be used for boosting (default = DecisionStump)
     */
    public function setBaseClassifier(string $baseClassifier = DecisionStump::class, array $classifierOptions = []): void
    {
        $this->baseClassifier = $baseClassifier;
        $this->classifierOptions = $classifierOptions;
    }

    /**
     * Trains the ensemble; the targets must contain exactly two distinct labels.
     *
     * @throws InvalidArgumentException
     */
    public function train(array $samples, array $targets): void
    {
        // Initialize usual variables
        $this->labels = array_keys(array_count_values($targets));
        if (count($this->labels) !== 2) {
            throw new InvalidArgumentException('AdaBoost is a binary classifier and can classify between two classes only');
        }

        // Set all target values to either -1 or 1
        $this->labels = [
            1 => $this->labels[0],
            -1 => $this->labels[1],
        ];
        foreach ($targets as $target) {
            $this->targets[] = $target == $this->labels[1] ? 1 : -1;
        }

        $this->samples = array_merge($this->samples, $samples);
        $this->featureCount = count($samples[0]);
        $this->sampleCount = count($this->samples);

        // Initialize AdaBoost parameters
        $this->weights = array_fill(0, $this->sampleCount, 1.0 / $this->sampleCount);
        $this->classifiers = [];
        $this->alpha = [];

        // Execute the algorithm for a maximum number of iterations
        $currIter = 0;
        while ($this->maxIterations > $currIter++) {
            // Determine the best 'weak' classifier based on current weights
            $classifier = $this->getBestClassifier();
            $errorRate = $this->evaluateClassifier($classifier);

            // Update alpha & weight values at each iteration
            $alpha = $this->calculateAlpha($errorRate);
            $this->updateWeights($classifier, $alpha);

            $this->classifiers[] = $classifier;
            $this->alpha[] = $alpha;
        }
    }

    /**
     * Predicts the label of a sample from the alpha-weighted sum of the
     * base classifiers' outputs.
     *
     * @return mixed
     */
    public function predictSample(array $sample)
    {
        $sum = 0;
        foreach ($this->alpha as $index => $alpha) {
            $h = $this->classifiers[$index]->predict($sample);
            $sum += $h * $alpha;
        }

        return $this->labels[$sum > 0 ? 1 : -1];
    }

    /**
     * Returns the classifier with the lowest error rate with the
     * consideration of current sample weights
     */
    protected function getBestClassifier(): Classifier
    {
        $ref = new ReflectionClass($this->baseClassifier);
        /** @var Classifier $classifier */
        $classifier = count($this->classifierOptions) === 0 ? $ref->newInstance() : $ref->newInstanceArgs($this->classifierOptions);

        if ($classifier instanceof WeightedClassifier) {
            $classifier->setSampleWeights($this->weights);
            $classifier->train($this->samples, $this->targets);
        } else {
            // Classifiers without weight support get a weight-driven resample instead
            [$samples, $targets] = $this->resample();
            $classifier->train($samples, $targets);
        }

        return $classifier;
    }

    /**
     * Resamples the dataset in accordance with the weights and
     * returns the new dataset
     *
     * Every sample gets (rounded z-score of its weight - lowest rounded z-score + 1)
     * draws, and each draw is kept with probability 1/2.
     *
     * @return array [samples, targets]
     */
    protected function resample(): array
    {
        $weights = $this->weights;
        $std = StandardDeviation::population($weights);
        $mean = Mean::arithmetic($weights);

        // When all weights are equal (always the case on the first iteration) the
        // deviation is 0; every z-score is then 0 instead of a division by zero
        $zScore = static function ($weight) use ($mean, $std): int {
            return $std > 0 ? (int) round(($weight - $mean) / $std) : 0;
        };
        $minZ = $zScore(min($weights));

        $samples = [];
        $targets = [];
        foreach ($weights as $index => $weight) {
            $z = $zScore($weight) - $minZ + 1;
            for ($i = 0; $i < $z; ++$i) {
                if (random_int(0, 1) == 0) {
                    continue;
                }

                $samples[] = $this->samples[$index];
                $targets[] = $this->targets[$index];
            }
        }

        return [$samples, $targets];
    }

    /**
     * Evaluates the classifier and returns the classification error rate
     * (weight of the misclassified samples divided by the total weight)
     */
    protected function evaluateClassifier(Classifier $classifier): float
    {
        $total = (float) array_sum($this->weights);
        $wrong = 0;
        foreach ($this->samples as $index => $sample) {
            $predicted = $classifier->predict($sample);
            if ($predicted != $this->targets[$index]) {
                $wrong += $this->weights[$index];
            }
        }

        return $wrong / $total;
    }

    /**
     * Calculates alpha of a classifier
     *
     * The error rate is kept inside (0, 1) so that neither the division nor
     * the logarithm degenerates into an infinite alpha.
     */
    protected function calculateAlpha(float $errorRate): float
    {
        if ($errorRate == 0) {
            $errorRate = 1e-10;
        } elseif ($errorRate >= 1) {
            // log(0) would give -INF and turn every weight into NaN afterwards
            $errorRate = 1 - 1e-10;
        }

        return 0.5 * log((1 - $errorRate) / $errorRate);
    }

    /**
     * Updates the sample weights
     *
     * Each weight is scaled by exp(-alpha * target * output) and divided by
     * the sum of the weights before the update.
     */
    protected function updateWeights(Classifier $classifier, float $alpha): void
    {
        $sumOfWeights = array_sum($this->weights);
        $weightsT1 = [];
        foreach ($this->weights as $index => $weight) {
            $desired = $this->targets[$index];
            $output = $classifier->predict($this->samples[$index]);

            $weight *= exp(-$alpha * $desired * $output) / $sumOfWeights;

            $weightsT1[] = $weight;
        }

        $this->weights = $weightsT1;
    }
}

View File

@ -0,0 +1,170 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification\Ensemble;
use Phpml\Classification\Classifier;
use Phpml\Classification\DecisionTree;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
use ReflectionClass;
/**
 * Bagging ensemble classifier.
 *
 * Trains several base classifiers on random subsets drawn with replacement
 * and predicts by majority vote.
 */
class Bagging implements Classifier
{
    use Trainable;
    use Predictable;

    /**
     * @var int
     */
    protected $numSamples;

    /**
     * @var int
     */
    protected $featureCount = 0;

    /**
     * Number of base classifiers in the ensemble
     *
     * @var int
     */
    protected $numClassifier;

    /**
     * Class name of the base classifier
     *
     * @var string
     */
    protected $classifier = DecisionTree::class;

    /**
     * Constructor arguments of the base classifier, applied by position
     *
     * @var array
     */
    protected $classifierOptions = ['depth' => 20];

    /**
     * @var array
     */
    protected $classifiers = [];

    /**
     * @var float
     */
    protected $subsetRatio = 0.7;

    /**
     * Creates an ensemble classifier with given number of base classifiers
     * Default number of base classifiers is 50.
     * The more number of base classifiers, the better performance but at the cost of processing time
     */
    public function __construct(int $numClassifier = 50)
    {
        $this->numClassifier = $numClassifier;
    }

    /**
     * This method determines the ratio of samples used to create the 'bootstrap' subset,
     * e.g., random samples drawn from the original dataset with replacement (allow repeats),
     * to train each base classifier.
     *
     * @return $this
     *
     * @throws InvalidArgumentException
     */
    public function setSubsetRatio(float $ratio)
    {
        if ($ratio < 0.1 || $ratio > 1.0) {
            throw new InvalidArgumentException('Subset ratio should be between 0.1 and 1.0');
        }

        $this->subsetRatio = $ratio;

        return $this;
    }

    /**
     * This method is used to set the base classifier. Default value is
     * DecisionTree::class, but any class that implements the <i>Classifier</i>
     * can be used. <br>
     * While giving the parameters of the classifier, the values should be
     * given in the order they are in the constructor of the classifier and parameter
     * names are neglected.
     *
     * @return $this
     */
    public function setClassifer(string $classifier, array $classifierOptions = [])
    {
        $this->classifier = $classifier;
        $this->classifierOptions = $classifierOptions;

        return $this;
    }

    /**
     * Adds the given samples to the stored ones, rebuilds the base classifiers
     * and trains each of them on its own random subset.
     */
    public function train(array $samples, array $targets): void
    {
        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);
        $this->featureCount = count($samples[0]);
        $this->numSamples = count($this->samples);

        // Init classifiers and train them with bootstrap samples
        $this->classifiers = $this->initClassifiers();
        $index = 0;
        foreach ($this->classifiers as $classifier) {
            [$samples, $targets] = $this->getRandomSubset($index);
            $classifier->train($samples, $targets);
            ++$index;
        }
    }

    /**
     * Draws subsetRatio * numSamples samples (with replacement) from the stored dataset.
     *
     * @param int $index Position of the classifier the subset is drawn for
     *
     * @return array [samples, targets]
     */
    protected function getRandomSubset(int $index): array
    {
        $samples = [];
        $targets = [];
        // NOTE(review): srand() seeds the global generator only; random_int() below
        // is not affected by it. Kept because code run afterwards may rely on the seed.
        srand($index);
        $bootstrapSize = $this->subsetRatio * $this->numSamples;
        for ($i = 0; $i < $bootstrapSize; ++$i) {
            $rand = random_int(0, $this->numSamples - 1);
            $samples[] = $this->samples[$rand];
            $targets[] = $this->targets[$rand];
        }

        return [$samples, $targets];
    }

    /**
     * Instantiates numClassifier base classifiers and passes each one
     * through initSingleClassifier().
     */
    protected function initClassifiers(): array
    {
        $classifiers = [];
        for ($i = 0; $i < $this->numClassifier; ++$i) {
            $ref = new ReflectionClass($this->classifier);
            // Options are positional: array_values() drops string keys such as the
            // default 'depth', which PHP 8+ would otherwise bind as named parameters
            /** @var Classifier $obj */
            $obj = count($this->classifierOptions) === 0
                ? $ref->newInstance()
                : $ref->newInstanceArgs(array_values($this->classifierOptions));

            $classifiers[] = $this->initSingleClassifier($obj);
        }

        return $classifiers;
    }

    /**
     * Hook for subclasses to configure a freshly created base classifier;
     * returns the classifier untouched here.
     */
    protected function initSingleClassifier(Classifier $classifier): Classifier
    {
        return $classifier;
    }

    /**
     * Returns the label predicted by most base classifiers.
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
        $predictions = [];
        foreach ($this->classifiers as $classifier) {
            /** @var Classifier $classifier */
            $predictions[] = $classifier->predict($sample);
        }

        // Count the votes and return the label with the highest count
        $counts = array_count_values($predictions);
        arsort($counts);
        reset($counts);

        return key($counts);
    }
}

View File

@ -0,0 +1,151 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification\Ensemble;
use Phpml\Classification\Classifier;
use Phpml\Classification\DecisionTree;
use Phpml\Exception\InvalidArgumentException;
/**
 * Random forest: a Bagging ensemble restricted to DecisionTree base
 * classifiers, each configured with a limited number of features.
 */
class RandomForest extends Bagging
{
    /**
     * Either a ratio of the feature count or one of 'sqrt' / 'log'
     *
     * @var float|string
     */
    protected $featureSubsetRatio = 'log';

    /**
     * @var array|null
     */
    protected $columnNames;

    /**
     * Initializes RandomForest with the given number of trees. More trees
     * may increase the prediction performance while it will also substantially
     * increase the processing time and the required memory
     */
    public function __construct(int $numClassifier = 50)
    {
        parent::__construct($numClassifier);

        $this->setSubsetRatio(1.0);
    }

    /**
     * This method is used to determine how many of the original columns (features)
     * will be used to construct subsets to train base classifiers.<br>
     *
     * Allowed values: 'sqrt', 'log' or any float number between 0.1 and 1.0 <br>
     *
     * Default value for the ratio is 'log' which results in log(numFeatures, 2) + 1
     * features to be taken into consideration while selecting subspace of features
     *
     * @param string|float $ratio
     *
     * @throws InvalidArgumentException
     */
    public function setFeatureSubsetRatio($ratio): self
    {
        if (!is_string($ratio) && !is_float($ratio)) {
            throw new InvalidArgumentException('Feature subset ratio must be a string or a float');
        }

        if (is_float($ratio) && ($ratio < 0.1 || $ratio > 1.0)) {
            throw new InvalidArgumentException('When a float is given, feature subset ratio should be between 0.1 and 1.0');
        }

        if (is_string($ratio) && $ratio !== 'sqrt' && $ratio !== 'log') {
            throw new InvalidArgumentException("When a string is given, feature subset ratio can only be 'sqrt' or 'log'");
        }

        $this->featureSubsetRatio = $ratio;

        return $this;
    }

    /**
     * RandomForest algorithm is usable *only* with DecisionTree
     *
     * @return $this
     *
     * @throws InvalidArgumentException
     */
    public function setClassifer(string $classifier, array $classifierOptions = [])
    {
        if ($classifier !== DecisionTree::class) {
            throw new InvalidArgumentException('RandomForest can only use DecisionTree as base classifier');
        }

        return parent::setClassifer($classifier, $classifierOptions);
    }

    /**
     * This will return an array including an importance value for
     * each column in the given dataset. Importance values for a column
     * is the average importance of that column in all trees in the forest
     *
     * The values are normalized to sum up to 1 (unless they sum up to 0)
     * and sorted in descending order.
     */
    public function getFeatureImportances(): array
    {
        // Traverse each tree and sum importance of the columns
        $sum = [];
        foreach ($this->classifiers as $tree) {
            /** @var DecisionTree $tree */
            $importances = $tree->getFeatureImportances();

            foreach ($importances as $column => $importance) {
                if (array_key_exists($column, $sum)) {
                    $sum[$column] += $importance;
                } else {
                    $sum[$column] = $importance;
                }
            }
        }

        // Normalize & sort the importance values; a zero total (no column
        // carries any importance) is left as is instead of dividing by zero
        $total = array_sum($sum);
        if ($total != 0) {
            array_walk($sum, function (&$importance) use ($total): void {
                $importance /= $total;
            });
        }

        arsort($sum);

        return $sum;
    }

    /**
     * A string array to represent the columns is given. They are useful
     * when trying to print some information about the trees such as feature importances
     *
     * @return $this
     */
    public function setColumnNames(array $names)
    {
        $this->columnNames = $names;

        return $this;
    }

    /**
     * Configures a tree with the column names and the number of features
     * derived from the feature subset ratio.
     *
     * @param DecisionTree $classifier
     *
     * @return DecisionTree
     */
    protected function initSingleClassifier(Classifier $classifier): Classifier
    {
        if (is_float($this->featureSubsetRatio)) {
            $featureCount = (int) ($this->featureSubsetRatio * $this->featureCount);
        } elseif ($this->featureSubsetRatio === 'sqrt') {
            $featureCount = (int) ($this->featureCount ** .5) + 1;
        } else {
            $featureCount = (int) log($this->featureCount, 2) + 1;
        }

        // Never ask for more features than the dataset has
        if ($featureCount >= $this->featureCount) {
            $featureCount = $this->featureCount;
        }

        // Fall back to the column indices when no names were given
        if ($this->columnNames === null) {
            $this->columnNames = range(0, $this->featureCount - 1);
        }

        return $classifier
            ->setColumnNames($this->columnNames)
            ->setNumFeatures($featureCount);
    }
}

View File

@ -0,0 +1,75 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification;
use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
use Phpml\Math\Distance;
use Phpml\Math\Distance\Euclidean;
/**
 * k-nearest-neighbors classifier: a sample receives the label that occurs
 * most often among its k closest training samples.
 */
class KNearestNeighbors implements Classifier
{
    use Trainable;
    use Predictable;

    /**
     * Number of neighbors taking part in the vote
     *
     * @var int
     */
    private $k;

    /**
     * Metric used to measure the distance between two samples
     *
     * @var Distance
     */
    private $distanceMetric;

    /**
     * @param Distance|null $distanceMetric (if null then Euclidean distance as default)
     */
    public function __construct(int $k = 3, ?Distance $distanceMetric = null)
    {
        $this->k = $k;
        $this->samples = [];
        $this->targets = [];
        $this->distanceMetric = $distanceMetric ?? new Euclidean();
    }

    /**
     * Lets the k nearest training samples vote and returns the winning label.
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
        // One zeroed counter per known target label
        $votes = array_fill_keys($this->targets, 0);

        foreach ($this->kNeighborsDistances($sample) as $neighborIndex => $ignoredDistance) {
            ++$votes[$this->targets[$neighborIndex]];
        }

        // Highest vote count first; its key is the predicted label
        arsort($votes);
        reset($votes);

        return key($votes);
    }

    /**
     * Returns the distances to the k closest training samples in ascending
     * order, keyed by the index of the training sample.
     *
     * @throws \Phpml\Exception\InvalidArgumentException
     */
    private function kNeighborsDistances(array $sample): array
    {
        $byIndex = array_map(function ($neighbor) use ($sample) {
            return $this->distanceMetric->distance($sample, $neighbor);
        }, $this->samples);

        asort($byIndex);

        return array_slice($byIndex, 0, $this->k, true);
    }
}

View File

@ -0,0 +1,75 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification\Linear;
use Phpml\Exception\InvalidArgumentException;
/**
 * Adaline (ADAptive LInear NEuron) classifier trained by gradient descent
 * on a squared-error cost.
 */
class Adaline extends Perceptron
{
    /**
     * Batch training is the default Adaline training algorithm
     */
    public const BATCH_TRAINING = 1;

    /**
     * Online training: Stochastic gradient descent learning
     */
    public const ONLINE_TRAINING = 2;

    /**
     * Selected training type, one of the *_TRAINING constants
     *
     * @var string|int
     */
    protected $trainingType;

    /**
     * Creates an Adaline classifier with the given learning rate and maximum
     * number of training iterations <br>
     *
     * Learning rate should be a float value between 0.0(exclusive) and 1.0 (inclusive) <br>
     * Maximum number of iterations can be an integer value greater than 0 <br>
     * If normalizeInputs is set to true, then every input given to the algorithm will be standardized
     * by use of standard deviation and mean calculation
     *
     * @throws InvalidArgumentException
     */
    public function __construct(
        float $learningRate = 0.001,
        int $maxIterations = 1000,
        bool $normalizeInputs = true,
        int $trainingType = self::BATCH_TRAINING
    ) {
        $supportedTypes = [self::BATCH_TRAINING, self::ONLINE_TRAINING];
        $isSupported = in_array($trainingType, $supportedTypes, true);
        if (!$isSupported) {
            throw new InvalidArgumentException('Adaline can only be trained with batch and online/stochastic gradient descent algorithm');
        }

        $this->trainingType = $trainingType;

        parent::__construct($learningRate, $maxIterations, $normalizeInputs);
    }

    /**
     * Runs gradient descent over the given samples and targets, in batch or
     * online mode depending on the selected training type
     */
    protected function runTraining(array $samples, array $targets): void
    {
        // Sum-of-squares cost: the callback reports [squared error, raw error]
        $costCallback = function ($weights, $sample, $target) {
            $this->weights = $weights;

            $difference = $this->output($sample) - $target;

            return [$difference ** 2, $difference];
        };

        parent::runGradientDescent(
            $samples,
            $targets,
            $costCallback,
            $this->trainingType == self::BATCH_TRAINING
        );
    }
}

View File

@ -0,0 +1,319 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification\Linear;
use Phpml\Classification\DecisionTree;
use Phpml\Classification\WeightedClassifier;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Helper\OneVsRest;
use Phpml\Helper\Predictable;
use Phpml\Math\Comparison;
/**
 * One-level decision tree: a single comparison on a single column decides
 * between the two binary labels.
 */
class DecisionStump extends WeightedClassifier
{
    use Predictable;
    use OneVsRest;

    public const AUTO_SELECT = -1;

    /**
     * Column index requested in the constructor, or AUTO_SELECT
     *
     * @var int
     */
    protected $givenColumnIndex;

    /**
     * The two labels of the current binary problem: index 0 is predicted when
     * the comparison holds, index 1 otherwise
     *
     * @var array
     */
    protected $binaryLabels = [];

    /**
     * Lowest error rate obtained while training/optimizing the model
     *
     * @var float
     */
    protected $trainingErrorRate;

    /**
     * Column the selected split operates on
     *
     * @var int
     */
    protected $column;

    /**
     * Threshold (numerical split) or category value (nominal split)
     *
     * @var mixed
     */
    protected $value;

    /**
     * Operator of the selected split
     *
     * @var string
     */
    protected $operator;

    /**
     * @var array
     */
    protected $columnTypes = [];

    /**
     * @var int
     */
    protected $featureCount;

    /**
     * Number of equally sized steps the value range of a numerical column
     * is divided into while searching for a split point
     *
     * @var float
     */
    protected $numSplitCount = 100.0;

    /**
     * Distribution of samples in the leaves
     *
     * @var array
     */
    protected $prob = [];

    /**
     * A DecisionStump classifier is a one-level deep DecisionTree. It is generally
     * used with ensemble algorithms as in the weak classifier role. <br>
     *
     * If columnIndex is given, then the stump tries to produce a decision node
     * on this column, otherwise in cases given the value of -1, the stump itself
     * decides which column to take for the decision (Default DecisionTree behaviour)
     */
    public function __construct(int $columnIndex = self::AUTO_SELECT)
    {
        $this->givenColumnIndex = $columnIndex;
    }

    /**
     * Textual form of the decision rule: "IF <column> <operator> <value> THEN <label> ELSE <label>"
     */
    public function __toString(): string
    {
        // The properties must be interpolated: "${this}" would stringify the
        // stump itself and re-enter __toString() without end
        return "IF {$this->column} {$this->operator} {$this->value} ".
            'THEN '.$this->binaryLabels[0].' '.
            'ELSE '.$this->binaryLabels[1];
    }

    /**
     * While finding best split point for a numerical valued column,
     * DecisionStump looks for equally distanced values between minimum and maximum
     * values in the column. Given <i>$count</i> value determines how many split
     * points to be probed. The more split counts, the better performance but
     * worse processing time (Default value is 100.0)
     */
    public function setNumericalSplitCount(float $count): void
    {
        $this->numSplitCount = $count;
    }

    /**
     * Finds the split with the lowest weighted error rate among the candidate
     * columns and stores it on the stump.
     *
     * @throws InvalidArgumentException
     */
    protected function trainBinary(array $samples, array $targets, array $labels): void
    {
        $this->binaryLabels = $labels;
        $this->featureCount = count($samples[0]);

        // If a column index is given, it should be among the existing columns
        if ($this->givenColumnIndex > count($samples[0]) - 1) {
            $this->givenColumnIndex = self::AUTO_SELECT;
        }

        // Check the size of the weights given.
        // If none given, then assign 1 as a weight to each sample
        if (count($this->weights) === 0) {
            $this->weights = array_fill(0, count($samples), 1);
        } else {
            $numWeights = count($this->weights);
            if ($numWeights !== count($samples)) {
                throw new InvalidArgumentException('Number of sample weights does not match with number of samples');
            }
        }

        // Determine type of each column as either "continuous" or "nominal"
        $this->columnTypes = DecisionTree::getColumnTypes($samples);

        // Try to find the best split in the columns of the dataset
        // by calculating error rate for each split point in each column
        $columns = range(0, count($samples[0]) - 1);
        if ($this->givenColumnIndex !== self::AUTO_SELECT) {
            $columns = [$this->givenColumnIndex];
        }

        $bestSplit = [
            'value' => 0,
            'operator' => '',
            'prob' => [],
            'column' => 0,
            'trainingErrorRate' => 1.0,
        ];
        foreach ($columns as $col) {
            if ($this->columnTypes[$col] == DecisionTree::CONTINUOUS) {
                $split = $this->getBestNumericalSplit($samples, $targets, $col);
            } else {
                $split = $this->getBestNominalSplit($samples, $targets, $col);
            }

            if ($split['trainingErrorRate'] < $bestSplit['trainingErrorRate']) {
                $bestSplit = $split;
            }
        }

        // Assign determined best values to the stump
        // (the keys of the split match the property names)
        foreach ($bestSplit as $name => $value) {
            $this->{$name} = $value;
        }
    }

    /**
     * Determines best split point for the given column
     *
     * @return array Split with the keys value, operator, prob, column and trainingErrorRate
     */
    protected function getBestNumericalSplit(array $samples, array $targets, int $col): array
    {
        $values = array_column($samples, $col);

        // Trying all possible points may be accomplished in two general ways:
        // 1- Try all values in the $samples array ($values)
        // 2- Artificially split the range of values into several parts and try them
        // We choose the second one because it is faster in larger datasets
        $minValue = min($values);
        $maxValue = max($values);
        $stepSize = $this->numSplitCount > 0 ? ($maxValue - $minValue) / $this->numSplitCount : 0.0;

        $split = [];

        foreach (['<=', '>'] as $operator) {
            // Before trying all possible split points, let's first try
            // the average value for the cut point
            $threshold = array_sum($values) / (float) count($values);
            [$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values);
            if (!isset($split['trainingErrorRate']) || $errorRate < $split['trainingErrorRate']) {
                $split = [
                    'value' => $threshold,
                    'operator' => $operator,
                    'prob' => $prob,
                    'column' => $col,
                    'trainingErrorRate' => $errorRate,
                ];
            }

            // A constant column (or a non-positive split count) leaves no step to
            // advance by; stepping anyway would never pass $maxValue
            if ($stepSize <= 0) {
                continue;
            }

            // Try other possible points one by one
            for ($step = $minValue; $step <= $maxValue; $step += $stepSize) {
                $threshold = (float) $step;
                [$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values);
                if ($errorRate < $split['trainingErrorRate']) {
                    $split = [
                        'value' => $threshold,
                        'operator' => $operator,
                        'prob' => $prob,
                        'column' => $col,
                        'trainingErrorRate' => $errorRate,
                    ];
                }
            }
        }

        return $split;
    }

    /**
     * Determines the best split for a nominal column by probing every
     * distinct value with the '=' and '!=' operators
     *
     * @return array Split with the keys value, operator, prob, column and trainingErrorRate
     */
    protected function getBestNominalSplit(array $samples, array $targets, int $col): array
    {
        $values = array_column($samples, $col);
        $valueCounts = array_count_values($values);
        $distinctVals = array_keys($valueCounts);

        $split = [];

        foreach (['=', '!='] as $operator) {
            foreach ($distinctVals as $val) {
                [$errorRate, $prob] = $this->calculateErrorRate($targets, $val, $operator, $values);
                // Keep the candidate with the LOWEST error rate, as the numerical split does
                if (!isset($split['trainingErrorRate']) || $errorRate < $split['trainingErrorRate']) {
                    $split = [
                        'value' => $val,
                        'operator' => $operator,
                        'prob' => $prob,
                        'column' => $col,
                        'trainingErrorRate' => $errorRate,
                    ];
                }
            }
        }

        return $split;
    }

    /**
     * Calculates the ratio of wrong predictions based on the new threshold
     * value given as the parameter
     *
     * @param mixed $threshold Numeric threshold, or the category value of a nominal split
     *                         (not restricted to float, nominal values may be strings)
     *
     * @return array [weighted error rate, proportion of correctly labelled samples per leaf]
     */
    protected function calculateErrorRate(array $targets, $threshold, string $operator, array $values): array
    {
        $wrong = 0.0;
        $prob = [];
        $leftLabel = $this->binaryLabels[0];
        $rightLabel = $this->binaryLabels[1];

        foreach ($values as $index => $value) {
            if (Comparison::compare($value, $threshold, $operator)) {
                $predicted = $leftLabel;
            } else {
                $predicted = $rightLabel;
            }

            $target = $targets[$index];
            if ((string) $predicted != (string) $targets[$index]) {
                $wrong += $this->weights[$index];
            }

            if (!isset($prob[$predicted][$target])) {
                $prob[$predicted][$target] = 0;
            }

            ++$prob[$predicted][$target];
        }

        // Calculate probabilities: Proportion of labels in each leaf
        $dist = array_combine($this->binaryLabels, array_fill(0, 2, 0.0));
        foreach ($prob as $leaf => $counts) {
            $leafTotal = (float) array_sum($prob[$leaf]);
            foreach ($counts as $label => $count) {
                if ((string) $leaf == (string) $label) {
                    $dist[$leaf] = $count / $leafTotal;
                }
            }
        }

        return [$wrong / (float) array_sum($this->weights), $dist];
    }

    /**
     * Returns the probability of the sample of belonging to the given label
     *
     * Probability of a sample is calculated as the proportion of the label
     * within the labels of the training samples in the decision node
     *
     * @param mixed $label
     */
    protected function predictProbability(array $sample, $label): float
    {
        $predicted = $this->predictSampleBinary($sample);
        if ((string) $predicted == (string) $label) {
            return $this->prob[$label];
        }

        return 0.0;
    }

    /**
     * Applies the trained comparison to the sample and returns the
     * corresponding binary label.
     *
     * @return mixed
     */
    protected function predictSampleBinary(array $sample)
    {
        if (Comparison::compare($sample[$this->column], $this->value, $this->operator)) {
            return $this->binaryLabels[0];
        }

        return $this->binaryLabels[1];
    }

    /**
     * Intentionally empty.
     */
    protected function resetBinary(): void
    {
    }
}

View File

@ -0,0 +1,283 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification\Linear;
use Closure;
use Exception;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Helper\Optimizer\ConjugateGradient;
/**
 * Logistic regression classifier built on top of Adaline; the linear output
 * is passed through the sigmoid function.
 */
class LogisticRegression extends Adaline
{
    /**
     * Batch training: Gradient descent algorithm (default)
     */
    public const BATCH_TRAINING = 1;

    /**
     * Online training: Stochastic gradient descent learning
     */
    public const ONLINE_TRAINING = 2;

    /**
     * Conjugate Batch: Conjugate Gradient algorithm
     */
    public const CONJUGATE_GRAD_TRAINING = 3;

    /**
     * Cost function to optimize: 'log' and 'sse' are supported <br>
     *  - 'log' : log likelihood <br>
     *  - 'sse' : sum of squared errors <br>
     *
     * @var string
     */
    protected $costFunction = 'log';

    /**
     * Regularization term: only 'L2' is supported
     * (stored in upper case, or as an empty string when disabled)
     *
     * @var string
     */
    protected $penalty = 'L2';

    /**
     * Lambda (λ) parameter of regularization term. If λ is set to 0, then
     * regularization term is cancelled.
     *
     * @var float
     */
    protected $lambda = 0.5;

    /**
     * Initalize a Logistic Regression classifier with maximum number of iterations
     * and learning rule to be applied <br>
     *
     * Maximum number of iterations can be an integer value greater than 0 <br>
     * If normalizeInputs is set to true, then every input given to the algorithm will be standardized
     * by use of standard deviation and mean calculation <br>
     *
     * Cost function can be 'log' for log-likelihood and 'sse' for sum of squared errors <br>
     *
     * Penalty (Regularization term) can be 'L2' (in any letter case) or empty string to cancel penalty term
     *
     * @throws InvalidArgumentException
     */
    public function __construct(
        int $maxIterations = 500,
        bool $normalizeInputs = true,
        int $trainingType = self::CONJUGATE_GRAD_TRAINING,
        string $cost = 'log',
        string $penalty = 'L2'
    ) {
        $trainingTypes = range(self::BATCH_TRAINING, self::CONJUGATE_GRAD_TRAINING);
        if (!in_array($trainingType, $trainingTypes, true)) {
            throw new InvalidArgumentException(
                'Logistic regression can only be trained with '.
                'batch (gradient descent), online (stochastic gradient descent) '.
                'or conjugate batch (conjugate gradients) algorithms'
            );
        }

        if (!in_array($cost, ['log', 'sse'], true)) {
            throw new InvalidArgumentException(
                "Logistic regression cost function can be one of the following: \n".
                "'log' for log-likelihood and 'sse' for sum of squared errors"
            );
        }

        if ($penalty !== '' && strtoupper($penalty) !== 'L2') {
            throw new InvalidArgumentException('Logistic regression supports only \'L2\' regularization');
        }

        $this->learningRate = 0.001;

        parent::__construct($this->learningRate, $maxIterations, $normalizeInputs);

        $this->trainingType = $trainingType;
        $this->costFunction = $cost;
        // Store the normalized form: the validation above is case-insensitive while
        // getCostFunction() compares against 'L2' exactly, so a raw 'l2' would
        // silently switch the regularization off
        $this->penalty = strtoupper($penalty);
    }

    /**
     * Sets the learning rate if gradient descent algorithm is
     * selected for training
     */
    public function setLearningRate(float $learningRate): void
    {
        $this->learningRate = $learningRate;
    }

    /**
     * Lambda (λ) parameter of regularization term. If 0 is given,
     * then the regularization term is cancelled
     */
    public function setLambda(float $lambda): void
    {
        $this->lambda = $lambda;
    }

    /**
     * Adapts the weights with respect to given samples and targets
     * by use of selected solver
     *
     * @throws \Exception
     */
    protected function runTraining(array $samples, array $targets): void
    {
        $callback = $this->getCostFunction();

        switch ($this->trainingType) {
            case self::BATCH_TRAINING:
                $this->runGradientDescent($samples, $targets, $callback, true);

                return;

            case self::ONLINE_TRAINING:
                $this->runGradientDescent($samples, $targets, $callback, false);

                return;

            case self::CONJUGATE_GRAD_TRAINING:
                $this->runConjugateGradient($samples, $targets, $callback);

                return;

            default:
                // Not reached
                throw new Exception(sprintf('Logistic regression has invalid training type: %d.', $this->trainingType));
        }
    }

    /**
     * Executes Conjugate Gradient method to optimize the weights of the LogReg model
     */
    protected function runConjugateGradient(array $samples, array $targets, Closure $gradientFunc): void
    {
        // The optimizer is created once and reused afterwards
        if ($this->optimizer === null) {
            $this->optimizer = (new ConjugateGradient($this->featureCount))
                ->setMaxIterations($this->maxIterations);
        }

        $this->weights = $this->optimizer->runOptimization($samples, $targets, $gradientFunc);
        $this->costValues = $this->optimizer->getCostValues();
    }

    /**
     * Returns the appropriate callback function for the selected cost function
     *
     * The callback receives (weights, sample, target) and returns
     * [error, gradient, penalty].
     *
     * @throws \Exception
     */
    protected function getCostFunction(): Closure
    {
        $penalty = 0;
        if ($this->penalty === 'L2') {
            $penalty = $this->lambda;
        }

        switch ($this->costFunction) {
            case 'log':
                /*
                 * Negative of Log-likelihood cost function to be minimized:
                 *		J(x) = ∑( - y . log(h(x)) - (1 - y) . log(1 - h(x)))
                 *
                 * If regularization term is given, then it will be added to the cost:
                 *		for L2 : J(x) = J(x) +  λ/m . w
                 *
                 * The gradient of the cost function to be used with gradient descent:
                 *		∇J(x) = -(y - h(x)) = (h(x) - y)
                 */
                return function ($weights, $sample, $y) use ($penalty) {
                    $this->weights = $weights;
                    $hX = $this->output($sample);

                    // In cases where $hX = 1 or $hX = 0, the log-likelihood
                    // value will give a NaN, so we fix these values
                    if ($hX == 1) {
                        $hX = 1 - 1e-10;
                    }

                    if ($hX == 0) {
                        $hX = 1e-10;
                    }

                    // Targets arrive as -1/1; the cost expects 0/1
                    $y = $y < 0 ? 0 : 1;

                    $error = -$y * log($hX) - (1 - $y) * log(1 - $hX);
                    $gradient = $hX - $y;

                    return [$error, $gradient, $penalty];
                };
            case 'sse':
                /*
                 * Sum of squared errors or least squared errors cost function:
                 *		J(x) = ∑ (y - h(x))^2
                 *
                 * If regularization term is given, then it will be added to the cost:
                 *		for L2 : J(x) = J(x) +  λ/m . w
                 *
                 * The gradient of the cost function:
                 *		∇J(x) = -(y - h(x)) . h(x) . (1 - h(x))
                 */
                return function ($weights, $sample, $y) use ($penalty) {
                    $this->weights = $weights;
                    $hX = $this->output($sample);

                    // Targets arrive as -1/1; the cost expects 0/1
                    $y = $y < 0 ? 0 : 1;

                    $error = ($y - $hX) ** 2;
                    $gradient = -($y - $hX) * $hX * (1 - $hX);

                    return [$error, $gradient, $penalty];
                };
            default:
                // Not reached
                throw new Exception(sprintf('Logistic regression has invalid cost function: %s.', $this->costFunction));
        }
    }

    /**
     * Returns the output of the network, a float value between 0.0 and 1.0
     * (the sigmoid of the parent's output)
     */
    protected function output(array $sample): float
    {
        $sum = parent::output($sample);

        return 1.0 / (1.0 + exp(-$sum));
    }

    /**
     * Returns the class value (either -1 or 1) for the given input
     */
    protected function outputClass(array $sample): int
    {
        $output = $this->output($sample);

        if ($output > 0.5) {
            return 1;
        }

        return -1;
    }

    /**
     * Returns the probability of the sample of belonging to the given label.
     *
     * The probability is the sigmoid output of the model for labels found at
     * a position greater than 0 in the label list, and its complement otherwise.
     *
     * @param mixed $label
     */
    protected function predictProbability(array $sample, $label): float
    {
        $sample = $this->checkNormalizedSample($sample);
        $probability = $this->output($sample);

        if (array_search($label, $this->labels, true) > 0) {
            return $probability;
        }

        return 1 - $probability;
    }
}

View File

@ -0,0 +1,264 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification\Linear;
use Closure;
use Phpml\Classification\Classifier;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Helper\OneVsRest;
use Phpml\Helper\Optimizer\GD;
use Phpml\Helper\Optimizer\Optimizer;
use Phpml\Helper\Optimizer\StochasticGD;
use Phpml\Helper\Predictable;
use Phpml\IncrementalEstimator;
use Phpml\Preprocessing\Normalizer;
class Perceptron implements Classifier, IncrementalEstimator
{
    use Predictable;
    use OneVsRest;

    /**
     * Optimizer instance; created on the first training run and reused
     * until resetBinary() clears it.
     *
     * @var Optimizer|GD|StochasticGD|null
     */
    protected $optimizer;

    /**
     * Maps the internal class values (1 and -1) to the two original labels.
     *
     * @var array
     */
    protected $labels = [];

    /**
     * @var int
     */
    protected $featureCount = 0;

    /**
     * Learned weights; index 0 is used as the bias term by output().
     *
     * @var array
     */
    protected $weights = [];

    /**
     * @var float
     */
    protected $learningRate;

    /**
     * @var int
     */
    protected $maxIterations;

    /**
     * Only set when input normalization was requested in the constructor.
     *
     * @var Normalizer
     */
    protected $normalizer;

    /**
     * @var bool
     */
    protected $enableEarlyStop = true;

    /**
     * Initialize a perceptron classifier with given learning rate and maximum
     * number of iterations used while training the perceptron
     *
     * @param float $learningRate    Value between 0.0(exclusive) and 1.0(inclusive)
     * @param int   $maxIterations   Must be at least 1
     * @param bool  $normalizeInputs When true, samples are passed through a
     *                               Normalizer (NORM_STD) before training and prediction
     *
     * @throws InvalidArgumentException
     */
    public function __construct(float $learningRate = 0.001, int $maxIterations = 1000, bool $normalizeInputs = true)
    {
        if ($learningRate <= 0.0 || $learningRate > 1.0) {
            throw new InvalidArgumentException('Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)');
        }

        if ($maxIterations <= 0) {
            throw new InvalidArgumentException('Maximum number of iterations must be an integer greater than 0');
        }

        if ($normalizeInputs) {
            $this->normalizer = new Normalizer(Normalizer::NORM_STD);
        }

        $this->learningRate = $learningRate;
        $this->maxIterations = $maxIterations;
    }

    /**
     * Incremental training entry point; delegates to trainByLabel(), which is
     * not defined in this class (presumably provided by the OneVsRest trait).
     */
    public function partialTrain(array $samples, array $targets, array $labels = []): void
    {
        $this->trainByLabel($samples, $targets, $labels);
    }

    /**
     * Trains the binary model: $labels[0] becomes the positive class (1) and
     * $labels[1] the negative class (-1).
     */
    public function trainBinary(array $samples, array $targets, array $labels): void
    {
        if ($this->normalizer !== null) {
            $this->normalizer->transform($samples);
        }

        // Set all target values to either -1 or 1
        $this->labels = [
            1 => $labels[0],
            -1 => $labels[1],
        ];

        // Compare the string forms strictly: a loose comparison would treat
        // distinct numeric-looking labels such as "1" and "01" as equal.
        $positiveLabel = (string) $this->labels[1];
        foreach ($targets as $key => $target) {
            $targets[$key] = (string) $target === $positiveLabel ? 1 : -1;
        }

        // Set samples and feature count vars
        $this->featureCount = count($samples[0]);

        $this->runTraining($samples, $targets);
    }

    /**
     * Normally enabling early stopping for the optimization procedure may
     * help saving processing time while in some cases it may result in
     * premature convergence.
     *
     * If "false" is given, the optimization procedure will always be executed
     * for $maxIterations times
     *
     * @return $this
     */
    public function setEarlyStop(bool $enable = true)
    {
        $this->enableEarlyStop = $enable;

        return $this;
    }

    /**
     * Returns the cost values obtained during the training.
     *
     * NOTE(review): $costValues is not declared in this class; it is
     * presumably declared by one of the used traits - confirm.
     */
    public function getCostValues(): array
    {
        return $this->costValues;
    }

    /**
     * Clears every piece of state produced by training.
     */
    protected function resetBinary(): void
    {
        $this->labels = [];
        $this->optimizer = null;
        $this->featureCount = 0;
        $this->weights = [];
        $this->costValues = [];
    }

    /**
     * Trains the perceptron model with Stochastic Gradient Descent optimization
     * to get the correct set of weights
     */
    protected function runTraining(array $samples, array $targets): void
    {
        // The cost function is the sum of squares
        $callback = function ($weights, $sample, $target) {
            $this->weights = $weights;

            $prediction = $this->outputClass($sample);
            $gradient = $prediction - $target;
            $error = $gradient ** 2;

            return [$error, $gradient];
        };

        $this->runGradientDescent($samples, $targets, $callback);
    }

    /**
     * Executes a Gradient Descent algorithm for
     * the given cost function
     *
     * @param bool $isBatch selects GD when true, StochasticGD otherwise; only
     *                      honoured when no optimizer has been created yet
     */
    protected function runGradientDescent(array $samples, array $targets, Closure $gradientFunc, bool $isBatch = false): void
    {
        $class = $isBatch ? GD::class : StochasticGD::class;

        if ($this->optimizer === null) {
            $this->optimizer = (new $class($this->featureCount))
                ->setLearningRate($this->learningRate)
                ->setMaxIterations($this->maxIterations)
                ->setChangeThreshold(1e-6)
                ->setEarlyStop($this->enableEarlyStop);
        }

        $this->weights = $this->optimizer->runOptimization($samples, $targets, $gradientFunc);
        $this->costValues = $this->optimizer->getCostValues();
    }

    /**
     * Checks if the sample should be normalized and if so, returns the
     * normalized sample
     */
    protected function checkNormalizedSample(array $sample): array
    {
        if ($this->normalizer !== null) {
            // transform() works on a list of samples by reference
            $samples = [$sample];
            $this->normalizer->transform($samples);
            $sample = $samples[0];
        }

        return $sample;
    }

    /**
     * Calculates net output of the network as a float value for the given input
     *
     * @return int|float
     */
    protected function output(array $sample)
    {
        $sum = 0;
        foreach ($this->weights as $index => $w) {
            if ($index === 0) {
                // bias term
                $sum += $w;
            } else {
                $sum += $w * $sample[$index - 1];
            }
        }

        return $sum;
    }

    /**
     * Returns the class value (either -1 or 1) for the given input
     */
    protected function outputClass(array $sample): int
    {
        return $this->output($sample) > 0 ? 1 : -1;
    }

    /**
     * Returns the probability of the sample of belonging to the given label.
     *
     * The probability is simply taken as the distance of the sample
     * to the decision plane. 0.0 is returned when the given label is not the
     * predicted one.
     *
     * @param mixed $label
     */
    protected function predictProbability(array $sample, $label): float
    {
        $predicted = $this->predictSampleBinary($sample);

        // Strict comparison of the string forms, consistent with trainBinary()
        if ((string) $predicted === (string) $label) {
            $sample = $this->checkNormalizedSample($sample);

            return (float) abs($this->output($sample));
        }

        return 0.0;
    }

    /**
     * Predicts the original label for a single (not yet normalized) sample.
     *
     * @return mixed
     */
    protected function predictSampleBinary(array $sample)
    {
        $sample = $this->checkNormalizedSample($sample);

        $predictedClass = $this->outputClass($sample);

        return $this->labels[$predictedClass];
    }
}

View File

@ -0,0 +1,58 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification;
use Phpml\Exception\InvalidArgumentException;
use Phpml\NeuralNetwork\Network\MultilayerPerceptron;
class MLPClassifier extends MultilayerPerceptron implements Classifier
{
    /**
     * Returns the position of the given target inside $this->classes.
     *
     * @param mixed $target
     *
     * @throws InvalidArgumentException when the target is not one of the known classes
     */
    public function getTargetClass($target): int
    {
        // A single strict search both validates the target and yields its index
        $index = array_search($target, $this->classes, true);
        if ($index === false) {
            throw new InvalidArgumentException(
                sprintf('Target with value "%s" is not part of the accepted classes', $target)
            );
        }

        return $index;
    }

    /**
     * Feeds the sample through the network and returns the key of the
     * strongest output.
     *
     * The maximum is taken over all outputs, so a class is still returned
     * when every output is zero or negative; null is returned only when the
     * network produces no output at all.
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
        $output = $this->setInput($sample)->getOutput();

        $predictedClass = null;
        $max = null;
        foreach ($output as $class => $value) {
            if ($max === null || $value > $max) {
                $predictedClass = $class;
                $max = $value;
            }
        }

        return $predictedClass;
    }

    /**
     * Runs one training step for a single sample.
     *
     * @param mixed $target
     */
    protected function trainSample(array $sample, $target): void
    {
        // Feed-forward.
        $this->setInput($sample);

        // Back-propagate.
        $this->backpropagation->backpropagate($this->getLayers(), $this->getTargetClass($target));
    }
}

View File

@ -0,0 +1,184 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
use Phpml\Math\Statistic\Mean;
use Phpml\Math\Statistic\StandardDeviation;
class NaiveBayes implements Classifier
{
    use Trainable;
    use Predictable;

    public const CONTINUOS = 1;

    public const NOMINAL = 2;

    public const EPSILON = 1e-10;

    /**
     * Per label and feature: standard deviation (continuous features only).
     *
     * @var array
     */
    private $std = [];

    /**
     * Per label and feature: arithmetic mean (continuous features only).
     *
     * @var array
     */
    private $mean = [];

    /**
     * Per label and feature: relative frequency of each observed value
     * (nominal features only).
     *
     * @var array
     */
    private $discreteProb = [];

    /**
     * Per label and feature: self::CONTINUOS or self::NOMINAL.
     *
     * @var array
     */
    private $dataType = [];

    /**
     * Prior probability of each label.
     *
     * @var array
     */
    private $p = [];

    /**
     * @var int
     */
    private $sampleCount = 0;

    /**
     * @var int
     */
    private $featureCount = 0;

    /**
     * Distinct targets, cast to string.
     *
     * @var array
     */
    private $labels = [];

    /**
     * Adds the given samples to the ones already seen and recomputes the
     * priors and per-feature statistics of every label.
     */
    public function train(array $samples, array $targets): void
    {
        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);
        $this->sampleCount = count($this->samples);
        $this->featureCount = count($this->samples[0]);

        // Flipping twice removes duplicate targets
        $this->labels = array_map('strval', array_flip(array_flip($this->targets)));
        foreach ($this->labels as $label) {
            $samples = $this->getSamplesByLabel($label);
            $this->p[$label] = count($samples) / $this->sampleCount;
            $this->calculateStatistics($label, $samples);
        }
    }

    /**
     * Returns the label with the highest posterior score for the sample.
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
        // Use NaiveBayes assumption for each label using:
        //  P(label|features) = P(label) * P(feature0|label) * P(feature1|label) .... P(featureN|label)
        // The product is evaluated in log space, so the prior has to enter as
        // log(P(label)) before the per-feature log-likelihoods are added.
        // Then compare the score for each class to determine which label is most likely
        $predictions = [];
        foreach ($this->labels as $label) {
            $score = log($this->p[$label]);
            for ($i = 0; $i < $this->featureCount; ++$i) {
                $score += $this->sampleProbability($sample, $i, $label);
            }

            $predictions[$label] = $score;
        }

        arsort($predictions, SORT_NUMERIC);
        reset($predictions);

        return key($predictions);
    }

    /**
     * Calculates vital statistics for each label & feature. Stores these
     * values in private array in order to avoid repeated calculation
     */
    private function calculateStatistics(string $label, array $samples): void
    {
        $this->std[$label] = array_fill(0, $this->featureCount, 0);
        $this->mean[$label] = array_fill(0, $this->featureCount, 0);
        $this->dataType[$label] = array_fill(0, $this->featureCount, self::CONTINUOS);
        $this->discreteProb[$label] = array_fill(0, $this->featureCount, self::CONTINUOS);
        for ($i = 0; $i < $this->featureCount; ++$i) {
            // Get the values of nth column in the samples array
            $values = array_column($samples, $i);
            $numValues = count($values);
            // if the values contain non-numeric data,
            // then it should be treated as nominal/categorical/discrete column
            if ($values !== array_filter($values, 'is_numeric')) {
                $this->dataType[$label][$i] = self::NOMINAL;
                // Turn absolute counts into relative frequencies
                $this->discreteProb[$label][$i] = array_map(
                    function ($count) use ($numValues) {
                        return $count / $numValues;
                    },
                    array_count_values($values)
                );
            } else {
                $this->mean[$label][$i] = Mean::arithmetic($values);
                // Add epsilon in order to avoid zero stdev
                $this->std[$label][$i] = self::EPSILON + StandardDeviation::population($values, false);
            }
        }
    }

    /**
     * Calculates the log-likelihood log P(feature value | label) of a single
     * feature of the sample.
     *
     * @throws InvalidArgumentException when the sample has no value for the feature
     */
    private function sampleProbability(array $sample, int $feature, string $label): float
    {
        if (!isset($sample[$feature])) {
            throw new InvalidArgumentException('Missing feature. All samples must have equal number of features');
        }

        $value = $sample[$feature];
        if ($this->dataType[$label][$feature] === self::NOMINAL) {
            // Values never seen for this label get a tiny probability instead
            // of zero so that the logarithm stays finite
            if (!isset($this->discreteProb[$label][$feature][$value]) ||
                $this->discreteProb[$label][$feature][$value] == 0) {
                return log(self::EPSILON);
            }

            // Log scale, to be consistent with the Gaussian branch below
            return log($this->discreteProb[$label][$feature][$value]);
        }

        $std = $this->std[$label][$feature];
        $mean = $this->mean[$label][$feature];
        // Calculate the probability density by use of normal/Gaussian distribution
        // Ref: https://en.wikipedia.org/wiki/Normal_distribution
        //
        // In order to avoid numerical errors because of small or zero values,
        // some libraries adopt taking log of calculations such as
        // scikit-learn did.
        // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py)
        $pdf = -0.5 * log(2.0 * M_PI * $std * $std);
        $pdf -= 0.5 * (($value - $mean) ** 2) / ($std * $std);

        return $pdf;
    }

    /**
     * Return samples belonging to specific label
     */
    private function getSamplesByLabel(string $label): array
    {
        $samples = [];
        for ($i = 0; $i < $this->sampleCount; ++$i) {
            // Labels are the string forms of the targets (see train()), so
            // compare string forms strictly instead of relying on loose
            // comparison between mixed types
            if ((string) $this->targets[$i] === $label) {
                $samples[] = $this->samples[$i];
            }
        }

        return $samples;
    }
}

View File

@ -0,0 +1,26 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification;
use Phpml\SupportVectorMachine\Kernel;
use Phpml\SupportVectorMachine\SupportVectorMachine;
use Phpml\SupportVectorMachine\Type;
class SVC extends SupportVectorMachine implements Classifier
{
    /**
     * Configures the parent support vector machine as a C-SVC classifier.
     *
     * The two literals forwarded to the parent (0.5 and 0.1) are not
     * configurable here; presumably they are the parent's nu and epsilon
     * arguments - confirm against SupportVectorMachine::__construct().
     */
    public function __construct(
        int $kernel = Kernel::RBF,
        float $cost = 1.0,
        int $degree = 3,
        ?float $gamma = null,
        float $coef0 = 0.0,
        float $tolerance = 0.001,
        int $cacheSize = 100,
        bool $shrinking = true,
        bool $probabilityEstimates = false
    ) {
        parent::__construct(
            Type::C_SVC,
            $kernel,
            $cost,
            0.5,
            $degree,
            $gamma,
            $coef0,
            0.1,
            $tolerance,
            $cacheSize,
            $shrinking,
            $probabilityEstimates
        );
    }
}

View File

@ -0,0 +1,21 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification;
/**
 * Base class for classifiers that accept a weight per training sample.
 */
abstract class WeightedClassifier implements Classifier
{
    /**
     * Sample weights as last passed to setSampleWeights(); empty by default.
     *
     * @var array
     */
    protected $weights = [];

    /**
     * Stores the array that holds one weight for each sample.
     *
     * The array is stored as given; no validation is performed here.
     */
    public function setSampleWeights(array $weights): void
    {
        $this->weights = $weights;
    }
}

View File

@ -0,0 +1,10 @@
<?php
declare(strict_types=1);
namespace Phpml\Clustering;
/**
 * Contract implemented by the clustering algorithms of this namespace.
 */
interface Clusterer
{
/**
 * Groups the given samples into clusters.
 *
 * @param array $samples samples to be grouped
 *
 * @return array the resulting groups; the implementations in this package
 *               (e.g. DBSCAN, KMeans) return one array of samples per cluster
 */
public function cluster(array $samples): array;
}

View File

@ -0,0 +1,120 @@
<?php
declare(strict_types=1);
namespace Phpml\Clustering;
use Phpml\Math\Distance;
use Phpml\Math\Distance\Euclidean;
class DBSCAN implements Clusterer
{
    /**
     * Label given to samples that do not belong to any cluster.
     */
    private const NOISE = -1;

    /**
     * Radius of the neighbourhood around a sample (exclusive bound).
     *
     * @var float
     */
    private $epsilon;

    /**
     * Minimum neighbourhood size for a sample to start or extend a cluster.
     *
     * @var int
     */
    private $minSamples;

    /**
     * @var Distance
     */
    private $distanceMetric;

    /**
     * @param Distance|null $distanceMetric metric used for neighbourhood
     *                                      queries; Euclidean when omitted
     */
    public function __construct(float $epsilon = 0.5, int $minSamples = 3, ?Distance $distanceMetric = null)
    {
        $this->epsilon = $epsilon;
        $this->minSamples = $minSamples;
        $this->distanceMetric = $distanceMetric ?? new Euclidean();
    }

    /**
     * Assigns every sample either to a cluster or to noise and returns the
     * clusters; noise samples are not part of the result.
     */
    public function cluster(array $samples): array
    {
        $labels = [];
        $clusterId = 0;

        foreach ($samples as $index => $sample) {
            if (!isset($labels[$index])) {
                $neighbors = $this->getIndicesInRegion($sample, $samples);

                if (count($neighbors) < $this->minSamples) {
                    // Too sparse to seed a cluster; may be claimed later as a
                    // border point by expandCluster()
                    $labels[$index] = self::NOISE;
                } else {
                    $labels[$index] = $clusterId;
                    $this->expandCluster($samples, $neighbors, $labels, $clusterId);
                    ++$clusterId;
                }
            }
        }

        return $this->groupByCluster($samples, $labels, $clusterId);
    }

    /**
     * Grows cluster $n from the given seed indices, writing the assignments
     * into $labels.
     */
    private function expandCluster(array $samples, array $seeds, array &$labels, int $n): void
    {
        while (($index = array_pop($seeds)) !== null) {
            $alreadyVisited = isset($labels[$index]);

            // Samples that already belong to a cluster are left alone
            if ($alreadyVisited && $labels[$index] !== self::NOISE) {
                continue;
            }

            $labels[$index] = $n;

            // Former noise joins the cluster but is not expanded further
            if ($alreadyVisited) {
                continue;
            }

            $reachable = $this->getIndicesInRegion($samples[$index], $samples);
            if (count($reachable) >= $this->minSamples) {
                $seeds = array_unique(array_merge($seeds, $reachable));
            }
        }
    }

    /**
     * Returns the keys of all samples whose distance to $center is strictly
     * smaller than epsilon.
     */
    private function getIndicesInRegion(array $center, array $samples): array
    {
        $inRegion = array_filter($samples, function ($candidate) use ($center) {
            return $this->distanceMetric->distance($center, $candidate) < $this->epsilon;
        });

        return array_keys($inRegion);
    }

    /**
     * Turns the per-sample labels into a list of $n clusters.
     */
    private function groupByCluster(array $samples, array $labels, int $n): array
    {
        $clusters = array_fill(0, $n, []);

        foreach ($samples as $index => $sample) {
            $label = $labels[$index];
            if ($label === self::NOISE) {
                continue;
            }

            $clusters[$label][$index] = $sample;
        }

        // Reindex (i.e. to 0, 1, 2, ...) integer indices for backward compatibility
        return array_map(function (array $members): array {
            return array_merge($members, []);
        }, $clusters);
    }
}

View File

@ -0,0 +1,239 @@
<?php
declare(strict_types=1);
namespace Phpml\Clustering;
use Phpml\Clustering\KMeans\Cluster;
use Phpml\Clustering\KMeans\Point;
use Phpml\Clustering\KMeans\Space;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Math\Distance\Euclidean;
class FuzzyCMeans implements Clusterer
{
    /**
     * @var int
     */
    private $clustersNumber;

    /**
     * @var Cluster[]
     */
    private $clusters = [];

    /**
     * @var Space
     */
    private $space;

    /**
     * Membership degrees, indexed as [cluster][sample].
     *
     * @var float[][]
     */
    private $membership = [];

    /**
     * Fuzziness coefficient; must be greater than 1.
     *
     * @var float
     */
    private $fuzziness;

    /**
     * Iteration stops once the objective changes by no more than this value.
     *
     * @var float
     */
    private $epsilon;

    /**
     * @var int
     */
    private $maxIterations;

    /**
     * @var int
     */
    private $sampleCount;

    /**
     * @var array
     */
    private $samples = [];

    /**
     * @throws InvalidArgumentException when the cluster count is not positive
     *                                  or the fuzziness is not greater than 1
     */
    public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100)
    {
        if ($clustersNumber <= 0) {
            throw new InvalidArgumentException('Invalid clusters number');
        }

        // The membership update divides by (fuzziness - 1)
        if ($fuzziness <= 1.0) {
            throw new InvalidArgumentException('Fuzziness must be greater than 1');
        }

        $this->clustersNumber = $clustersNumber;
        $this->fuzziness = $fuzziness;
        $this->epsilon = $epsilon;
        $this->maxIterations = $maxIterations;
    }

    /**
     * Returns the membership matrix of the last clustering run, indexed as
     * [cluster][sample].
     */
    public function getMembershipMatrix(): array
    {
        return $this->membership;
    }

    /**
     * Runs fuzzy c-means on the samples and returns, per cluster, the samples
     * whose membership is highest for that cluster.
     *
     * The samples are addressed by the positions 0..n-1.
     *
     * @param Point[]|int[][] $samples
     */
    public function cluster(array $samples): array
    {
        // Initialize variables, clusters and membership matrix
        $this->sampleCount = count($samples);
        $this->samples = $samples;
        $this->space = new Space(count($samples[0]));
        // Start from fresh clusters so that points attached by a previous
        // call do not leak into this result
        $this->clusters = [];
        $this->initClusters();

        // Our goal is minimizing the objective value while
        // executing the clustering steps at a maximum number of iterations
        $lastObjective = 0.0;
        $iterations = 0;
        do {
            // Update the membership matrix and cluster centers, respectively
            $this->updateMembershipMatrix();
            $this->updateClusters();

            // Calculate the new value of the objective function
            $objectiveVal = $this->getObjective();
            $difference = abs($lastObjective - $objectiveVal);
            $lastObjective = $objectiveVal;
        } while ($difference > $this->epsilon && ++$iterations < $this->maxIterations);

        // Attach (hard cluster) each data point to the nearest cluster
        for ($k = 0; $k < $this->sampleCount; ++$k) {
            $column = array_column($this->membership, $k);
            arsort($column);
            reset($column);
            $cluster = $this->clusters[key($column)];
            $cluster->attach(new Point($this->samples[$k]));
        }

        // Return grouped samples
        $grouped = [];
        foreach ($this->clusters as $cluster) {
            $grouped[] = $cluster->getPoints();
        }

        return $grouped;
    }

    /**
     * Creates a random membership matrix and derives the initial cluster
     * centers from it.
     */
    protected function initClusters(): void
    {
        // Membership array is a matrix of cluster number by sample counts
        // We initialize the membership array with random values
        $this->generateRandomMembership($this->clustersNumber, $this->sampleCount);
        $this->updateClusters();
    }

    /**
     * Fills the membership matrix with $rows x $cols random values; each row
     * is scaled so that it sums to 1.
     */
    protected function generateRandomMembership(int $rows, int $cols): void
    {
        $this->membership = [];
        for ($i = 0; $i < $rows; ++$i) {
            $row = [];
            $total = 0.0;
            for ($k = 0; $k < $cols; ++$k) {
                $val = random_int(1, 5) / 10.0;
                $row[] = $val;
                $total += $val;
            }

            $this->membership[] = array_map(function ($val) use ($total) {
                return $val / $total;
            }, $row);
        }
    }

    /**
     * Recomputes every cluster center as the membership-weighted mean of the
     * samples; creates the cluster objects on first use.
     */
    protected function updateClusters(): void
    {
        $dim = $this->space->getDimension();
        if (count($this->clusters) === 0) {
            for ($i = 0; $i < $this->clustersNumber; ++$i) {
                $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0));
            }
        }

        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            $cluster = $this->clusters[$i];
            $center = $cluster->getCoordinates();
            for ($k = 0; $k < $dim; ++$k) {
                $a = $this->getMembershipRowTotal($i, $k, true);
                $b = $this->getMembershipRowTotal($i, $k, false);
                $center[$k] = $a / $b;
            }

            $cluster->setCoordinates($center);
        }
    }

    /**
     * Sums membership^fuzziness over all samples for the given cluster row.
     *
     * @param bool $multiply when true, each term is additionally multiplied
     *                       by the sample's coordinate in column $col
     */
    protected function getMembershipRowTotal(int $row, int $col, bool $multiply): float
    {
        $sum = 0.0;
        for ($k = 0; $k < $this->sampleCount; ++$k) {
            $val = $this->membership[$row][$k] ** $this->fuzziness;
            if ($multiply) {
                $val *= $this->samples[$k][$col];
            }

            $sum += $val;
        }

        return $sum;
    }

    /**
     * Recomputes the membership of every sample in every cluster from the
     * current cluster centers.
     */
    protected function updateMembershipMatrix(): void
    {
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            for ($k = 0; $k < $this->sampleCount; ++$k) {
                $distCalc = $this->getDistanceCalc($i, $k);
                $this->membership[$i][$k] = 1.0 / $distCalc;
            }
        }
    }

    /**
     * Returns the denominator of the membership update for cluster $row and
     * sample $col: the sum over all clusters j of
     * (d(row, sample) / d(j, sample)) ^ (2 / (fuzziness - 1)).
     *
     * Zero distances are handled explicitly to avoid a division by zero:
     * 1.0 is returned when the sample coincides with this cluster's center
     * (full membership) and INF when it coincides with another center
     * (membership 0).
     */
    protected function getDistanceCalc(int $row, int $col): float
    {
        $sum = 0.0;
        $distance = new Euclidean();
        // The whole fraction is the exponent; "** 2.0 / (m - 1)" would square
        // first and divide afterwards
        $exponent = 2.0 / ($this->fuzziness - 1);
        $dist1 = $distance->distance(
            $this->clusters[$row]->getCoordinates(),
            $this->samples[$col]
        );

        if ((float) $dist1 === 0.0) {
            return 1.0;
        }

        for ($j = 0; $j < $this->clustersNumber; ++$j) {
            $dist2 = $distance->distance(
                $this->clusters[$j]->getCoordinates(),
                $this->samples[$col]
            );

            if ((float) $dist2 === 0.0) {
                return INF;
            }

            $sum += ($dist1 / $dist2) ** $exponent;
        }

        return $sum;
    }

    /**
     * The objective is to minimize the distance between all data points
     * and all cluster centers. This method returns the summation of all
     * these distances
     */
    protected function getObjective(): float
    {
        $sum = 0.0;
        $distance = new Euclidean();
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            $clust = $this->clusters[$i]->getCoordinates();
            for ($k = 0; $k < $this->sampleCount; ++$k) {
                $point = $this->samples[$k];
                $sum += $distance->distance($clust, $point);
            }
        }

        return $sum;
    }
}

View File

@ -0,0 +1,50 @@
<?php
declare(strict_types=1);
namespace Phpml\Clustering;
use Phpml\Clustering\KMeans\Space;
use Phpml\Exception\InvalidArgumentException;
class KMeans implements Clusterer
{
    public const INIT_RANDOM = 1;

    public const INIT_KMEANS_PLUS_PLUS = 2;

    /**
     * Number of clusters to build.
     *
     * @var int
     */
    private $clustersNumber;

    /**
     * One of the INIT_* constants; forwarded to Space::cluster().
     *
     * @var int
     */
    private $initialization;

    /**
     * @throws InvalidArgumentException when the cluster count is not positive
     */
    public function __construct(int $clustersNumber, int $initialization = self::INIT_KMEANS_PLUS_PLUS)
    {
        if ($clustersNumber <= 0) {
            throw new InvalidArgumentException('Invalid clusters number');
        }

        $this->initialization = $initialization;
        $this->clustersNumber = $clustersNumber;
    }

    /**
     * Loads the samples into a Space (each sample's array key becomes the
     * point label), clusters the space and returns the points of every
     * resulting cluster.
     */
    public function cluster(array $samples): array
    {
        // The dimension of the space is taken from the first sample
        $dimension = count(reset($samples));
        $space = new Space($dimension);

        foreach ($samples as $label => $coordinates) {
            $space->addPoint($coordinates, $label);
        }

        $found = $space->cluster($this->clustersNumber, $this->initialization);

        return array_values(array_map(function ($cluster) {
            return $cluster->getPoints();
        }, $found));
    }
}

View File

@ -0,0 +1,117 @@
<?php
declare(strict_types=1);
namespace Phpml\Clustering\KMeans;
use IteratorAggregate;
use LogicException;
use SplObjectStorage;
class Cluster extends Point implements IteratorAggregate
{
    /**
     * Space used to create the temporary centroid point.
     *
     * @var Space
     */
    protected $space;

    /**
     * Points currently attached to this cluster.
     *
     * @var SplObjectStorage|Point[]
     */
    protected $points;

    /**
     * @param array $coordinates initial centroid coordinates
     */
    public function __construct(Space $space, array $coordinates)
    {
        parent::__construct($coordinates);
        $this->space = $space;
        $this->points = new SplObjectStorage();
    }

    /**
     * Returns the coordinates of all attached points; a point that carries a
     * label is stored under that label, the others are appended.
     */
    public function getPoints(): array
    {
        $points = [];
        foreach ($this->points as $point) {
            if ($point->label === null) {
                $points[] = $point->toArray();
            } else {
                $points[$point->label] = $point->toArray();
            }
        }

        return $points;
    }

    /**
     * Returns the centroid coordinates together with the attached points.
     */
    public function toArray(): array
    {
        return [
            'centroid' => parent::toArray(),
            'points' => $this->getPoints(),
        ];
    }

    /**
     * Attaches a point and returns it.
     *
     * @throws LogicException when the given point is itself a cluster
     */
    public function attach(Point $point): Point
    {
        if ($point instanceof self) {
            throw new LogicException('Cannot attach a cluster to another');
        }

        $this->points->attach($point);

        return $point;
    }

    /**
     * Detaches a point and returns it.
     */
    public function detach(Point $point): Point
    {
        $this->points->detach($point);

        return $point;
    }

    public function attachAll(SplObjectStorage $points): void
    {
        $this->points->addAll($points);
    }

    public function detachAll(SplObjectStorage $points): void
    {
        $this->points->removeAll($points);
    }

    /**
     * Moves the centroid to the mean of the attached points; the centroid is
     * left unchanged when no point is attached.
     */
    public function updateCentroid(): void
    {
        $count = count($this->points);
        if ($count === 0) {
            return;
        }

        // Accumulate the coordinate sums in a temporary point
        $centroid = $this->space->newPoint(array_fill(0, $this->dimension, 0));

        foreach ($this->points as $point) {
            for ($n = 0; $n < $this->dimension; ++$n) {
                $centroid->coordinates[$n] += $point->coordinates[$n];
            }
        }

        for ($n = 0; $n < $this->dimension; ++$n) {
            $this->coordinates[$n] = $centroid->coordinates[$n] / $count;
        }
    }

    /**
     * Iterating a cluster iterates its attached points.
     *
     * The return type is declared so that the method is compatible with
     * IteratorAggregate::getIterator(): Traversable on PHP 8.1 and later.
     *
     * @return Point[]|SplObjectStorage
     */
    public function getIterator(): \Traversable
    {
        return $this->points;
    }

    /**
     * Returns the number of attached points (not the number of coordinates).
     */
    public function count(): int
    {
        return count($this->points);
    }

    public function setCoordinates(array $newCoordinates): void
    {
        $this->coordinates = $newCoordinates;
    }
}

View File

@ -0,0 +1,125 @@
<?php
declare(strict_types=1);
namespace Phpml\Clustering\KMeans;
use ArrayAccess;
class Point implements ArrayAccess, \Countable
{
    /**
     * Number of coordinates given at construction time.
     *
     * @var int
     */
    protected $dimension;

    /**
     * @var array
     */
    protected $coordinates = [];

    /**
     * @var mixed
     */
    protected $label;

    /**
     * @param mixed $label optional label carried along with the point
     */
    public function __construct(array $coordinates, $label = null)
    {
        $this->dimension = count($coordinates);
        $this->coordinates = $coordinates;
        $this->label = $label;
    }

    public function toArray(): array
    {
        return $this->coordinates;
    }

    /**
     * Returns the Euclidean distance to the given point.
     *
     * @param bool $precise when false the square root is skipped and the
     *                      squared distance is returned
     *
     * @return float|int
     */
    public function getDistanceWith(self $point, bool $precise = true)
    {
        $distance = 0;
        for ($n = 0; $n < $this->dimension; ++$n) {
            $difference = $this->coordinates[$n] - $point->coordinates[$n];
            $distance += $difference * $difference;
        }

        return $precise ? $distance ** .5 : $distance;
    }

    /**
     * Returns the point closest to this one, or null for an empty list.
     * On ties the earliest point in the list wins.
     *
     * @param Point[] $points
     */
    public function getClosest(array $points): ?self
    {
        $minPoint = null;
        $minDistance = null;

        foreach ($points as $point) {
            // Squared distances are sufficient for comparison
            $distance = $this->getDistanceWith($point, false);

            if ($minDistance === null || $distance < $minDistance) {
                $minDistance = $distance;
                $minPoint = $point;
            }
        }

        return $minPoint;
    }

    public function getCoordinates(): array
    {
        return $this->coordinates;
    }

    /**
     * @param mixed $offset
     */
    public function offsetExists($offset): bool
    {
        return isset($this->coordinates[$offset]);
    }

    /**
     * The attribute below silences the PHP 8.1+ deprecation notice about the
     * missing "mixed" return type; on older PHP versions the line is parsed
     * as a comment.
     *
     * @param mixed $offset
     *
     * @return mixed
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        return $this->coordinates[$offset];
    }

    /**
     * @param mixed $offset
     * @param mixed $value
     */
    public function offsetSet($offset, $value): void
    {
        $this->coordinates[$offset] = $value;
    }

    /**
     * @param mixed $offset
     */
    public function offsetUnset($offset): void
    {
        unset($this->coordinates[$offset]);
    }

    /**
     * Returns the current number of coordinates.
     */
    public function count(): int
    {
        return count($this->coordinates);
    }
}

View File

@ -0,0 +1,259 @@
<?php
declare(strict_types=1);
namespace Phpml\Clustering\KMeans;
use InvalidArgumentException;
use LogicException;
use Phpml\Clustering\KMeans;
use SplObjectStorage;
class Space extends SplObjectStorage
{
    /**
     * Number of coordinates every point of this space must have.
     *
     * @var int
     */
    protected $dimension;

    /**
     * @throws LogicException when the dimension is smaller than 1
     */
    public function __construct(int $dimension)
    {
        if ($dimension < 1) {
            throw new LogicException('a space dimension cannot be null or negative');
        }

        $this->dimension = $dimension;
    }

    /**
     * Returns the coordinates of all points under the key 'points'.
     */
    public function toArray(): array
    {
        $points = [];

        /** @var Point $point */
        foreach ($this as $point) {
            $points[] = $point->toArray();
        }

        return ['points' => $points];
    }

    /**
     * Creates a point (without attaching it to the space).
     *
     * @param mixed $label
     *
     * @throws LogicException when the number of coordinates differs from the dimension
     */
    public function newPoint(array $coordinates, $label = null): Point
    {
        if (count($coordinates) !== $this->dimension) {
            throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
        }

        return new Point($coordinates, $label);
    }

    /**
     * Creates a point and attaches it to the space.
     *
     * @param mixed $label
     * @param mixed $data
     */
    public function addPoint(array $coordinates, $label = null, $data = null): void
    {
        $this->attach($this->newPoint($coordinates, $label), $data);
    }

    /**
     * @param object $point
     * @param mixed  $data
     *
     * @throws InvalidArgumentException when the object is not a Point
     */
    public function attach($point, $data = null): void
    {
        if (!$point instanceof Point) {
            throw new InvalidArgumentException('can only attach points to spaces');
        }

        parent::attach($point, $data);
    }

    public function getDimension(): int
    {
        return $this->dimension;
    }

    /**
     * Returns the per-coordinate minimum and maximum over all points as
     * [min point, max point], or false for an empty space.
     *
     * @return array|bool
     */
    public function getBoundaries()
    {
        if (count($this) === 0) {
            return false;
        }

        $min = $this->newPoint(array_fill(0, $this->dimension, null));
        $max = $this->newPoint(array_fill(0, $this->dimension, null));

        /** @var Point $point */
        foreach ($this as $point) {
            for ($n = 0; $n < $this->dimension; ++$n) {
                if ($min[$n] === null || $min[$n] > $point[$n]) {
                    $min[$n] = $point[$n];
                }

                if ($max[$n] === null || $max[$n] < $point[$n]) {
                    $max[$n] = $point[$n];
                }
            }
        }

        return [$min, $max];
    }

    /**
     * Returns a random point inside the box spanned by $min and $max.
     *
     * Integer bounds yield a random integer coordinate. Any other bounds
     * (e.g. floats) yield a random float in the range, because random_int()
     * only accepts integers and would raise a TypeError under strict types.
     */
    public function getRandomPoint(Point $min, Point $max): Point
    {
        $point = $this->newPoint(array_fill(0, $this->dimension, null));

        for ($n = 0; $n < $this->dimension; ++$n) {
            $low = $min[$n];
            $high = $max[$n];

            if (is_int($low) && is_int($high)) {
                $point[$n] = random_int($low, $high);
            } else {
                $fraction = random_int(0, PHP_INT_MAX) / PHP_INT_MAX;
                $point[$n] = $low + $fraction * ($high - $low);
            }
        }

        return $point;
    }

    /**
     * Initializes the clusters and reassigns points until no point changes
     * its cluster any more.
     *
     * @return Cluster[]
     */
    public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
    {
        $clusters = $this->initializeClusters($clustersNumber, $initMethod);

        do {
        } while (!$this->iterate($clusters));

        return $clusters;
    }

    /**
     * Creates the initial clusters and attaches every point of the space to
     * the first one. An unknown init method yields an empty list.
     *
     * @return Cluster[]
     */
    protected function initializeClusters(int $clustersNumber, int $initMethod): array
    {
        switch ($initMethod) {
            case KMeans::INIT_RANDOM:
                $clusters = $this->initializeRandomClusters($clustersNumber);

                break;

            case KMeans::INIT_KMEANS_PLUS_PLUS:
                $clusters = $this->initializeKMPPClusters($clustersNumber);

                break;

            default:
                return [];
        }

        $clusters[0]->attachAll($this);

        return $clusters;
    }

    /**
     * Performs one reassignment pass followed by a centroid update.
     *
     * @param Cluster[] $clusters
     *
     * @return bool true when no point had to be moved
     */
    protected function iterate(array $clusters): bool
    {
        $convergence = true;

        // Moves are collected first and applied afterwards so that the
        // clusters are not modified while they are being iterated
        $attach = new SplObjectStorage();
        $detach = new SplObjectStorage();

        foreach ($clusters as $cluster) {
            foreach ($cluster as $point) {
                $closest = $point->getClosest($clusters);

                if ($closest === $cluster) {
                    continue;
                }

                if (!isset($attach[$closest])) {
                    $attach[$closest] = new SplObjectStorage();
                }

                if (!isset($detach[$cluster])) {
                    $detach[$cluster] = new SplObjectStorage();
                }

                $attach[$closest]->attach($point);
                $detach[$cluster]->attach($point);

                $convergence = false;
            }
        }

        /** @var Cluster $cluster */
        foreach ($attach as $cluster) {
            $cluster->attachAll($attach[$cluster]);
        }

        /** @var Cluster $cluster */
        foreach ($detach as $cluster) {
            $cluster->detachAll($detach[$cluster]);
        }

        foreach ($clusters as $cluster) {
            $cluster->updateCentroid();
        }

        return $convergence;
    }

    /**
     * k-means++ style initialization: the first center is the first point of
     * the space; every further center is drawn with a probability
     * proportional to its distance to the closest center chosen so far.
     *
     * @return Cluster[]
     */
    protected function initializeKMPPClusters(int $clustersNumber): array
    {
        $clusters = [];
        $this->rewind();

        /** @var Point $current */
        $current = $this->current();

        $clusters[] = new Cluster($this, $current->getCoordinates());

        $distances = new SplObjectStorage();

        for ($i = 1; $i < $clustersNumber; ++$i) {
            $sum = 0.0;
            /** @var Point $point */
            foreach ($this as $point) {
                $closest = $point->getClosest($clusters);
                if ($closest === null) {
                    continue;
                }

                $distance = $point->getDistanceWith($closest);
                $distances[$point] = $distance;
                $sum += $distance;
            }

            // Draw a float threshold in [0, sum]. Truncating the sum to an
            // integer would collapse to 0 for small-scale data and always
            // select the first point again.
            $threshold = $sum * (random_int(0, PHP_INT_MAX) / PHP_INT_MAX);

            $chosen = null;
            /** @var Point $point */
            foreach ($this as $point) {
                $weight = $distances[$point];

                // Remember the latest point with a positive weight as a
                // fallback against rounding errors; the very first point is
                // the last resort when every weight is zero
                if ($chosen === null || $weight > 0) {
                    $chosen = $point;
                }

                $threshold -= $weight;
                if ($weight > 0 && $threshold <= 0) {
                    break;
                }
            }

            if ($chosen !== null) {
                $clusters[] = new Cluster($this, $chosen->getCoordinates());
            }
        }

        return $clusters;
    }

    /**
     * Creates the requested number of clusters at random positions inside
     * the boundaries of the space.
     *
     * @return Cluster[]
     */
    private function initializeRandomClusters(int $clustersNumber): array
    {
        $clusters = [];
        [$min, $max] = $this->getBoundaries();

        for ($n = 0; $n < $clustersNumber; ++$n) {
            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
        }

        return $clusters;
    }
}

View File

@ -0,0 +1,26 @@
<?php
declare(strict_types=1);
namespace Phpml\CrossValidation;
use Phpml\Dataset\Dataset;
class RandomSplit extends Split
{
    /**
     * Distributes the samples of the dataset randomly over the test and
     * train sets.
     *
     * Every sample is drawn exactly once (sampling without replacement), so
     * the two sets are disjoint and together cover the whole dataset. The
     * test set is filled first until its share of this dataset reaches
     * $testSize; the remaining samples go to the train set.
     *
     * Targets are looked up under the same keys as the samples.
     */
    protected function splitDataset(Dataset $dataset, float $testSize): void
    {
        $samples = $dataset->getSamples();
        $labels = $dataset->getTargets();
        $datasetSize = count($samples);
        // Entries already present (e.g. added by a stratified caller) do not
        // count towards this dataset's test share
        $testCount = count($this->testSamples);
        $keys = array_keys($samples);

        for ($remaining = $datasetSize; $remaining > 0; --$remaining) {
            // Pick one of the keys that are still unused, then move the last
            // unused key into the freed slot
            $pick = mt_rand(0, $remaining - 1);
            $key = $keys[$pick];
            $keys[$pick] = $keys[$remaining - 1];

            $setName = (count($this->testSamples) - $testCount) / $datasetSize >= $testSize ? 'train' : 'test';

            $this->{$setName.'Samples'}[] = $samples[$key];
            $this->{$setName.'Labels'}[] = $labels[$key];
        }
    }
}

View File

@ -0,0 +1,73 @@
<?php
declare(strict_types=1);
namespace Phpml\CrossValidation;
use Phpml\Dataset\Dataset;
use Phpml\Exception\InvalidArgumentException;
/**
 * Base class for train/test splits: validates the requested test share,
 * seeds the random generator and lets the subclass fill the four result
 * arrays.
 */
abstract class Split
{
    /**
     * @var array
     */
    protected $trainSamples = [];

    /**
     * @var array
     */
    protected $testSamples = [];

    /**
     * @var array
     */
    protected $trainLabels = [];

    /**
     * @var array
     */
    protected $testLabels = [];

    /**
     * @param float    $testSize share of the test set, strictly between 0 and 1
     * @param int|null $seed     seed for mt_srand(); null seeds randomly
     *
     * @throws InvalidArgumentException when $testSize is out of range
     */
    public function __construct(Dataset $dataset, float $testSize = 0.3, ?int $seed = null)
    {
        $outOfRange = $testSize <= 0 || $testSize >= 1;
        if ($outOfRange) {
            throw new InvalidArgumentException('testsize must be between 0.0 and 1.0');
        }

        $this->seedGenerator($seed);
        $this->splitDataset($dataset, $testSize);
    }

    public function getTrainSamples(): array
    {
        return $this->trainSamples;
    }

    public function getTestSamples(): array
    {
        return $this->testSamples;
    }

    public function getTrainLabels(): array
    {
        return $this->trainLabels;
    }

    public function getTestLabels(): array
    {
        return $this->testLabels;
    }

    /**
     * Fills the train/test sample and label arrays from the dataset.
     */
    abstract protected function splitDataset(Dataset $dataset, float $testSize): void;

    /**
     * Seeds the Mersenne Twister generator, with the given seed when one is
     * provided and without arguments otherwise.
     */
    protected function seedGenerator(?int $seed = null): void
    {
        if ($seed !== null) {
            mt_srand($seed);

            return;
        }

        mt_srand();
    }
}

View File

@ -0,0 +1,49 @@
<?php
declare(strict_types=1);
namespace Phpml\CrossValidation;
use Phpml\Dataset\ArrayDataset;
use Phpml\Dataset\Dataset;
class StratifiedRandomSplit extends RandomSplit
{
    /**
     * Splits the dataset target by target: the samples of every distinct
     * target are handed to the parent split separately, each with the same
     * test share.
     */
    protected function splitDataset(Dataset $dataset, float $testSize): void
    {
        foreach ($this->splitByTarget($dataset) as $perTargetDataset) {
            parent::splitDataset($perTargetDataset, $testSize);
        }
    }

    /**
     * Groups the samples of the dataset by their target.
     *
     * @return Dataset[]
     */
    private function splitByTarget(Dataset $dataset): array
    {
        $targets = $dataset->getTargets();
        $samples = $dataset->getSamples();

        $uniqueTargets = array_unique($targets);
        $emptyGroups = array_fill(0, count($uniqueTargets), []);

        /** @var array $split */
        $split = array_combine($uniqueTargets, $emptyGroups);

        // Samples and targets are matched by array key
        foreach ($samples as $key => $sample) {
            $target = $targets[$key];
            $split[$target][] = $sample;
        }

        return $this->createDatasets($uniqueTargets, $split);
    }

    /**
     * Builds one ArrayDataset per target; every sample of a group gets that
     * group's target.
     */
    private function createDatasets(array $uniqueTargets, array $split): array
    {
        $datasets = [];
        foreach ($uniqueTargets as $target) {
            $group = $split[$target];
            $groupTargets = array_fill(0, count($group), $target);
            $datasets[$target] = new ArrayDataset($group, $groupTargets);
        }

        return $datasets;
    }
}

View File

@ -0,0 +1,62 @@
<?php
declare(strict_types=1);
namespace Phpml\Dataset;
use Phpml\Exception\InvalidArgumentException;
/**
 * Dataset backed by two plain PHP arrays: one of samples and one of targets.
 */
class ArrayDataset implements Dataset
{
    /**
     * @var array
     */
    protected $samples = [];

    /**
     * @var array
     */
    protected $targets = [];

    /**
     * Stores the given samples and targets after checking they have the same size.
     *
     * @throws InvalidArgumentException when the two arrays differ in length
     */
    public function __construct(array $samples, array $targets)
    {
        $sampleCount = count($samples);
        $targetCount = count($targets);
        if ($sampleCount !== $targetCount) {
            throw new InvalidArgumentException('Size of given arrays does not match');
        }

        $this->targets = $targets;
        $this->samples = $samples;
    }

    public function getSamples(): array
    {
        return $this->samples;
    }

    public function getTargets(): array
    {
        return $this->targets;
    }

    /**
     * Drops the given column indexes from every sample and re-indexes the
     * remaining values from zero.
     *
     * @param int[] $columns
     */
    public function removeColumns(array $columns): void
    {
        foreach (array_keys($this->samples) as $position) {
            $this->removeColumnsFromSample($this->samples[$position], $columns);
        }
    }

    /**
     * Removes the listed indexes from one sample in place.
     */
    private function removeColumnsFromSample(array &$sample, array $columns): void
    {
        foreach ($columns as $column) {
            unset($sample[$column]);
        }

        // unset() leaves gaps in the keys; compact them back to 0..n-1.
        $sample = array_values($sample);
    }
}

View File

@ -0,0 +1,52 @@
<?php
declare(strict_types=1);
namespace Phpml\Dataset;
use Phpml\Exception\FileException;
/**
 * Dataset read from a CSV file: the first $features columns of every row form
 * the sample and the column right after them is the target.
 */
class CsvDataset extends ArrayDataset
{
    /**
     * Names of the feature columns (taken from the heading row, or 0..n-1).
     *
     * @var array
     */
    protected $columnNames = [];

    /**
     * @param string $filepath      path of the CSV file
     * @param int    $features      number of leading columns used as features
     * @param bool   $headingRow    whether the first row holds the column names
     * @param string $delimiter     field delimiter passed to fgetcsv()
     * @param int    $maxLineLength line length passed to fgetcsv(); 0 means unlimited
     *
     * @throws FileException when the file is missing or cannot be opened
     */
    public function __construct(string $filepath, int $features, bool $headingRow = true, string $delimiter = ',', int $maxLineLength = 0)
    {
        if (!file_exists($filepath)) {
            throw new FileException(sprintf('File "%s" missing.', basename($filepath)));
        }

        $handle = fopen($filepath, 'rb');
        if ($handle === false) {
            throw new FileException(sprintf('File "%s" can\'t be open.', basename($filepath)));
        }

        $samples = $targets = [];

        try {
            if ($headingRow) {
                $data = fgetcsv($handle, $maxLineLength, $delimiter);
                // An empty file has no heading row: fgetcsv() returns false, not an array.
                $this->columnNames = is_array($data) ? array_slice($data, 0, $features) : [];
            } else {
                // range(0, -1) would yield [0, -1], so guard the zero-feature case.
                $this->columnNames = $features > 0 ? range(0, $features - 1) : [];
            }

            while (($data = fgetcsv($handle, $maxLineLength, $delimiter)) !== false) {
                // fgetcsv() reports a blank line as [null]; it carries no sample.
                if ($data === [null]) {
                    continue;
                }

                $samples[] = array_slice($data, 0, $features);
                // Rows without a target column keep the former null target, minus the warning.
                $targets[] = $data[$features] ?? null;
            }
        } finally {
            // Release the handle even if reading throws.
            fclose($handle);
        }

        parent::__construct($samples, $targets);
    }

    public function getColumnNames(): array
    {
        return $this->columnNames;
    }
}

View File

@ -0,0 +1,12 @@
<?php
declare(strict_types=1);
namespace Phpml\Dataset;
/**
* Read access to a collection of samples and the targets that belong to them.
*/
interface Dataset
{
/**
* @return array the samples held by the dataset
*/
public function getSamples(): array;
/**
* @return array the targets held by the dataset
*/
public function getTargets(): array;
}

View File

@ -0,0 +1,28 @@
<?php
declare(strict_types=1);
namespace Phpml\Dataset\Demo;
use Phpml\Dataset\CsvDataset;
/**
 * Demo dataset read from the bundled glass.csv file.
 *
 * Classes: 6
 * Samples per class:
 *  70 float processed building windows
 *  17 float processed vehicle windows
 *  76 non-float processed building windows
 *  13 containers
 *  9 tableware
 *  29 headlamps
 * Samples total: 214
 * Features per sample: 9.
 */
class GlassDataset extends CsvDataset
{
    /**
     * Number of leading CSV columns used as features.
     */
    private const FEATURE_COUNT = 9;

    /**
     * Loads the CSV (with heading row) shipped three directories above this class.
     */
    public function __construct()
    {
        parent::__construct(__DIR__.'/../../../data/glass.csv', self::FEATURE_COUNT, true);
    }
}

View File

@ -0,0 +1,22 @@
<?php
declare(strict_types=1);
namespace Phpml\Dataset\Demo;
use Phpml\Dataset\CsvDataset;
/**
 * Demo dataset read from the bundled iris.csv file.
 *
 * Classes: 3
 * Samples per class: 50
 * Samples total: 150
 * Features per sample: 4.
 */
class IrisDataset extends CsvDataset
{
    /**
     * Number of leading CSV columns used as features.
     */
    private const FEATURE_COUNT = 4;

    /**
     * Loads the CSV (with heading row) shipped three directories above this class.
     */
    public function __construct()
    {
        parent::__construct(__DIR__.'/../../../data/iris.csv', self::FEATURE_COUNT, true);
    }
}

View File

@ -0,0 +1,22 @@
<?php
declare(strict_types=1);
namespace Phpml\Dataset\Demo;
use Phpml\Dataset\CsvDataset;
/**
 * Demo dataset read from the bundled wine.csv file.
 *
 * Classes: 3
 * Samples per class: class 1 59; class 2 71; class 3 48
 * Samples total: 178
 * Features per sample: 13.
 */
class WineDataset extends CsvDataset
{
    /**
     * Number of leading CSV columns used as features.
     */
    private const FEATURE_COUNT = 13;

    /**
     * Loads the CSV (with heading row) shipped three directories above this class.
     */
    public function __construct()
    {
        parent::__construct(__DIR__.'/../../../data/wine.csv', self::FEATURE_COUNT, true);
    }
}

View File

@ -0,0 +1,36 @@
<?php
declare(strict_types=1);
namespace Phpml\Dataset;
use Phpml\Exception\DatasetException;
/**
 * Dataset built from a directory tree: every sub-directory of the root is a
 * target and the content of each file inside it is one sample.
 */
class FilesDataset extends ArrayDataset
{
    /**
     * @param string $rootPath directory whose sub-directories name the targets
     *
     * @throws DatasetException when $rootPath is not a directory
     */
    public function __construct(string $rootPath)
    {
        if (!is_dir($rootPath)) {
            throw new DatasetException(sprintf('Dataset root folder "%s" missing.', $rootPath));
        }

        $this->scanRootPath($rootPath);
    }

    /**
     * Scans every direct sub-directory of the root.
     */
    private function scanRootPath(string $rootPath): void
    {
        // glob() returns false on error (and, on some systems, for "no match").
        $directories = glob($rootPath.DIRECTORY_SEPARATOR.'*', GLOB_ONLYDIR) ?: [];

        foreach ($directories as $dir) {
            $this->scanDir($dir);
        }
    }

    /**
     * Adds every regular file of $dir as a sample labelled with the directory name.
     */
    private function scanDir(string $dir): void
    {
        $target = basename($dir);
        // Same guard as above: array_filter() rejects false under strict types.
        $entries = glob($dir.DIRECTORY_SEPARATOR.'*') ?: [];

        foreach (array_filter($entries, 'is_file') as $file) {
            $this->samples[] = file_get_contents($file);
            $this->targets[] = $target;
        }
    }
}

View File

@ -0,0 +1,101 @@
<?php
declare(strict_types=1);
namespace Phpml\Dataset;
use Phpml\Exception\InvalidArgumentException;
/**
 * MNIST dataset: http://yann.lecun.com/exdb/mnist/
 * original mnist dataset reader: https://github.com/AndrewCarterUK/mnist-neural-network-plain-php
 *
 * Reads an image file and a label file; every image becomes a flat array of
 * pixel intensities scaled into the 0..1 range and every label an integer.
 */
final class MnistDataset extends ArrayDataset
{
    private const MAGIC_IMAGE = 0x00000803;

    private const MAGIC_LABEL = 0x00000801;

    private const IMAGE_ROWS = 28;

    private const IMAGE_COLS = 28;

    /**
     * @throws InvalidArgumentException when a file cannot be opened, is malformed
     *                                  or truncated, or the image and label counts differ
     */
    public function __construct(string $imagePath, string $labelPath)
    {
        $this->samples = $this->readImages($imagePath);
        $this->targets = $this->readLabels($labelPath);

        if (count($this->samples) !== count($this->targets)) {
            throw new InvalidArgumentException('Must have the same number of images and labels');
        }
    }

    /**
     * Reads the image file: a 16-byte header (magic, size, rows, cols as
     * big-endian 32-bit integers) followed by size images of rows*cols bytes.
     *
     * @return array list of images, each a flat list of pixel values divided by 255
     *
     * @throws InvalidArgumentException
     */
    private function readImages(string $imagePath): array
    {
        $stream = fopen($imagePath, 'rb');

        if ($stream === false) {
            throw new InvalidArgumentException('Could not open file: '.$imagePath);
        }

        $images = [];

        try {
            $fields = unpack('Nmagic/Nsize/Nrows/Ncols', $this->readBytes($stream, 16, $imagePath));

            if ($fields['magic'] !== self::MAGIC_IMAGE) {
                throw new InvalidArgumentException('Invalid magic number: '.$imagePath);
            }

            if ($fields['rows'] != self::IMAGE_ROWS) {
                throw new InvalidArgumentException('Invalid number of image rows: '.$imagePath);
            }

            if ($fields['cols'] != self::IMAGE_COLS) {
                throw new InvalidArgumentException('Invalid number of image cols: '.$imagePath);
            }

            $imageLength = $fields['rows'] * $fields['cols'];

            for ($i = 0; $i < $fields['size']; ++$i) {
                // A short read here used to yield an empty or partial image silently.
                $imageBytes = $this->readBytes($stream, $imageLength, $imagePath);

                // Convert to float between 0 and 1
                $images[] = array_map(function ($b) {
                    return $b / 255;
                }, array_values(unpack('C*', $imageBytes)));
            }
        } finally {
            fclose($stream);
        }

        return $images;
    }

    /**
     * Reads the label file: an 8-byte header (magic, size as big-endian 32-bit
     * integers) followed by size one-byte labels.
     *
     * @return array list of integer labels
     *
     * @throws InvalidArgumentException
     */
    private function readLabels(string $labelPath): array
    {
        $stream = fopen($labelPath, 'rb');

        if ($stream === false) {
            throw new InvalidArgumentException('Could not open file: '.$labelPath);
        }

        try {
            $fields = unpack('Nmagic/Nsize', $this->readBytes($stream, 8, $labelPath));

            if ($fields['magic'] !== self::MAGIC_LABEL) {
                throw new InvalidArgumentException('Invalid magic number: '.$labelPath);
            }

            $labels = $this->readBytes($stream, $fields['size'], $labelPath);
        } finally {
            fclose($stream);
        }

        // An empty label block has nothing to unpack.
        return $labels === '' ? [] : array_values(unpack('C*', $labels));
    }

    /**
     * Reads exactly $length bytes from the stream.
     *
     * stream_get_contents() keeps reading until the requested length or the end
     * of the stream, so a shorter result means the file is truncated.
     *
     * @param resource $stream
     *
     * @throws InvalidArgumentException when fewer than $length bytes are available
     */
    private function readBytes($stream, int $length, string $path): string
    {
        // Requesting zero bytes is answered locally; fread() rejects it on PHP 8.
        $bytes = $length > 0 ? stream_get_contents($stream, $length) : '';

        if ($bytes === false || strlen($bytes) !== $length) {
            throw new InvalidArgumentException('Unexpected end of file: '.$path);
        }

        return $bytes;
    }
}

View File

@ -0,0 +1,131 @@
<?php
declare(strict_types=1);
namespace Phpml\Dataset;
use Phpml\Exception\DatasetException;
use Phpml\Exception\FileException;
/**
 * Dataset read from a text file whose lines have the form
 * "<target> <index>:<value> <index>:<value> ... # optional comment",
 * with 1-based feature indexes.
 */
class SvmDataset extends ArrayDataset
{
    /**
     * @throws FileException    when the file is missing or cannot be opened
     * @throws DatasetException when a line cannot be parsed
     */
    public function __construct(string $filePath)
    {
        [$samples, $targets] = self::readProblem($filePath);

        parent::__construct($samples, $targets);
    }

    /**
     * Parses the whole file and pads every sample to the widest feature index seen.
     *
     * @return array [samples, targets]
     */
    private static function readProblem(string $filePath): array
    {
        $handle = self::openFile($filePath);

        $samples = [];
        $targets = [];
        $maxIndex = 0;

        try {
            while (false !== $line = fgets($handle)) {
                [$sample, $target, $maxIndex] = self::processLine((string) $line, $maxIndex);
                $samples[] = $sample;
                $targets[] = $target;
            }
        } finally {
            // processLine() throws on malformed input; do not leak the handle then.
            fclose($handle);
        }

        // Earlier lines were sized for a smaller $maxIndex; bring them to full width.
        foreach ($samples as &$sample) {
            $sample = array_pad($sample, $maxIndex + 1, 0);
        }
        unset($sample);

        return [$samples, $targets];
    }

    /**
     * @return resource
     *
     * @throws FileException
     */
    private static function openFile(string $filePath)
    {
        if (!file_exists($filePath)) {
            throw new FileException(sprintf('File "%s" missing.', basename($filePath)));
        }

        $handle = fopen($filePath, 'rb');
        if ($handle === false) {
            throw new FileException(sprintf('File "%s" can\'t be open.', basename($filePath)));
        }

        return $handle;
    }

    /**
     * Turns one line into a dense sample and its target.
     *
     * @return array [sample, target, updated maxIndex]
     */
    private static function processLine(string $line, int $maxIndex): array
    {
        $columns = self::parseLine($line);

        $target = self::parseTargetColumn($columns[0]);
        $sample = array_fill(0, $maxIndex + 1, 0);

        $n = count($columns);
        for ($i = 1; $i < $n; ++$i) {
            [$index, $value] = self::parseFeatureColumn($columns[$i]);
            if ($index > $maxIndex) {
                // A new highest index widens this sample immediately.
                $maxIndex = $index;
                $sample = array_pad($sample, $maxIndex + 1, 0);
            }

            $sample[$index] = $value;
        }

        return [$sample, $target, $maxIndex];
    }

    /**
     * Strips the "#" comment and trailing whitespace, then splits on single
     * spaces (tabs count as spaces).
     */
    private static function parseLine(string $line): array
    {
        $line = explode('#', $line, 2)[0];
        $line = rtrim($line);
        $line = str_replace("\t", ' ', $line);

        return explode(' ', $line);
    }

    /**
     * @throws DatasetException when the target is not numeric
     */
    private static function parseTargetColumn(string $column): float
    {
        if (!is_numeric($column)) {
            throw new DatasetException(sprintf('Invalid target "%s".', $column));
        }

        return (float) $column;
    }

    /**
     * Splits an "index:value" column.
     *
     * @return array [zero-based index, value]
     *
     * @throws DatasetException when the column is not of the form index:value
     */
    private static function parseFeatureColumn(string $column): array
    {
        $feature = explode(':', $column, 2);
        if (count($feature) !== 2) {
            throw new DatasetException(sprintf('Invalid value "%s".', $column));
        }

        $index = self::parseFeatureIndex($feature[0]);
        $value = self::parseFeatureValue($feature[1]);

        return [$index, $value];
    }

    /**
     * Validates a 1-based index made of digits only and converts it to 0-based.
     *
     * @throws DatasetException
     */
    private static function parseFeatureIndex(string $index): int
    {
        if (!is_numeric($index) || !ctype_digit($index)) {
            throw new DatasetException(sprintf('Invalid index "%s".', $index));
        }

        if ((int) $index < 1) {
            throw new DatasetException(sprintf('Invalid index "%s".', $index));
        }

        return (int) $index - 1;
    }

    /**
     * @throws DatasetException when the value is not numeric
     */
    private static function parseFeatureValue(string $value): float
    {
        if (!is_numeric($value)) {
            throw new DatasetException(sprintf('Invalid value "%s".', $value));
        }

        return (float) $value;
    }
}

View File

@ -0,0 +1,94 @@
<?php
declare(strict_types=1);
namespace Phpml\DimensionReduction;
use Phpml\Math\LinearAlgebra\EigenvalueDecomposition;
use Phpml\Math\Matrix;
/**
 * Base class that computes the eigen pairs (values and vectors) of a matrix and
 * keeps the leading ones, limited either by numFeatures or by totalVariance.
 *
 * @author hp
 */
abstract class EigenTransformerBase
{
    /**
     * Share of the total variance that the kept eigenvectors must explain
     * (used only while numFeatures is null).
     *
     * @var float
     */
    public $totalVariance = 0.9;

    /**
     * Number of eigenvectors to keep; takes precedence over totalVariance when set.
     *
     * @var int
     */
    public $numFeatures = null;

    /**
     * Eigenvectors kept by the last eigenDecomposition() call.
     *
     * @var array
     */
    protected $eigVectors = [];

    /**
     * Eigenvalues matching $eigVectors, largest first.
     *
     * @var array
     */
    protected $eigValues = [];

    /**
     * Decomposes the matrix and stores its leading eigen pairs.
     *
     * Pairs are taken in descending eigenvalue order until either numFeatures
     * pairs were collected or, when numFeatures is null, the accumulated share
     * of the eigenvalue sum reaches totalVariance.
     */
    protected function eigenDecomposition(array $matrix): void
    {
        $decomposition = new EigenvalueDecomposition($matrix);
        $eigenvalues = $decomposition->getRealEigenvalues();
        $eigenvectors = $decomposition->getEigenvectors();
        $eigenvalueSum = array_sum($eigenvalues);

        // Largest eigenvalue first; arsort() keeps the keys that pair values with vectors.
        arsort($eigenvalues);

        $coveredVariance = 0.0;
        $keptVectors = [];
        $keptValues = [];

        foreach ($eigenvalues as $position => $eigenvalue) {
            $coveredVariance += $eigenvalue / $eigenvalueSum;
            $keptVectors[] = $eigenvectors[$position];
            $keptValues[] = $eigenvalue;

            $limitReached = $this->numFeatures !== null
                ? count($keptVectors) == $this->numFeatures
                : $coveredVariance >= $this->totalVariance;

            if ($limitReached) {
                break;
            }
        }

        $this->eigValues = $keptValues;
        $this->eigVectors = $keptVectors;
    }

    /**
     * Projects the data onto the kept eigenvectors: data x transpose(eigVectors).
     */
    protected function reduce(array $data): array
    {
        $dataMatrix = new Matrix($data);
        $basis = new Matrix($this->eigVectors);
        $projected = $dataMatrix->multiply($basis->transpose());

        return $projected->toArray();
    }
}

View File

@ -0,0 +1,234 @@
<?php
declare(strict_types=1);
namespace Phpml\DimensionReduction;
use Closure;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\InvalidOperationException;
use Phpml\Math\Distance\Euclidean;
use Phpml\Math\Distance\Manhattan;
use Phpml\Math\Matrix;
/**
* PCA variant that runs the eigen decomposition on a centered kernel
* (similarity) matrix of the samples instead of on their covariance matrix.
*/
class KernelPCA extends PCA
{
public const KERNEL_RBF = 1;
public const KERNEL_SIGMOID = 2;
public const KERNEL_LAPLACIAN = 3;
public const KERNEL_LINEAR = 4;
/**
* Selected kernel function (one of the KERNEL_* constants)
*
* @var int
*/
protected $kernel;
/**
* Gamma value used by the kernel; when left null, fit() sets it to 1 / number of rows
*
* @var float|null
*/
protected $gamma;
/**
* Original dataset used to fit KernelPCA
*
* @var array
*/
protected $data = [];
/**
* Kernel principal component analysis (KernelPCA) is an extension of PCA using
* techniques of kernel methods. It is more suitable for data that involves
* vectors that are not linearly separable.
*
* Example: $kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);
* will initialize the algorithm with an RBF kernel having the gamma parameter as 15.0.
* This transformation will return the same number of rows with only 2 columns.
*
* @param int $kernel One of the KERNEL_* constants
* @param float|null $totalVariance Total variance to be preserved if numFeatures is not given
* @param int|null $numFeatures Number of columns to be returned
* @param float|null $gamma Gamma parameter used by the RBF, Sigmoid and Laplacian kernels
*
* @throws InvalidArgumentException
*/
public function __construct(int $kernel = self::KERNEL_RBF, ?float $totalVariance = null, ?int $numFeatures = null, ?float $gamma = null)
{
if (!in_array($kernel, [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR], true)) {
throw new InvalidArgumentException('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian');
}
parent::__construct($totalVariance, $numFeatures);
$this->kernel = $kernel;
$this->gamma = $gamma;
}
/**
* Fits the model to $data (an n-by-m matrix): builds the n-by-n kernel
* matrix, centers it and keeps its leading eigen pairs according to
* $totalVariance or $numFeatures.
*
* @return array the kept eigenvectors, transposed (one row per sample)
*/
public function fit(array $data): array
{
$numRows = count($data);
$this->data = $data;
// Default gamma; NOTE(review): once set it is kept for later fit() calls too
if ($this->gamma === null) {
$this->gamma = 1.0 / $numRows;
}
$matrix = $this->calculateKernelMatrix($this->data, $numRows);
$matrix = $this->centerMatrix($matrix, $numRows);
$this->eigenDecomposition($matrix);
$this->fit = true;
return Matrix::transposeArray($this->eigVectors);
}
/**
* Transforms the given sample to a lower dimensional vector by using
* the variables obtained during the last run of fit().
* Only a single one-dimensional sample is accepted.
*
* @throws InvalidArgumentException when $sample is not one-dimensional
* @throws InvalidOperationException when fit() has not been run yet
*/
public function transform(array $sample): array
{
if (!$this->fit) {
throw new InvalidOperationException('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
}
if (is_array($sample[0])) {
throw new InvalidArgumentException('KernelPCA::transform() accepts only one-dimensional arrays');
}
$pairs = $this->getDistancePairs($sample);
return $this->projectSample($pairs);
}
/**
* Calculates similarity matrix by use of selected kernel function.
* An n-by-m matrix is given and an n-by-n matrix is returned.
*/
protected function calculateKernelMatrix(array $data, int $numRows): array
{
$kernelFunc = $this->getKernel();
$matrix = [];
for ($i = 0; $i < $numRows; ++$i) {
for ($k = 0; $k < $numRows; ++$k) {
// Evaluate the kernel for the upper triangle only and mirror it below the diagonal
if ($i <= $k) {
$matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]);
} else {
$matrix[$i][$k] = $matrix[$k][$i];
}
}
}
return $matrix;
}
/**
* Kernel matrix is centered in its original space by using the following
* conversion:
*
* K' = K - N.K - K.N + N.K.N where N is n-by-n matrix filled with 1/n
*/
protected function centerMatrix(array $matrix, int $n): array
{
$N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n));
$N = new Matrix($N, false);
$K = new Matrix($matrix, false);
// K.N (This term is repeated so we cache it once)
$K_N = $K->multiply($N);
// N.K
$N_K = $N->multiply($K);
// N.K.N
$N_K_N = $N->multiply($K_N);
return $K->subtract($N_K)
->subtract($K_N)
->add($N_K_N)
->toArray();
}
/**
* Returns the callable kernel function
*
* @throws InvalidArgumentException when the stored kernel id is not a known KERNEL_* value
*/
protected function getKernel(): Closure
{
switch ($this->kernel) {
case self::KERNEL_LINEAR:
// k(x,y) = xT.y
return function ($x, $y) {
return Matrix::dot($x, $y)[0];
};
case self::KERNEL_RBF:
// k(x,y)=exp(-γ.d(x,y)) where d is Euclidean::sqDistance() (presumably the squared Euclidean distance)
$dist = new Euclidean();
return function ($x, $y) use ($dist) {
return exp(-$this->gamma * $dist->sqDistance($x, $y));
};
case self::KERNEL_SIGMOID:
// k(x,y)=tanh(γ.(xT.y+c0)) where c0=1
return function ($x, $y) {
$res = Matrix::dot($x, $y)[0] + 1.0;
return tanh((float) $this->gamma * $res);
};
case self::KERNEL_LAPLACIAN:
// k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance
$dist = new Manhattan();
return function ($x, $y) use ($dist) {
return exp(-$this->gamma * $dist->distance($x, $y));
};
default:
// Not reached: the constructor only accepts the four kernels above
throw new InvalidArgumentException(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
}
}
/**
* Evaluates the kernel between the sample and every row of the fitted dataset.
*
* @return array one kernel value per fitted row
*/
protected function getDistancePairs(array $sample): array
{
$kernel = $this->getKernel();
$pairs = [];
foreach ($this->data as $row) {
$pairs[] = $kernel($row, $sample);
}
return $pairs;
}
/**
* Projects the kernel values of a sample onto the kept eigenvectors,
* each eigenvector being divided by its eigenvalue first.
*/
protected function projectSample(array $pairs): array
{
// Normalize eigenvectors by eig = eigVectors / eigValues
$func = function ($eigVal, $eigVect) {
$m = new Matrix($eigVect, false);
$a = $m->divideByScalar($eigVal)->toArray();
return $a[0];
};
$eig = array_map($func, $this->eigValues, $this->eigVectors);
// return k.dot(eig)
return Matrix::dot($pairs, $eig);
}
}

View File

@ -0,0 +1,223 @@
<?php
declare(strict_types=1);
namespace Phpml\DimensionReduction;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\InvalidOperationException;
use Phpml\Math\Matrix;
/**
 * Linear Discriminant Analysis: supervised dimensionality reduction that keeps
 * the leading eigenvectors of inverse(sW) . sB, where sW is the within-class and
 * sB the between-class scatter matrix.
 */
class LDA extends EigenTransformerBase
{
    /**
     * Whether fit() has been run.
     *
     * @var bool
     */
    public $fit = false;

    /**
     * Distinct class labels of the last fit(), in order of first appearance.
     *
     * @var array
     */
    public $labels = [];

    /**
     * Column means per class, indexed by the position of the class in $labels.
     *
     * @var array
     */
    public $means = [];

    /**
     * Number of samples per class, indexed like $means.
     *
     * @var array
     */
    public $counts = [];

    /**
     * Column means over the whole dataset.
     *
     * @var float[]
     */
    public $overallMean = [];

    /**
     * Linear Discriminant Analysis (LDA) is used to reduce the dimensionality
     * of the data. Unlike Principal Component Analysis (PCA), it is a supervised
     * technique that requires the class labels in order to fit the data to a
     * lower dimensional space.
     *
     * The algorithm is initialized by specifying exactly one of totalVariance
     * (a value between 0.1 and 0.99) or numFeatures (number of features in the
     * dataset) to be preserved.
     *
     * @param float|null $totalVariance Total explained variance to be preserved
     * @param int|null   $numFeatures   Number of features to be preserved
     *
     * @throws InvalidArgumentException
     */
    public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
    {
        if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
            throw new InvalidArgumentException('Total variance can be a value between 0.1 and 0.99');
        }

        if ($numFeatures !== null && $numFeatures <= 0) {
            throw new InvalidArgumentException('Number of features to be preserved should be greater than 0');
        }

        // Exactly one of the two limits has to be given.
        if (($totalVariance !== null) === ($numFeatures !== null)) {
            throw new InvalidArgumentException('Either totalVariance or numFeatures should be specified in order to run the algorithm');
        }

        if ($numFeatures !== null) {
            $this->numFeatures = $numFeatures;
        }

        if ($totalVariance !== null) {
            $this->totalVariance = $totalVariance;
        }
    }

    /**
     * Trains the algorithm to transform the given data to a lower dimensional space.
     *
     * @param array $data    n-by-m matrix of samples
     * @param array $classes class label of every row of $data
     *
     * @return array the reduced version of $data
     */
    public function fit(array $data, array $classes): array
    {
        $this->labels = $this->getLabels($classes);
        $this->means = $this->calculateMeans($data, $classes);

        $sW = $this->calculateClassVar($data, $classes);
        $sB = $this->calculateClassCov();

        $S = $sW->inverse()->multiply($sB);
        $this->eigenDecomposition($S->toArray());

        $this->fit = true;

        return $this->reduce($data);
    }

    /**
     * Transforms the given sample to a lower dimensional vector by using
     * the eigenVectors obtained in the last run of fit().
     * A single one-dimensional sample is wrapped into a one-row matrix.
     *
     * @throws InvalidOperationException when fit() has not been run yet
     */
    public function transform(array $sample): array
    {
        if (!$this->fit) {
            throw new InvalidOperationException('LDA has not been fitted with respect to original dataset, please run LDA::fit() first');
        }

        if (!is_array($sample[0])) {
            $sample = [$sample];
        }

        return $this->reduce($sample);
    }

    /**
     * Returns unique labels in the dataset, in order of first appearance.
     *
     * array_unique() keeps the original value types. Going through array keys
     * (array_count_values() + array_keys()) would turn numeric strings such as
     * "1" into integers, after which the strict array_search() calls below can
     * no longer find them.
     */
    protected function getLabels(array $classes): array
    {
        return array_values(array_unique($classes));
    }

    /**
     * Calculates mean of each column for each class and returns
     * n by m matrix where n is number of labels and m is number of columns.
     * Also stores the per-class sample counts and the overall column means.
     */
    protected function calculateMeans(array $data, array $classes): array
    {
        $means = [];
        $counts = [];
        $overallMean = array_fill(0, count($data[0]), 0.0);

        foreach ($data as $index => $row) {
            // Classes are addressed by their position in $this->labels.
            $label = array_search($classes[$index], $this->labels, true);

            foreach ($row as $col => $val) {
                if (!isset($means[$label][$col])) {
                    $means[$label][$col] = 0.0;
                }

                $means[$label][$col] += $val;
                $overallMean[$col] += $val;
            }

            if (!isset($counts[$label])) {
                $counts[$label] = 0;
            }

            ++$counts[$label];
        }

        // Turn the per-class column sums into means.
        foreach ($means as $index => $row) {
            foreach ($row as $col => $sum) {
                $means[$index][$col] = $sum / $counts[$index];
            }
        }

        // Calculate overall mean of the dataset for each column
        $numElements = array_sum($counts);
        $map = function ($el) use ($numElements) {
            return $el / $numElements;
        };
        $this->overallMean = array_map($map, $overallMean);
        $this->counts = $counts;

        return $means;
    }

    /**
     * Returns the within-class scatter matrix: the sum over all rows of
     * (x - m)T.(x - m), m being the mean of the row's class. The result is
     * an m by m matrix where m is the number of columns.
     */
    protected function calculateClassVar(array $data, array $classes): Matrix
    {
        // s is an m by m (number of columns) zero matrix
        $s = array_fill(0, count($data[0]), array_fill(0, count($data[0]), 0));
        $sW = new Matrix($s, false);

        foreach ($data as $index => $row) {
            $label = array_search($classes[$index], $this->labels, true);
            $means = $this->means[$label];

            $row = $this->calculateVar($row, $means);

            $sW = $sW->add($row);
        }

        return $sW;
    }

    /**
     * Returns the between-class scatter matrix: the sum over all classes of
     * N.(mc - mo)T.(mc - mo), mc being the class mean, mo the overall mean and
     * N the class size. The result is an m by m matrix where m is the number
     * of columns.
     */
    protected function calculateClassCov(): Matrix
    {
        // s is an m by m (number of columns) zero matrix
        $s = array_fill(0, count($this->overallMean), array_fill(0, count($this->overallMean), 0));
        $sB = new Matrix($s, false);

        foreach ($this->means as $index => $classMeans) {
            $row = $this->calculateVar($classMeans, $this->overallMean);
            $N = $this->counts[$index];
            $sB = $sB->add($row->multiplyByScalar($N));
        }

        return $sB;
    }

    /**
     * Returns the result of the calculation (x - m)T.(x - m)
     */
    protected function calculateVar(array $row, array $means): Matrix
    {
        $x = new Matrix($row, false);
        $m = new Matrix($means, false);
        $diff = $x->subtract($m);

        return $diff->transpose()->multiply($diff);
    }
}

View File

@ -0,0 +1,131 @@
<?php
declare(strict_types=1);
namespace Phpml\DimensionReduction;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\InvalidOperationException;
use Phpml\Math\Statistic\Covariance;
use Phpml\Math\Statistic\Mean;
/**
 * Principal Component Analysis: centers the data, decomposes its covariance
 * matrix and projects the data onto the leading eigenvectors.
 */
class PCA extends EigenTransformerBase
{
    /**
     * Mean of every column of the data given to the last fit()
     *
     * @var array
     */
    protected $means = [];

    /**
     * Whether fit() has been run.
     *
     * @var bool
     */
    protected $fit = false;

    /**
     * PCA (Principal Component Analysis) used to explain given
     * data with lower number of dimensions. This analysis transforms the
     * data to a lower dimensional version of it by conserving a proportion of total variance
     * within the data. It is a lossy data compression technique.
     *
     * Exactly one of the two parameters has to be given.
     *
     * @param float|null $totalVariance Total explained variance to be preserved (0.1 to 0.99)
     * @param int|null   $numFeatures   Number of features to be preserved
     *
     * @throws InvalidArgumentException
     */
    public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
    {
        if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
            throw new InvalidArgumentException('Total variance can be a value between 0.1 and 0.99');
        }

        if ($numFeatures !== null && $numFeatures <= 0) {
            throw new InvalidArgumentException('Number of features to be preserved should be greater than 0');
        }

        if (($totalVariance !== null) === ($numFeatures !== null)) {
            throw new InvalidArgumentException('Either totalVariance or numFeatures should be specified in order to run the algorithm');
        }

        if ($numFeatures !== null) {
            $this->numFeatures = $numFeatures;
        }

        if ($totalVariance !== null) {
            $this->totalVariance = $totalVariance;
        }
    }

    /**
     * Takes a data and returns a lower dimensional version
     * of this data while preserving $totalVariance or $numFeatures.
     * $data is an n-by-m matrix and returned array is
     * n-by-k matrix where k <= m
     */
    public function fit(array $data): array
    {
        $n = count($data[0]);

        // normalize() only computes the means while none are stored, so a second
        // fit() would center the new data with the means of the previous one.
        // Recompute them here on every fit.
        $this->calculateMeans($data, $n);
        $data = $this->normalize($data, $n);

        // The data is centered now, hence the zero means handed to the covariance.
        $covMatrix = Covariance::covarianceMatrix($data, array_fill(0, $n, 0));

        $this->eigenDecomposition($covMatrix);

        $this->fit = true;

        return $this->reduce($data);
    }

    /**
     * Transforms the given sample to a lower dimensional vector by using
     * the eigenVectors obtained in the last run of fit().
     * A single one-dimensional sample is wrapped into a one-row matrix; the
     * means stored by fit() are subtracted before projecting.
     *
     * @throws InvalidOperationException when fit() has not been run yet
     */
    public function transform(array $sample): array
    {
        if (!$this->fit) {
            throw new InvalidOperationException('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
        }

        if (!is_array($sample[0])) {
            $sample = [$sample];
        }

        $sample = $this->normalize($sample, count($sample[0]));

        return $this->reduce($sample);
    }

    /**
     * Stores the arithmetic mean of each of the first $n columns of $data.
     */
    protected function calculateMeans(array $data, int $n): void
    {
        // Calculate means for each dimension
        $this->means = [];
        for ($i = 0; $i < $n; ++$i) {
            $column = array_column($data, $i);
            $this->means[] = Mean::arithmetic($column);
        }
    }

    /**
     * Normalization of the data includes subtracting mean from
     * each dimension therefore dimensions will be centered to zero.
     * The means are computed from $data only when none are stored yet.
     */
    protected function normalize(array $data, int $n): array
    {
        if (count($this->means) === 0) {
            $this->calculateMeans($data, $n);
        }

        // Normalize data
        foreach (array_keys($data) as $i) {
            for ($k = 0; $k < $n; ++$k) {
                $data[$i][$k] -= $this->means[$k];
            }
        }

        return $data;
    }
}

View File

@ -0,0 +1,15 @@
<?php
declare(strict_types=1);
namespace Phpml;
/**
* Contract of a model that is trained on samples with targets and then
* asked to predict for new samples.
*/
interface Estimator
{
/**
* Trains the estimator with the given samples and their targets.
*/
public function train(array $samples, array $targets): void;
/**
* Predicts for the given samples; the shape of the result is left to the
* implementation.
*
* @return mixed
*/
public function predict(array $samples);
}

View File

@ -0,0 +1,11 @@
<?php
declare(strict_types=1);
namespace Phpml\Exception;
use Exception;
/**
* Raised for dataset problems, e.g. by FilesDataset when the root folder is
* missing and by SvmDataset when a line cannot be parsed.
*/
class DatasetException extends Exception
{
}

View File

@ -0,0 +1,11 @@
<?php
declare(strict_types=1);
namespace Phpml\Exception;
use Exception;
/**
* Raised when a file is missing or cannot be opened, e.g. by CsvDataset and
* SvmDataset.
*/
class FileException extends Exception
{
}

View File

@ -0,0 +1,11 @@
<?php
declare(strict_types=1);
namespace Phpml\Exception;
use Exception;
/**
* Raised when a caller passes an unusable argument (e.g. an out-of-range test
* size or sample/target arrays of different sizes).
* It extends Exception directly, so it is not a subclass of PHP's built-in
* \InvalidArgumentException.
*/
class InvalidArgumentException extends Exception
{
}

View File

@ -0,0 +1,11 @@
<?php
declare(strict_types=1);
namespace Phpml\Exception;
use Exception;
/**
* Raised when an operation is called in the wrong state, e.g. transform() of
* PCA, KernelPCA or LDA before fit() has been run.
*/
class InvalidOperationException extends Exception
{
}

View File

@ -0,0 +1,11 @@
<?php
declare(strict_types=1);
namespace Phpml\Exception;
use Exception;
/**
* Library exception type; presumably raised when running a libsvm command
* fails (no throw site is visible here, confirm against the callers).
*/
class LibsvmCommandException extends Exception
{
}

View File

@ -0,0 +1,11 @@
<?php
declare(strict_types=1);
namespace Phpml\Exception;
use Exception;
/**
* Library exception type; presumably raised by matrix operations
* (no throw site is visible here, confirm against the callers).
*/
class MatrixException extends Exception
{
}

View File

@ -0,0 +1,11 @@
<?php
declare(strict_types=1);
namespace Phpml\Exception;
use Exception;
/**
* Library exception type; presumably raised by the normalizer
* (no throw site is visible here, confirm against the callers).
*/
class NormalizerException extends Exception
{
}

View File

@ -0,0 +1,11 @@
<?php
declare(strict_types=1);
namespace Phpml\Exception;
use Exception;
/**
* Library exception type; presumably raised when (un)serializing fails
* (no throw site is visible here, confirm against the callers).
*/
class SerializeException extends Exception
{
}

View File

@ -0,0 +1,36 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureExtraction;
use Phpml\Exception\InvalidArgumentException;
/**
 * Set of stop words with constant-time membership checks.
 */
class StopWords
{
    /**
     * Stop words stored as array keys (word => true) for isset() lookups.
     *
     * @var array
     */
    protected $stopWords = [];

    /**
     * @param array $stopWords list of words to treat as stop words
     */
    public function __construct(array $stopWords)
    {
        $this->stopWords = array_fill_keys($stopWords, true);
    }

    /**
     * Tells whether the token is a stop word; the comparison is exact
     * (case-sensitive).
     */
    public function isStopWord(string $token): bool
    {
        return isset($this->stopWords[$token]);
    }

    /**
     * Creates the stop word list of a language by instantiating the class of
     * that name in the StopWords sub-namespace.
     *
     * @throws InvalidArgumentException when no class exists for the language
     */
    public static function factory(string $language = 'English'): self
    {
        // Plain concatenation: the "${var}" interpolation form is deprecated since PHP 8.2.
        $className = __NAMESPACE__.'\\StopWords\\'.$language;

        if (!class_exists($className)) {
            throw new InvalidArgumentException(sprintf('Can\'t find "%s" language for StopWords', $language));
        }

        return new $className();
    }
}

View File

@ -0,0 +1,33 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureExtraction\StopWords;
use Phpml\FeatureExtraction\StopWords;
/**
 * English stop word list.
 */
final class English extends StopWords
{
    /**
     * Plain list of words; the parent constructor turns it into a lookup map.
     *
     * @var array
     */
    protected $stopWords = [
        'a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an', 'and', 'any', 'are', 'aren\'t', 'as', 'at', 'be', 'because',
        'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'can\'t', 'cannot', 'could', 'couldn\'t', 'did', 'didn\'t',
        'do', 'does', 'doesn\'t', 'doing', 'don\'t', 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadn\'t', 'has',
        'hasn\'t', 'have', 'haven\'t', 'having', 'he', 'he\'d', 'he\'ll', 'he\'s', 'her', 'here', 'here\'s', 'hers', 'herself', 'him',
        'himself', 'his', 'how', 'how\'s', 'i', 'i\'d', 'i\'ll', 'i\'m', 'i\'ve', 'if', 'in', 'into', 'is', 'isn\'t', 'it', 'it\'s', 'its',
        'itself', 'let\'s', 'me', 'more', 'most', 'mustn\'t', 'my', 'myself', 'no', 'nor', 'not', 'of', 'off', 'on', 'once', 'only', 'or',
        // 'ours' and 'ourselves' were fused into the single entry 'oursourselves', so neither word matched.
        'other', 'ought', 'our', 'ours', 'ourselves', 'out', 'over', 'own', 'same', 'shan\'t', 'she', 'she\'d', 'she\'ll', 'she\'s', 'should',
        'shouldn\'t', 'so', 'some', 'such', 'than', 'that', 'that\'s', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'there',
        'there\'s', 'these', 'they', 'they\'d', 'they\'ll', 'they\'re', 'they\'ve', 'this', 'those', 'through', 'to', 'too', 'under',
        'until', 'up', 'very', 'was', 'wasn\'t', 'we', 'we\'d', 'we\'ll', 'we\'re', 'we\'ve', 'were', 'weren\'t', 'what', 'what\'s',
        'when', 'when\'s', 'where', 'where\'s', 'which', 'while', 'who', 'who\'s', 'whom', 'why', 'why\'s', 'with', 'won\'t', 'would',
        'wouldn\'t', 'you', 'you\'d', 'you\'ll', 'you\'re', 'you\'ve', 'your', 'yours', 'yourself', 'yourselves',
    ];

    /**
     * Registers the list above with the parent class.
     */
    public function __construct()
    {
        parent::__construct($this->stopWords);
    }
}

View File

@ -0,0 +1,29 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureExtraction\StopWords;
use Phpml\FeatureExtraction\StopWords;
/**
* French stop word list.
*/
final class French extends StopWords
{
/**
* Plain list of words; the parent constructor turns it into a lookup map.
*
* @var array
*/
protected $stopWords = [
'alors', 'au', 'aucuns', 'aussi', 'autre', 'avant', 'avec', 'avoir', 'bon', 'car', 'ce', 'cela', 'ces', 'ceux', 'chaque', 'ci',
'comme', 'comment', 'dans', 'des', 'du', 'dedans', 'dehors', 'depuis', 'devrait', 'doit', 'donc', 'dos', 'début', 'elle', 'elles',
'en', 'encore', 'essai', 'est', 'et', 'eu', 'fait', 'faites', 'fois', 'font', 'hors', 'ici', 'il', 'ils', 'je', 'juste', 'la',
'le', 'les', 'leur', 'là', 'ma', 'maintenant', 'mais', 'mes', 'mine', 'moins', 'mon', 'mot', 'même', 'ni', 'nommés', 'notre',
'nous', 'ou', 'où', 'par', 'parce', 'pas', 'peut', 'peu', 'plupart', 'pour', 'pourquoi', 'quand', 'que', 'quel', 'quelle',
'quelles', 'quels', 'qui', 'sa', 'sans', 'ses', 'seulement', 'si', 'sien', 'son', 'sont', 'sous', 'soyez', 'sujet', 'sur', 'ta',
'tandis', 'tellement', 'tels', 'tes', 'ton', 'tous', 'tout', 'trop', 'très', 'tu', 'voient', 'vont', 'votre', 'vous', 'vu',
'ça', 'étaient', 'état', 'étions', 'été', 'être',
];
/**
* Registers the list above with the parent class.
*/
public function __construct()
{
parent::__construct($this->stopWords);
}
}

View File

@ -0,0 +1,30 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureExtraction\StopWords;
use Phpml\FeatureExtraction\StopWords;
/**
* Polish stop word list.
*/
final class Polish extends StopWords
{
/**
* Plain list of words; the parent constructor turns it into a lookup map.
*
* @var array
*/
protected $stopWords = [
'ach', 'aj', 'albo', 'bardzo', 'bez', 'bo', 'być', 'ci', 'cię', 'ciebie', 'co', 'czy', 'daleko', 'dla', 'dlaczego', 'dlatego',
'do', 'dobrze', 'dokąd', 'dość', 'dużo', 'dwa', 'dwaj', 'dwie', 'dwoje', 'dziś', 'dzisiaj', 'gdyby', 'gdzie', 'go', 'ich', 'ile',
'im', 'inny', 'ja', 'ją', 'jak', 'jakby', 'jaki', 'je', 'jeden', 'jedna', 'jedno', 'jego', 'jej', 'jemu', 'jeśli', 'jest', 'jestem',
'jeżeli', 'już', 'każdy', 'kiedy', 'kierunku', 'kto', 'ku', 'lub', 'ma', 'mają', 'mam', 'mi', 'mną', 'mnie', 'moi', 'mój', 'moja',
'moje', 'może', 'mu', 'my', 'na', 'nam', 'nami', 'nas', 'nasi', 'nasz', 'nasza', 'nasze', 'natychmiast', 'nią', 'nic', 'nich',
'nie', 'niego', 'niej', 'niemu', 'nigdy', 'nim', 'nimi', 'niż', 'obok', 'od', 'około', 'on', 'ona', 'one', 'oni', 'ono', 'owszem',
'po', 'pod', 'ponieważ', 'przed', 'przedtem', 'są', 'sam', 'sama', 'się', 'skąd', 'tak', 'taki', 'tam', 'ten', 'to', 'tobą', 'tobie',
'tu', 'tutaj', 'twoi', 'twój', 'twoja', 'twoje', 'ty', 'wam', 'wami', 'was', 'wasi', 'wasz', 'wasza', 'wasze', 'we', 'więc',
'wszystko', 'wtedy', 'wy', 'żaden', 'zawsze', 'że',
];
/**
* Registers the list above with the parent class.
*/
public function __construct()
{
parent::__construct($this->stopWords);
}
}

View File

@ -0,0 +1,54 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureExtraction;
use Phpml\Transformer;
class TfIdfTransformer implements Transformer
{
    /**
     * Inverse document frequency of every feature, keyed by feature index.
     * Filled by fit() and used as the multiplier in transform().
     *
     * @var array
     */
    private $idf = [];

    /**
     * @param array $samples optional token-count samples; when non-empty the
     *                       transformer is fitted on them immediately
     */
    public function __construct(array $samples = [])
    {
        if (count($samples) > 0) {
            $this->fit($samples);
        }
    }

    /**
     * Computes idf = log10(sampleCount / documentFrequency) for every feature.
     *
     * A feature that occurs in no sample gets an idf of 0.0 instead of
     * triggering a division by zero; its term frequency is 0 everywhere, so
     * the weighted value stays 0 either way. Fitting on an empty sample set
     * just clears the stored idf values.
     *
     * @param array      $samples rows of per-feature counts
     * @param array|null $targets unused
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        if (count($samples) === 0) {
            $this->idf = [];

            return;
        }

        $this->countTokensFrequency($samples);
        $count = count($samples);

        foreach ($this->idf as $index => $documentFrequency) {
            $this->idf[$index] = $documentFrequency > 0
                ? log((float) ($count / $documentFrequency), 10.0)
                : 0.0;
        }
    }

    /**
     * Multiplies every feature value in place by the idf of its index.
     */
    public function transform(array &$samples): void
    {
        foreach ($samples as &$sample) {
            foreach ($sample as $index => &$feature) {
                $feature *= $this->idf[$index];
            }
            // Break the reference so the next row does not alias this one.
            unset($feature);
        }
        unset($sample);
    }

    /**
     * Stores in $this->idf, per feature index, the number of samples in which
     * the feature has a count greater than zero. The index set is seeded from
     * the first sample.
     */
    private function countTokensFrequency(array $samples): void
    {
        $this->idf = array_fill_keys(array_keys($samples[0]), 0);

        foreach ($samples as $sample) {
            foreach ($sample as $index => $count) {
                if ($count > 0) {
                    $this->idf[$index] = ($this->idf[$index] ?? 0) + 1;
                }
            }
        }
    }
}

View File

@ -0,0 +1,166 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureExtraction;
use Phpml\Tokenization\Tokenizer;
use Phpml\Transformer;
class TokenCountVectorizer implements Transformer
{
    /**
     * Splits a raw sample into tokens.
     *
     * @var Tokenizer
     */
    private $tokenizer;

    /**
     * Optional filter; tokens it reports as stop words are ignored.
     *
     * @var StopWords|null
     */
    private $stopWords;

    /**
     * Frequency ratio below which a token's count is zeroed after transform().
     *
     * @var float
     */
    private $minDF;

    /**
     * Map of token => column index, in order of first appearance during fit().
     *
     * @var array
     */
    private $vocabulary = [];

    /**
     * Map of token => number of occurrences seen by transformSample().
     *
     * @var array
     */
    private $frequencies = [];

    public function __construct(Tokenizer $tokenizer, ?StopWords $stopWords = null, float $minDF = 0.0)
    {
        $this->tokenizer = $tokenizer;
        $this->stopWords = $stopWords;
        $this->minDF = $minDF;
    }

    /**
     * Learns the vocabulary from the given samples; $targets is unused.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        $this->buildVocabulary($samples);
    }

    /**
     * Replaces every sample in place by its vector of token counts and then
     * applies the minimum-frequency filter.
     */
    public function transform(array &$samples): void
    {
        array_walk($samples, function (string &$document): void {
            $this->transformSample($document);
        });

        $this->checkDocumentFrequency($samples);
    }

    /**
     * Returns the vocabulary as column index => token.
     */
    public function getVocabulary(): array
    {
        return array_flip($this->vocabulary);
    }

    /**
     * Tokenizes every sample and registers each token in the vocabulary.
     */
    private function buildVocabulary(array &$samples): void
    {
        foreach ($samples as $document) {
            foreach ($this->tokenizer->tokenize($document) as $word) {
                $this->addTokenToVocabulary($word);
            }
        }
    }

    /**
     * Turns one raw sample into an array of counts keyed by column index,
     * containing every vocabulary column and sorted by index.
     */
    private function transformSample(string &$sample): void
    {
        $tally = [];

        foreach ($this->tokenizer->tokenize($sample) as $word) {
            $column = $this->getTokenIndex($word);
            if ($column === false) {
                // Stop word or token not seen during fit().
                continue;
            }

            $this->updateFrequency($word);
            $tally[$column] = ($tally[$column] ?? 0) + 1;
        }

        // Columns that did not occur in this sample are reported as 0.
        $tally += array_fill_keys($this->vocabulary, 0);
        ksort($tally);

        $sample = $tally;
    }

    /**
     * @return int|bool column index of the token, or false for stop words and
     *                  tokens missing from the vocabulary
     */
    private function getTokenIndex(string $token)
    {
        return $this->isStopWord($token)
            ? false
            : ($this->vocabulary[$token] ?? false);
    }

    /**
     * Assigns the next free column index to a token unless it is a stop word
     * or already known.
     */
    private function addTokenToVocabulary(string $token): void
    {
        if ($this->isStopWord($token) || isset($this->vocabulary[$token])) {
            return;
        }

        $this->vocabulary[$token] = count($this->vocabulary);
    }

    private function isStopWord(string $token): bool
    {
        if ($this->stopWords === null) {
            return false;
        }

        return $this->stopWords->isStopWord($token);
    }

    /**
     * Increments the occurrence counter of the token.
     */
    private function updateFrequency(string $token): void
    {
        $this->frequencies[$token] = ($this->frequencies[$token] ?? 0) + 1;
    }

    /**
     * When a positive minDF is configured, zeroes in every sample the columns
     * whose frequency ratio is below it.
     */
    private function checkDocumentFrequency(array &$samples): void
    {
        if (!($this->minDF > 0)) {
            return;
        }

        $rareColumns = $this->getBeyondMinimumIndexes(count($samples));
        foreach ($samples as &$vector) {
            $this->resetBeyondMinimum($vector, $rareColumns);
        }
    }

    /**
     * Sets the listed columns of one sample to 0.
     */
    private function resetBeyondMinimum(array &$sample, array $beyondMinimum): void
    {
        foreach ($beyondMinimum as $column) {
            $sample[$column] = 0;
        }
    }

    /**
     * Lists the column indexes of tokens whose recorded frequency divided by
     * the number of samples is below minDF.
     */
    private function getBeyondMinimumIndexes(int $samplesCount): array
    {
        $columns = [];

        foreach ($this->frequencies as $word => $occurrences) {
            if (($occurrences / $samplesCount) >= $this->minDF) {
                continue;
            }

            // Numeric-looking tokens come back as int array keys, hence the cast.
            $columns[] = $this->getTokenIndex((string) $word);
        }

        return $columns;
    }
}

View File

@ -0,0 +1,10 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureSelection;
/**
 * Contract for the scoring strategies used by feature selection
 * (SelectKBest ranks features by the values returned from score()).
 */
interface ScoringFunction
{
    /**
     * Scores the features of $samples against $targets.
     *
     * @param array $samples rows of feature values
     * @param array $targets one target per sample, matched by array key
     *
     * @return array scores keyed by feature (column) index
     */
    public function score(array $samples, array $targets): array;
}

View File

@ -0,0 +1,21 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureSelection\ScoringFunction;
use Phpml\FeatureSelection\ScoringFunction;
use Phpml\Math\Statistic\ANOVA;
final class ANOVAFValue implements ScoringFunction
{
    /**
     * Buckets the samples by their target value and delegates to
     * ANOVA::oneWayF() with the buckets as a plain list.
     *
     * @param array $samples rows of feature values
     * @param array $targets target of each sample, matched by array key
     *
     * @return array whatever ANOVA::oneWayF() returns for the grouped samples
     */
    public function score(array $samples, array $targets): array
    {
        $samplesByTarget = [];

        foreach (array_keys($samples) as $row) {
            $samplesByTarget[$targets[$row]][] = $samples[$row];
        }

        return ANOVA::oneWayF(array_values($samplesByTarget));
    }
}

View File

@ -0,0 +1,81 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureSelection\ScoringFunction;
use Phpml\FeatureSelection\ScoringFunction;
use Phpml\Math\Matrix;
use Phpml\Math\Statistic\Mean;
/**
 * Quick linear model for testing the effect of a single regressor,
 * sequentially for many regressors.
 *
 * Two steps are performed:
 *
 * 1. The cross correlation between each regressor and the target is computed,
 *    that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) * std(y)).
 * 2. The correlation is converted to an F score.
 *
 * Ported from scikit-learn f_regression function (http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_regression.html#sklearn.feature_selection.f_regression)
 */
final class UnivariateLinearRegression implements ScoringFunction
{
    /**
     * Whether samples and targets are mean-centered before scoring.
     *
     * @var bool
     */
    private $center;

    /**
     * @param bool $center - if true samples and targets will be centered
     */
    public function __construct(bool $center = true)
    {
        $this->center = $center;
    }

    /**
     * Returns one F score per feature column, keyed like the first sample.
     */
    public function score(array $samples, array $targets): array
    {
        if ($this->center) {
            $this->centerTargets($targets);
            $this->centerSamples($samples);
        }

        $correlations = [];
        foreach (array_keys($samples[0]) as $column) {
            $values = array_column($samples, $column);

            // dot(targets, column) divided by both Frobenius norms.
            $projection = Matrix::dot($targets, $values)[0];
            $scaled = $projection / (new Matrix($values, false))->transpose()->frobeniusNorm();
            $correlations[$column] = $scaled / (new Matrix($targets, false))->frobeniusNorm();
        }

        // Centering removes one more degree of freedom.
        $degreesOfFreedom = count($targets) - ($this->center ? 2 : 1);

        return array_map(static function (float $r) use ($degreesOfFreedom): float {
            return $r ** 2 / (1 - $r ** 2) * $degreesOfFreedom;
        }, $correlations);
    }

    /**
     * Subtracts the arithmetic mean of the targets from each target, in place.
     */
    private function centerTargets(array &$targets): void
    {
        $mean = Mean::arithmetic($targets);

        foreach ($targets as &$value) {
            $value -= $mean;
        }
        unset($value);
    }

    /**
     * Subtracts from every feature value the arithmetic mean of its column,
     * in place.
     */
    private function centerSamples(array &$samples): void
    {
        $columnMeans = [];
        foreach (array_keys($samples[0]) as $column) {
            $columnMeans[$column] = Mean::arithmetic(array_column($samples, $column));
        }

        foreach ($samples as &$row) {
            foreach ($row as $column => &$value) {
                $value -= $columnMeans[$column];
            }
            unset($value);
        }
        unset($row);
    }
}

View File

@ -0,0 +1,78 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureSelection;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\InvalidOperationException;
use Phpml\FeatureSelection\ScoringFunction\ANOVAFValue;
use Phpml\Transformer;
final class SelectKBest implements Transformer
{
    /**
     * Strategy used to score every feature against the targets.
     *
     * @var ScoringFunction
     */
    private $scoringFunction;

    /**
     * Number of top-scoring features to keep.
     *
     * @var int
     */
    private $k;

    /**
     * Scores from the last fit(), keyed by feature index; null before fitting.
     *
     * @var array|null
     */
    private $scores = null;

    /**
     * Scores of the k best features keyed by feature index, or null when
     * transform() must leave samples untouched.
     *
     * @var array|null
     */
    private $keepColumns = null;

    /**
     * @param int                  $k               number of features to keep
     * @param ScoringFunction|null $scoringFunction defaults to ANOVAFValue
     */
    public function __construct(int $k = 10, ?ScoringFunction $scoringFunction = null)
    {
        if ($scoringFunction === null) {
            $scoringFunction = new ANOVAFValue();
        }

        $this->scoringFunction = $scoringFunction;
        $this->k = $k;
    }

    /**
     * Scores all features and remembers the k highest-scoring ones.
     *
     * @throws InvalidArgumentException when $targets is null or empty
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        if ($targets === null || count($targets) === 0) {
            throw new InvalidArgumentException('The array has zero elements');
        }

        $this->scores = $sorted = $this->scoringFunction->score($samples, $targets);

        // Forget the selection of any earlier fit; otherwise refitting on data
        // with at most k features would keep filtering by the stale columns.
        $this->keepColumns = null;

        if ($this->k >= count($sorted)) {
            return;
        }

        arsort($sorted);
        $this->keepColumns = array_slice($sorted, 0, $this->k, true);
    }

    /**
     * Reduces every sample in place to the selected columns (re-indexed from 0).
     * Does nothing when no selection is stored.
     */
    public function transform(array &$samples): void
    {
        if ($this->keepColumns === null) {
            return;
        }

        foreach ($samples as &$sample) {
            $sample = array_values(array_intersect_key($sample, $this->keepColumns));
        }
        unset($sample);
    }

    /**
     * @return array scores computed by the last fit()
     *
     * @throws InvalidOperationException when called before fit()
     */
    public function scores(): array
    {
        if ($this->scores === null) {
            throw new InvalidOperationException('SelectKBest require to fit first to get scores');
        }

        return $this->scores;
    }
}

View File

@ -0,0 +1,57 @@
<?php
declare(strict_types=1);
namespace Phpml\FeatureSelection;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Math\Matrix;
use Phpml\Math\Statistic\Variance;
use Phpml\Transformer;
final class VarianceThreshold implements Transformer
{
    /**
     * Features whose variance is not above this value are removed.
     *
     * @var float
     */
    private $threshold;

    /**
     * Population variance of each column from the last fit().
     *
     * @var array
     */
    private $variances = [];

    /**
     * Column index => true for every column that passed the threshold.
     *
     * @var array
     */
    private $keepColumns = [];

    /**
     * @throws InvalidArgumentException when $threshold is negative
     */
    public function __construct(float $threshold = 0.0)
    {
        if ($threshold < 0) {
            throw new InvalidArgumentException('Threshold can\'t be lower than zero');
        }

        $this->threshold = $threshold;
    }

    /**
     * Computes the population variance of every column and records the
     * columns whose variance exceeds the threshold; $targets is unused.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        $this->variances = array_map(function (array $column) {
            return Variance::population($column);
        }, Matrix::transposeArray($samples));

        // Start from a clean selection so a refit does not inherit columns
        // that passed the threshold on previously fitted data.
        $this->keepColumns = [];
        foreach ($this->variances as $column => $variance) {
            if ($variance > $this->threshold) {
                $this->keepColumns[$column] = true;
            }
        }
    }

    /**
     * Reduces every sample in place to the kept columns (re-indexed from 0).
     */
    public function transform(array &$samples): void
    {
        foreach ($samples as &$sample) {
            $sample = array_values(array_intersect_key($sample, $this->keepColumns));
        }
        unset($sample);
    }
}

View File

@ -0,0 +1,169 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper;
use Phpml\Classification\Classifier;
trait OneVsRest
{
    /**
     * Trained binary classifiers: a single entry at index 0 when there are
     * exactly two labels, otherwise one entry per label, keyed by the label.
     *
     * @var array
     */
    protected $classifiers = [];

    /**
     * All provided training targets' labels.
     *
     * @var array
     */
    protected $allLabels = [];

    /**
     * Cost values taken from the first trained classifier, when it exposes
     * getCostValues().
     *
     * @var array
     */
    protected $costValues = [];

    /**
     * Train a binary classifier in the OvR style
     */
    public function train(array $samples, array $targets): void
    {
        // Clears previous stuff.
        $this->reset();

        $this->trainByLabel($samples, $targets);
    }

    /**
     * Resets the classifier and the vars internally used by OneVsRest to create multiple classifiers.
     */
    public function reset(): void
    {
        $this->classifiers = [];
        $this->allLabels = [];
        $this->costValues = [];

        $this->resetBinary();
    }

    /**
     * Trains one binary classifier per label, or a single classifier when
     * there are exactly two labels.
     *
     * @param array $allLabels full label set; derived from $targets when empty
     */
    protected function trainByLabel(array $samples, array $targets, array $allLabels = []): void
    {
        // Overwrites the current value if it exists. $allLabels must be provided for each partialTrain run.
        $this->allLabels = count($allLabels) === 0 ? array_keys(array_count_values($targets)) : $allLabels;
        sort($this->allLabels, SORT_STRING);

        // If there are only two targets, then there is no need to perform OvR
        if (count($this->allLabels) === 2) {
            // Init classifier if required.
            if (count($this->classifiers) === 0) {
                $this->classifiers[0] = $this->getClassifierCopy();
            }

            $this->classifiers[0]->trainBinary($samples, $targets, $this->allLabels);
        } else {
            // Train a separate classifier for each label and memorize them
            foreach ($this->allLabels as $label) {
                // Init classifier if required.
                if (!isset($this->classifiers[$label])) {
                    $this->classifiers[$label] = $this->getClassifierCopy();
                }

                [$binarizedTargets, $classifierLabels] = $this->binarizeTargets($targets, $label);
                $this->classifiers[$label]->trainBinary($samples, $binarizedTargets, $classifierLabels);
            }
        }

        // If the underlying classifier is capable of giving the cost values
        // during the training, then assign it to the relevant variable
        // Adding just the first classifier cost values to avoid complex average calculations.
        $classifierref = reset($this->classifiers);
        if (method_exists($classifierref, 'getCostValues')) {
            $this->costValues = $classifierref->getCostValues();
        }
    }

    /**
     * Returns an instance of the current class after cleaning up OneVsRest stuff.
     */
    protected function getClassifierCopy(): Classifier
    {
        // Clone the current classifier, so that
        // we don't mess up its variables while training
        // multiple instances of this classifier
        $classifier = clone $this;
        $classifier->reset();

        return $classifier;
    }

    /**
     * Predicts the label of one sample: the binary classifier's answer when
     * there are two labels, otherwise the label whose classifier reports the
     * highest probability.
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
        if (count($this->allLabels) === 2) {
            return $this->classifiers[0]->predictSampleBinary($sample);
        }

        $probs = [];
        foreach ($this->classifiers as $label => $predictor) {
            $probs[$label] = $predictor->predictProbability($sample, $label);
        }

        // After the descending sort the first key is the most probable label.
        arsort($probs, SORT_NUMERIC);

        return key($probs);
    }

    /**
     * Each classifier should implement this method instead of train(samples, targets)
     */
    abstract protected function trainBinary(array $samples, array $targets, array $labels);

    /**
     * To be overwritten by OneVsRest classifiers.
     */
    abstract protected function resetBinary(): void;

    /**
     * Each classifier that makes use of the OvR approach should be able to
     * return a probability for a sample to belong to the given label.
     *
     * @return mixed
     */
    abstract protected function predictProbability(array $sample, string $label);

    /**
     * Each classifier should implement this method instead of predictSample()
     *
     * @return mixed
     */
    abstract protected function predictSampleBinary(array $sample);

    /**
     * Groups all targets into two groups: Targets equal to
     * the given label and the others
     *
     * $targets is not passed by reference nor contains objects so this method
     * changes will not affect the caller $targets array.
     *
     * @param mixed $label
     *
     * @return array Binarized targets and target's labels
     */
    private function binarizeTargets(array $targets, $label): array
    {
        // "{$var}" replaces the "${var}" interpolation form deprecated in PHP 8.2;
        // the resulting string is identical.
        $notLabel = "not_{$label}";
        foreach ($targets as $key => $target) {
            // Loose comparison on purpose: labels taken from array keys may have
            // become integers while the targets are still numeric strings.
            $targets[$key] = $target == $label ? $label : $notLabel;
        }

        $labels = [$label, $notLabel];

        return [$targets, $labels];
    }
}

View File

@ -0,0 +1,304 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
use Closure;
/**
 * Conjugate Gradient method to solve a non-linear f(x) with respect to unknown x
 * (see https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method)
 *
 * The method applied below is explained in the below document in a practical manner
 * - http://web.cs.iastate.edu/~cs577/handouts/conjugate-gradient.pdf
 *
 * However it is compliant with the general Conjugate Gradient method with
 * Fletcher-Reeves update method. Note that, the f(x) is assumed to be one-dimensional
 * and one gradient is utilized for all dimensions in the given data.
 */
class ConjugateGradient extends GD
{
    /**
     * Runs at most maxIterations conjugate-gradient steps and returns the
     * resulting weights (theta). One cost value is recorded per iteration.
     *
     * @param Closure $gradientCb callback invoked by the parent gradient()
     *                            with (theta, sample, target)
     */
    public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
    {
        $this->samples = $samples;
        $this->targets = $targets;
        $this->gradientCb = $gradientCb;
        $this->sampleCount = count($samples);
        $this->costValues = [];

        // Initial search direction: the negative gradient at the current theta.
        $d = MP::muls($this->gradient($this->theta), -1);

        for ($i = 0; $i < $this->maxIterations; ++$i) {
            // Obtain α that minimizes f(θ + α.d)
            $alpha = $this->getAlpha($d);

            // θ(k+1) = θ(k) + α.d
            $thetaNew = $this->getNewTheta($alpha, $d);

            // β = ||∇f(x(k+1))||² ∕ ||∇f(x(k))||²
            $beta = $this->getBeta($thetaNew);

            // d(k+1) = -∇f(x(k+1)) + β(k).d(k)
            $d = $this->getNewDirection($thetaNew, $beta, $d);

            // Save values for the next iteration
            $oldTheta = $this->theta;
            $this->costValues[] = $this->cost($thetaNew);

            $this->theta = $thetaNew;
            if ($this->enableEarlyStop && $this->earlyStop($oldTheta)) {
                break;
            }
        }

        $this->clear();

        return $this->theta;
    }

    /**
     * Executes the callback function for the problem and returns
     * sum of the gradient for all samples & targets.
     *
     * Index 0 of the result is the plain sum of the per-sample updates (bias
     * term); index i > 0 is the sum of update * feature(i - 1) over all
     * samples plus penalty * theta[i].
     */
    protected function gradient(array $theta): array
    {
        // The parent returns [costs, per-sample updates, averaged penalty]; the costs are not needed here.
        [, $updates, $penalty] = parent::gradient($theta);

        // Calculate gradient for each dimension
        $gradient = [];
        for ($i = 0; $i <= $this->dimensions; ++$i) {
            if ($i === 0) {
                $gradient[$i] = array_sum($updates);
            } else {
                $col = array_column($this->samples, $i - 1);
                $error = 0;
                foreach ($col as $index => $val) {
                    $error += $val * $updates[$index];
                }

                $gradient[$i] = $error + $penalty * $theta[$i];
            }
        }

        return $gradient;
    }

    /**
     * Returns the value of f(x) for given solution: the mean of the
     * per-sample costs reported by the parent gradient().
     */
    protected function cost(array $theta): float
    {
        [$cost] = parent::gradient($theta);

        return array_sum($cost) / (int) $this->sampleCount;
    }

    /**
     * Calculates alpha that minimizes the function f(θ + α.d)
     * by performing a line search that does not rely upon the derivation.
     *
     * There are several alternatives for this function. For now, we
     * prefer a method inspired from the bisection method for its simplicity.
     * This algorithm attempts to find an optimum alpha value between 0.0001 and 0.01
     *
     * Algorithm as follows:
     *  a) Probe a small alpha (0.0001) and calculate cost function
     *  b) Probe a larger alpha (0.01) and calculate cost function
     *      b-1) If cost function decreases, continue enlarging alpha
     *      b-2) If cost function increases, take the midpoint and try again
     */
    protected function getAlpha(array $d): float
    {
        $small = MP::muls($d, 0.0001);
        $large = MP::muls($d, 0.01);

        // Obtain θ + α.d for two initial values, x0 and x1
        $x0 = MP::add($this->theta, $small);
        $x1 = MP::add($this->theta, $large);

        $epsilon = 0.0001;
        $iteration = 0;
        do {
            $fx1 = $this->cost($x1);
            $fx0 = $this->cost($x0);

            // If the difference between two values is small enough
            // then break the loop
            if (abs($fx1 - $fx0) <= $epsilon) {
                break;
            }

            if ($fx1 < $fx0) {
                $x0 = $x1;
                // Enlarge second: adds the scalar 0.01 to every component of x1
                $x1 = MP::adds($x1, 0.01);
            } else {
                // Get to the midpoint of x0 and x1
                $x1 = MP::divs(MP::add($x1, $x0), 2.0);
            }

            $error = $fx1 / $this->dimensions;
            // NOTE(review): the loop repeats while the error is at most epsilon
            // OR fewer than ~10 iterations have run, so the iteration cap does
            // not apply while the error stays small - confirm this is intended.
        } while ($error <= $epsilon || $iteration++ < 10);

        // Return α = θ / d
        // For accuracy, choose a dimension which maximize |d[i]|
        $imax = 0;
        for ($i = 1; $i <= $this->dimensions; ++$i) {
            if (abs($d[$i]) > abs($d[$imax])) {
                $imax = $i;
            }
        }

        // A zero direction component cannot be divided by; return the raw step instead.
        if ($d[$imax] == 0) {
            return $x1[$imax] - $this->theta[$imax];
        }

        return ($x1[$imax] - $this->theta[$imax]) / $d[$imax];
    }

    /**
     * Calculates new set of solutions with given alpha (for each θ(k)) and
     * gradient direction.
     *
     * θ(k+1) = θ(k) + α.d
     */
    protected function getNewTheta(float $alpha, array $d): array
    {
        return MP::add($this->theta, MP::muls($d, $alpha));
    }

    /**
     * Calculates new beta (β) for given set of solutions by using
     * Fletcher–Reeves method.
     *
     * β = ||∇f(x(k+1))||² ∕ ||∇f(x(k))||²
     *
     * See:
     *  R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149–154.
     */
    protected function getBeta(array $newTheta): float
    {
        $gNew = $this->gradient($newTheta);
        $gOld = $this->gradient($this->theta);
        $dNew = 0;
        // Tiny non-zero start value keeps the division below defined when the old gradient is all zeros.
        $dOld = 1e-100;

        for ($i = 0; $i <= $this->dimensions; ++$i) {
            $dNew += $gNew[$i] ** 2;
            $dOld += $gOld[$i] ** 2;
        }

        return $dNew / $dOld;
    }

    /**
     * Calculates the new conjugate direction
     *
     * d(k+1) = -∇f(x(k+1)) + β(k).d(k)
     */
    protected function getNewDirection(array $theta, float $beta, array $d): array
    {
        $grad = $this->gradient($theta);

        return MP::add(MP::muls($grad, -1), MP::muls($d, $beta));
    }
}
/**
 * Element-wise arithmetic helpers for vector-vector and vector-scalar
 * operands. Every method returns a new, zero-indexed list.
 */
class MP
{
    /**
     * Element-wise <b>multiplication</b> of two vectors of the same size
     * (elements of $m2 are looked up by the keys of $m1).
     */
    public static function mul(array $m1, array $m2): array
    {
        return array_map(
            static function ($key, $left) use ($m2) {
                return $left * $m2[$key];
            },
            array_keys($m1),
            $m1
        );
    }

    /**
     * Element-wise <b>division</b> of two vectors of the same size
     * (elements of $m2 are looked up by the keys of $m1).
     */
    public static function div(array $m1, array $m2): array
    {
        return array_map(
            static function ($key, $left) use ($m2) {
                return $left / $m2[$key];
            },
            array_keys($m1),
            $m1
        );
    }

    /**
     * Element-wise <b>addition</b> of two vectors of the same size;
     * $m2 is scaled by $mag before being added.
     */
    public static function add(array $m1, array $m2, int $mag = 1): array
    {
        return array_map(
            static function ($key, $left) use ($m2, $mag) {
                return $left + $mag * $m2[$key];
            },
            array_keys($m1),
            $m1
        );
    }

    /**
     * Element-wise <b>subtraction</b> of two vectors of the same size
     */
    public static function sub(array $m1, array $m2): array
    {
        return self::add($m1, $m2, -1);
    }

    /**
     * Element-wise <b>multiplication</b> of a vector with a scalar
     */
    public static function muls(array $m1, float $m2): array
    {
        return array_map(
            static function ($element) use ($m2) {
                return $element * $m2;
            },
            array_values($m1)
        );
    }

    /**
     * Element-wise <b>division</b> of a vector with a scalar;
     * 1e-32 is added to the divisor before dividing.
     */
    public static function divs(array $m1, float $m2): array
    {
        $divisor = $m2 + 1e-32;

        return array_map(
            static function ($element) use ($divisor) {
                return $element / $divisor;
            },
            array_values($m1)
        );
    }

    /**
     * Element-wise <b>addition</b> of a vector with a scalar;
     * the scalar is scaled by $mag before being added.
     */
    public static function adds(array $m1, float $m2, int $mag = 1): array
    {
        return array_map(
            static function ($element) use ($m2, $mag) {
                return $element + $mag * $m2;
            },
            array_values($m1)
        );
    }

    /**
     * Element-wise <b>subtraction</b> of a vector with a scalar
     */
    public static function subs(array $m1, float $m2): array
    {
        return self::adds($m1, $m2, -1);
    }
}

View File

@ -0,0 +1,111 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
use Closure;
use Phpml\Exception\InvalidOperationException;
/**
 * Batch version of Gradient Descent to optimize the weights
 * of a classifier given samples, targets and the objective function to minimize
 */
class GD extends StochasticGD
{
    /**
     * Number of samples given
     *
     * @var int|null
     */
    protected $sampleCount;

    /**
     * Runs batch gradient descent for at most maxIterations iterations and
     * returns the resulting weights. All weights are updated once per
     * iteration from the updates of the whole sample set, and the mean cost
     * of each iteration is recorded.
     *
     * @param Closure $gradientCb callback invoked with (theta, sample, target);
     *                            its result is read as [cost, gradient, penalty]
     */
    public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
    {
        $this->samples = $samples;
        $this->targets = $targets;
        $this->gradientCb = $gradientCb;
        $this->sampleCount = count($this->samples);

        // Batch learning is executed:
        $currIter = 0;
        $this->costValues = [];
        while ($this->maxIterations > $currIter++) {
            $theta = $this->theta;

            // Calculate update terms for each sample
            [$errors, $updates, $totalPenalty] = $this->gradient($theta);

            $this->updateWeightsWithUpdates($updates, $totalPenalty);

            $this->costValues[] = array_sum($errors) / $this->sampleCount;

            // Honour setEarlyStop(false), as the stochastic and conjugate
            // gradient optimizers already do.
            if ($this->enableEarlyStop && $this->earlyStop($theta)) {
                break;
            }
        }

        $this->clear();

        return $this->theta;
    }

    /**
     * Calculates gradient, cost function and penalty term for each sample
     * then returns them as an array of values
     *
     * @return array [per-sample costs, per-sample gradients, penalty averaged over the samples]
     *
     * @throws InvalidOperationException when no gradient callback is set
     */
    protected function gradient(array $theta): array
    {
        $costs = [];
        $gradient = [];
        $totalPenalty = 0;

        if ($this->gradientCb === null) {
            throw new InvalidOperationException('Gradient callback is not defined');
        }

        foreach ($this->samples as $index => $sample) {
            $target = $this->targets[$index];

            $result = ($this->gradientCb)($theta, $sample, $target);
            // The callback may omit trailing entries; missing ones count as 0.
            [$cost, $grad, $penalty] = array_pad($result, 3, 0);

            $costs[] = $cost;
            $gradient[] = $grad;
            $totalPenalty += $penalty;
        }

        $totalPenalty /= $this->sampleCount;

        return [$costs, $gradient, $totalPenalty];
    }

    /**
     * Applies one learning-rate-scaled update to every weight: the bias
     * (index 0) moves by the sum of the updates, each other weight by the sum
     * of update * feature value plus its penalty term.
     */
    protected function updateWeightsWithUpdates(array $updates, float $penalty): void
    {
        // Updates all weights at once
        for ($i = 0; $i <= $this->dimensions; ++$i) {
            if ($i === 0) {
                $this->theta[0] -= $this->learningRate * array_sum($updates);
            } else {
                $col = array_column($this->samples, $i - 1);

                $error = 0;
                foreach ($col as $index => $val) {
                    $error += $val * $updates[$index];
                }

                $this->theta[$i] -= $this->learningRate *
                    ($error + $penalty * $this->theta[$i]);
            }
        }
    }

    /**
     * Clears the optimizer internal vars after the optimization process.
     */
    protected function clear(): void
    {
        $this->sampleCount = null;
        parent::clear();
    }
}

View File

@ -0,0 +1,56 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
use Closure;
use Phpml\Exception\InvalidArgumentException;
abstract class Optimizer
{
    /**
     * Unknown variables to be found
     *
     * @var array
     */
    protected $theta = [];

    /**
     * Number of dimensions
     *
     * @var int
     */
    protected $dimensions;

    /**
     * Creates an optimizer for the given number of dimensions and draws one
     * random starting weight per dimension, each between 0.1 and 1.1.
     */
    public function __construct(int $dimensions)
    {
        $this->dimensions = $dimensions;

        $weights = [];
        while (count($weights) < $dimensions) {
            $weights[] = random_int(0, PHP_INT_MAX) / PHP_INT_MAX + 0.1;
        }

        $this->theta = $weights;
    }

    /**
     * Replaces the weights with caller-supplied values.
     *
     * @return $this
     *
     * @throws InvalidArgumentException when the number of values differs from
     *                                  the number of dimensions
     */
    public function setTheta(array $theta): self
    {
        $expected = $this->dimensions;

        if (count($theta) === $expected) {
            $this->theta = $theta;

            return $this;
        }

        throw new InvalidArgumentException(sprintf('Number of values in the weights array should be %s', $expected));
    }

    /**
     * Executes the optimization with the given samples & targets
     * and returns the weights
     */
    abstract public function runOptimization(array $samples, array $targets, Closure $gradientCb): array;
}

View File

@ -0,0 +1,278 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
use Closure;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\InvalidOperationException;
/**
 * Stochastic Gradient Descent optimization method
 * to find a solution for the equation A.ϴ = y where
 *  A (samples) and y (targets) are known and ϴ is unknown.
 */
class StochasticGD extends Optimizer
{
    /**
     * A (samples)
     *
     * @var array
     */
    protected $samples = [];

    /**
     * y (targets)
     *
     * @var array
     */
    protected $targets = [];

    /**
     * Callback function to get the gradient and cost value
     * for a specific set of theta (ϴ) and a pair of sample & target
     *
     * @var \Closure|null
     */
    protected $gradientCb;

    /**
     * Maximum number of iterations used to train the model
     *
     * @var int
     */
    protected $maxIterations = 1000;

    /**
     * Learning rate is used to control the speed of the optimization.<br>
     *
     * Larger values of lr may overshoot the optimum or even cause divergence
     * while small values slow down the convergence and increase the time
     * required for the training
     *
     * @var float
     */
    protected $learningRate = 0.001;

    /**
     * Minimum amount of change in the weights and error values
     * between iterations that needs to be obtained to continue the training
     *
     * @var float
     */
    protected $threshold = 1e-4;

    /**
     * Enable/Disable early stopping by checking the weight & cost values
     * to see whether they changed large enough to continue the optimization
     *
     * @var bool
     */
    protected $enableEarlyStop = true;

    /**
     * List of values obtained by evaluating the cost function at each iteration
     * of the algorithm
     *
     * @var array
     */
    protected $costValues = [];

    /**
     * Initializes the SGD optimizer for the given number of dimensions
     */
    public function __construct(int $dimensions)
    {
        // Add one more dimension for the bias
        parent::__construct($dimensions + 1);

        // The parent stored dimensions + 1; keep the feature count here so
        // theta holds dimensions + 1 values (bias at index 0).
        $this->dimensions = $dimensions;
    }

    /**
     * Replaces the weights; expects one value per dimension plus the bias.
     *
     * @throws InvalidArgumentException when the number of values is not dimensions + 1
     */
    public function setTheta(array $theta): Optimizer
    {
        if (count($theta) !== $this->dimensions + 1) {
            throw new InvalidArgumentException(sprintf('Number of values in the weights array should be %s', $this->dimensions + 1));
        }

        $this->theta = $theta;

        return $this;
    }

    /**
     * Sets minimum value for the change in the theta values
     * between iterations to continue the iterations.<br>
     *
     * If change in the theta is less than given value then the
     * algorithm will stop training
     *
     * @return $this
     */
    public function setChangeThreshold(float $threshold = 1e-5)
    {
        $this->threshold = $threshold;

        return $this;
    }

    /**
     * Enable/Disable early stopping by checking at each iteration
     * whether changes in theta or cost value are not large enough
     *
     * @return $this
     */
    public function setEarlyStop(bool $enable = true)
    {
        $this->enableEarlyStop = $enable;

        return $this;
    }

    /**
     * @return $this
     */
    public function setLearningRate(float $learningRate)
    {
        $this->learningRate = $learningRate;

        return $this;
    }

    /**
     * @return $this
     */
    public function setMaxIterations(int $maxIterations)
    {
        $this->maxIterations = $maxIterations;

        return $this;
    }

    /**
     * Optimization procedure finds the unknown variables for the equation A.ϴ = y
     * for the given samples (A) and targets (y).<br>
     *
     * The cost function to minimize and the gradient of the function are to be
     * handled by the callback function provided as the third parameter of the method.
     *
     * When no iteration runs at all (maxIterations <= 0) the current weights
     * are returned unchanged.
     */
    public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
    {
        $this->samples = $samples;
        $this->targets = $targets;
        $this->gradientCb = $gradientCb;

        $currIter = 0;
        $bestTheta = null;
        $bestScore = 0.0;
        $this->costValues = [];

        while ($this->maxIterations > $currIter++) {
            $theta = $this->theta;

            // Update the guess
            $cost = $this->updateTheta();

            // Save the best theta in the "pocket" so that
            // any future set of theta worse than this will be disregarded
            if ($bestTheta === null || $cost <= $bestScore) {
                $bestTheta = $theta;
                $bestScore = $cost;
            }

            // Add the cost value for this iteration to the list
            $this->costValues[] = $cost;

            // Check for early stop
            if ($this->enableEarlyStop && $this->earlyStop($theta)) {
                break;
            }
        }

        $this->clear();

        // Solution in the pocket is better than or equal to the last state
        // so, we use this solution. The pocket is still empty when the loop
        // never ran; casting that null to an array would wipe the weights.
        if ($bestTheta !== null) {
            $this->theta = $bestTheta;
        }

        return $this->theta;
    }

    /**
     * Returns the list of cost values for each iteration executed in
     * last run of the optimization
     */
    public function getCostValues(): array
    {
        return $this->costValues;
    }

    /**
     * Performs one pass over all samples, adjusting the weights after every
     * sample, and returns the mean of the error values reported by the
     * callback.
     *
     * @throws InvalidOperationException when no gradient callback is set
     */
    protected function updateTheta(): float
    {
        $jValue = 0.0;
        // The callback is evaluated against this snapshot for the whole pass.
        $theta = $this->theta;

        if ($this->gradientCb === null) {
            throw new InvalidOperationException('Gradient callback is not defined');
        }

        foreach ($this->samples as $index => $sample) {
            $target = $this->targets[$index];

            $result = ($this->gradientCb)($theta, $sample, $target);

            // The callback may omit trailing entries; missing ones count as 0.
            [$error, $gradient, $penalty] = array_pad($result, 3, 0);

            // Update bias
            $this->theta[0] -= $this->learningRate * $gradient;

            // Update other values
            for ($i = 1; $i <= $this->dimensions; ++$i) {
                $this->theta[$i] -= $this->learningRate *
                    ($gradient * $sample[$i - 1] + $penalty * $this->theta[$i]);
            }

            // Sum error rate
            $jValue += $error;
        }

        return $jValue / count($this->samples);
    }

    /**
     * Checks if the optimization is not effective enough and can be stopped
     * in case large enough changes in the solution do not happen
     */
    protected function earlyStop(array $oldTheta): bool
    {
        // Check for early stop: no weight changed by more than the threshold
        // (1e-4 unless changed through setChangeThreshold())
        $diff = array_map(
            function ($w1, $w2) {
                return abs($w1 - $w2) > $this->threshold ? 1 : 0;
            },
            $oldTheta,
            $this->theta
        );

        if (array_sum($diff) == 0) {
            return true;
        }

        // Check if the last two cost values are almost the same
        $costs = array_slice($this->costValues, -2);
        if (count($costs) === 2 && abs($costs[1] - $costs[0]) < $this->threshold) {
            return true;
        }

        return false;
    }

    /**
     * Clears the optimizer internal vars after the optimization process.
     */
    protected function clear(): void
    {
        $this->samples = [];
        $this->targets = [];
        $this->gradientCb = null;
    }
}

View File

@ -0,0 +1,30 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper;
trait Predictable
{
    /**
     * Predicts either one sample or a batch of samples.
     *
     * The argument is treated as a batch when its first element is itself an
     * array; the result then holds one prediction per sample under the same
     * keys. Otherwise the argument is a single sample and its prediction is
     * returned directly.
     *
     * @return mixed
     */
    public function predict(array $samples)
    {
        // Inspect the first element rather than offset 0, so batches that are
        // not keyed from 0 are still recognised and an empty array does not
        // raise an undefined-offset warning.
        $first = reset($samples);
        if (!is_array($first)) {
            return $this->predictSample($samples);
        }

        $predicted = [];
        foreach ($samples as $index => $sample) {
            $predicted[$index] = $this->predictSample($sample);
        }

        return $predicted;
    }

    /**
     * Predicts the outcome for one sample.
     *
     * @return mixed
     */
    abstract protected function predictSample(array $sample);
}

View File

@ -0,0 +1,24 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper;
trait Trainable
{
    /**
     * Every sample received so far.
     *
     * @var array
     */
    private $samples = [];

    /**
     * Every target received so far.
     *
     * @var array
     */
    private $targets = [];

    /**
     * Adds the given samples and targets to the ones already stored;
     * earlier training data is kept.
     */
    public function train(array $samples, array $targets): void
    {
        $allSamples = array_merge($this->samples, $samples);
        $allTargets = array_merge($this->targets, $targets);

        $this->samples = $allSamples;
        $this->targets = $allTargets;
    }
}

View File

@ -0,0 +1,10 @@
<?php
declare(strict_types=1);
namespace Phpml;
/**
 * Contract for estimators that can be trained in several partial runs.
 */
interface IncrementalEstimator
{
    /**
     * Trains the estimator on one portion of the data.
     *
     * @param array $samples samples of this portion
     * @param array $targets targets of this portion
     * @param array $labels  presumably the complete label set, since one portion
     *                       need not contain every label - TODO confirm against
     *                       the implementations
     */
    public function partialTrain(array $samples, array $targets, array $labels = []): void;
}

View File

@ -0,0 +1,42 @@
<?php
declare(strict_types=1);
namespace Phpml\Math;
use Phpml\Exception\InvalidArgumentException;
class Comparison
{
    /**
     * Applies the comparison named by $operator to $a and $b.
     *
     * Supported operators: >, >=, = (same as ==), ==, ===, <=, <,
     * != and <> (equivalent), !==.
     *
     * @param mixed $a
     * @param mixed $b
     *
     * @throws InvalidArgumentException
     */
    public static function compare($a, $b, string $operator): bool
    {
        // Spellings that share an implementation with another operator.
        $aliases = ['=' => '==', '<>' => '!='];
        $canonical = $aliases[$operator] ?? $operator;

        $comparators = [
            '>' => static function ($left, $right): bool {
                return $left > $right;
            },
            '>=' => static function ($left, $right): bool {
                return $left >= $right;
            },
            '==' => static function ($left, $right): bool {
                return $left == $right;
            },
            '===' => static function ($left, $right): bool {
                return $left === $right;
            },
            '<=' => static function ($left, $right): bool {
                return $left <= $right;
            },
            '<' => static function ($left, $right): bool {
                return $left < $right;
            },
            '!=' => static function ($left, $right): bool {
                return $left != $right;
            },
            '!==' => static function ($left, $right): bool {
                return $left !== $right;
            },
        ];

        if (!isset($comparators[$canonical])) {
            throw new InvalidArgumentException(sprintf('Invalid operator "%s" provided', $operator));
        }

        return $comparators[$canonical]($a, $b);
    }
}

View File

@ -0,0 +1,10 @@
<?php
declare(strict_types=1);
namespace Phpml\Math;
/**
* Contract for classes that compute a distance between two arrays.
*/
interface Distance
{
/**
* Returns the distance between $a and $b as a float.
*/
public function distance(array $a, array $b): float;
}

View File

@ -0,0 +1,19 @@
<?php
declare(strict_types=1);
namespace Phpml\Math\Distance;
/**
 * Class Chebyshev
 *
 * The distance is the largest absolute coordinate difference.
 */
class Chebyshev extends Distance
{
    /**
     * {@inheritdoc}
     */
    public function distance(array $a, array $b): float
    {
        $deltas = $this->deltas($a, $b);

        // max() cannot be applied to an empty array; two empty vectors are
        // 0.0 apart, which is also what the parent implementation yields.
        if ($deltas === []) {
            return 0.0;
        }

        return max($deltas);
    }
}

View File

@ -0,0 +1,61 @@
<?php
declare(strict_types=1);
namespace Phpml\Math\Distance;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Math\Distance as DistanceInterface;
/**
 * Base class for distances built from the absolute coordinate differences
 * of two arrays raised to a configurable norm.
 */
abstract class Distance implements DistanceInterface
{
    /**
     * Exponent applied to every coordinate difference.
     *
     * @var float|int
     */
    public $norm;

    /**
     * Stores the norm; it defaults to 3.0.
     */
    public function __construct(float $norm = 3.0)
    {
        $this->norm = $norm;
    }

    /**
     * Sums the coordinate differences raised to the norm and returns the
     * norm-th root of that sum.
     *
     * @throws InvalidArgumentException
     */
    public function distance(array $a, array $b): float
    {
        $poweredDeltas = array_map(
            function ($delta) {
                return $delta ** $this->norm;
            },
            $this->deltas($a, $b)
        );

        return array_sum($poweredDeltas) ** (1 / $this->norm);
    }

    /**
     * Returns the absolute differences of $a and $b, position by position.
     *
     * @throws InvalidArgumentException when the arrays differ in size
     */
    protected function deltas(array $a, array $b): array
    {
        $size = count($a);
        if (count($b) !== $size) {
            throw new InvalidArgumentException('Size of given arrays does not match');
        }

        $differences = [];
        for ($position = 0; $position < $size; ++$position) {
            $differences[] = abs($a[$position] - $b[$position]);
        }

        return $differences;
    }
}

View File

@ -0,0 +1,31 @@
<?php
declare(strict_types=1);
namespace Phpml\Math\Distance;
/**
 * Class Euclidean
 *
 * L^2 Metric.
 */
class Euclidean extends Distance
{
    /**
     * Euclidean constructor.
     *
     * Fixes the norm of the parent class to 2.0.
     */
    public function __construct()
    {
        parent::__construct(2.0);
    }

    /**
     * Square of Euclidean distance
     *
     * Sums the squared coordinate differences directly instead of taking
     * the square root in distance() and squaring it again, which avoids a
     * needless rounding step.
     *
     * @throws \Phpml\Exception\InvalidArgumentException
     */
    public function sqDistance(array $a, array $b): float
    {
        $sumOfSquares = 0.0;
        foreach ($this->deltas($a, $b) as $delta) {
            $sumOfSquares += $delta ** 2;
        }

        return $sumOfSquares;
    }
}

View File

@ -0,0 +1,21 @@
<?php
declare(strict_types=1);
namespace Phpml\Math\Distance;
/**
* Class Manhattan
*
* L^1 Metric: the distance itself is computed by the parent class, with the
* norm fixed to 1.0.
*/
class Manhattan extends Distance
{
/**
* Manhattan constructor.
*
* Passes a norm of 1.0 to the parent constructor.
*/
public function __construct()
{
parent::__construct(1.0);
}
}

View File

@ -0,0 +1,14 @@
<?php
declare(strict_types=1);
namespace Phpml\Math\Distance;
/**
* Class Minkowski
*
* L^n Metric.
*
* Adds nothing of its own: constructor (norm defaulting to 3.0) and
* distance() are inherited unchanged from the parent class.
*/
class Minkowski extends Distance
{
}

View File

@ -0,0 +1,16 @@
<?php
declare(strict_types=1);
namespace Phpml\Math;
/**
* Contract for kernel functions that combine two inputs into one value.
*/
interface Kernel
{
/**
* Computes the kernel value for the pair ($a, $b).
*
* @param float|array $a
* @param float|array $b
*
* @return float|array
*/
public function compute($a, $b);
}

View File

@ -0,0 +1,33 @@
<?php
declare(strict_types=1);
namespace Phpml\Math\Kernel;
use Phpml\Math\Kernel;
use Phpml\Math\Product;
/**
 * Kernel of the form exp(-gamma * (a.a + b.b - 2 * a.b)), where "." stands
 * for Product::scalar() (presumably the dot product - verify against
 * Phpml\Math\Product).
 */
class RBF implements Kernel
{
    /**
     * Multiplier applied to the value inside the exponential.
     *
     * @var float
     */
    private $gamma;

    public function __construct(float $gamma)
    {
        $this->gamma = $gamma;
    }

    /**
     * @param array $a
     * @param array $b
     */
    public function compute($a, $b): float
    {
        $crossTerm = 2 * Product::scalar($a, $b);
        $selfTerms = Product::scalar($a, $a) + Product::scalar($b, $b);
        $exponent = -$this->gamma * ($selfTerms - $crossTerm);

        return exp($exponent);
    }
}

View File

@ -0,0 +1,959 @@
<?php
declare(strict_types=1);
/**
* Class to obtain eigenvalues and eigenvectors of a real matrix.
*
* If A is symmetric, then A = V*D*V' where the eigenvalue matrix D
* is diagonal and the eigenvector matrix V is orthogonal (i.e.
* A = V.times(D.times(V.transpose())) and V.times(V.transpose())
* equals the identity matrix).
*
* If A is not symmetric, then the eigenvalue matrix D is block diagonal
* with the real eigenvalues in 1-by-1 blocks and any complex eigenvalues,
* lambda + i*mu, in 2-by-2 blocks, [lambda, mu; -mu, lambda]. The
* columns of V represent the eigenvectors in the sense that A*V = V*D,
* i.e. A.times(V) equals V.times(D). The matrix V may be badly
* conditioned, or even singular, so the validity of the equation
* A = V*D*inverse(V) depends upon V.cond().
*
* @author Paul Meagher
* @license PHP v3.0
*
* @version 1.1
*
* Slightly changed to adapt the original code to PHP-ML library
* @date 2017/04/11
*
* @author Mustafa Karabulut
*/
namespace Phpml\Math\LinearAlgebra;
use Phpml\Math\Matrix;
class EigenvalueDecomposition
{
/**
* Row and column dimension (square matrix).
*
* @var int
*/
private $n;
/**
* Arrays for internal storage of eigenvalues.
*
* @var array
*/
private $d = [];
/**
* @var array
*/
private $e = [];
/**
* Array for internal storage of eigenvectors.
*
* @var array
*/
private $V = [];
/**
* Array for internal storage of nonsymmetric Hessenberg form.
*
* @var array
*/
private $H = [];
/**
* Working storage for nonsymmetric algorithm.
*
* @var array
*/
private $ort = [];
/**
* Used for complex scalar division.
*
* @var float
*/
private $cdivr;
/**
* @var float
*/
private $cdivi;
/**
 * Constructor: Check for symmetry, then construct the eigenvalue decomposition
 *
 * A symmetric matrix is copied to V and handled by tred2() and tql2(); any
 * other matrix is copied to H and handled by orthes() and hqr2().
 *
 * @param array $arg matrix as an array of rows; the dimension is taken from
 *                   the length of the first row
 */
public function __construct(array $arg)
{
    $this->n = count($arg[0]);

    // Compare each pair of mirrored off-diagonal entries once and stop at
    // the first mismatch. The comparison is deliberately loose so that
    // 1 and 1.0 count as equal.
    $symmetric = true;
    for ($j = 0; $j < $this->n && $symmetric; ++$j) {
        for ($i = 0; $i < $j && $symmetric; ++$i) {
            $symmetric = $arg[$i][$j] == $arg[$j][$i];
        }
    }

    if ($symmetric) {
        $this->V = $arg;
        // Tridiagonalize.
        $this->tred2();
        // Diagonalize.
        $this->tql2();
    } else {
        $this->H = $arg;
        $this->ort = [];
        // Reduce to Hessenberg form.
        $this->orthes();
        // Reduce Hessenberg to real Schur form.
        $this->hqr2();
    }
}
/**
 * Return the eigenvector matrix
 *
 * V is transposed first, so every returned row is one column of V, and each
 * row is divided by its Euclidean length.
 */
public function getEigenvectors(): array
{
    $columnsAsRows = (new Matrix($this->V))->transpose()->toArray();

    return array_map(function ($vector) {
        $squares = 0;
        foreach ($vector as $component) {
            $squares += $component ** 2;
        }

        // Always return the eigenvectors of length 1.0
        $length = $squares ** .5;
        foreach ($vector as $index => $component) {
            $vector[$index] = $component / $length;
        }

        return $vector;
    }, $columnsAsRows);
}
/**
* Return the real parts of the eigenvalues, i.e. d = real(diag(D)).
*
* This is the internal array d as filled in by the decomposition.
*/
public function getRealEigenvalues(): array
{
return $this->d;
}
/**
* Return the imaginary parts of the eigenvalues, i.e. d = imag(diag(D)).
*
* This is the internal array e as filled in by the decomposition.
*/
public function getImagEigenvalues(): array
{
return $this->e;
}
/**
 * Return the block diagonal eigenvalue matrix
 *
 * The real parts (d) go on the diagonal. A non-zero imaginary part (e) is
 * written next to the diagonal entry of its row: to the right when it is
 * positive, to the left when it is negative.
 */
public function getDiagonalEigenvalues(): array
{
    $blocks = [];
    for ($row = 0; $row < $this->n; ++$row) {
        $blocks[$row] = array_fill(0, $this->n, 0.0);
        $blocks[$row][$row] = $this->d[$row];

        $imaginary = $this->e[$row];
        if ($imaginary != 0) {
            $neighbour = $imaginary > 0 ? $row + 1 : $row - 1;
            $blocks[$row][$neighbour] = $imaginary;
        }
    }

    return $blocks;
}
/**
* Symmetric Householder reduction to tridiagonal form.
*
* Works in place on $this->V (the symmetric input stored by the constructor)
* and fills $this->d and $this->e; the constructor calls tql2() right after.
*/
private function tred2(): void
{
// This is derived from the Algol procedures tred2 by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
// Start from the last row of V.
$this->d = $this->V[$this->n - 1];
// Householder reduction to tridiagonal form.
for ($i = $this->n - 1; $i > 0; --$i) {
$i_ = $i - 1;
// Scale to avoid under/overflow.
$h = $scale = 0.0;
// NOTE(review): this sums every entry currently held in d, not only the
// first $i entries that the rest of the iteration uses - confirm intended.
$scale += array_sum(array_map('abs', $this->d));
if ($scale == 0.0) {
// Nothing to eliminate: move on to row i-1 and clear row/column i.
$this->e[$i] = $this->d[$i_];
$this->d = array_slice($this->V[$i_], 0, $this->n - 1);
for ($j = 0; $j < $i; ++$j) {
$this->V[$j][$i] = $this->V[$i][$j] = 0.0;
}
} else {
// Generate Householder vector.
for ($k = 0; $k < $i; ++$k) {
$this->d[$k] /= $scale;
$h += $this->d[$k] ** 2;
}
$f = $this->d[$i_];
$g = $h ** .5;
// Give g the sign opposite to f.
if ($f > 0) {
$g = -$g;
}
$this->e[$i] = $scale * $g;
$h -= $f * $g;
$this->d[$i_] = $f - $g;
for ($j = 0; $j < $i; ++$j) {
$this->e[$j] = 0.0;
}
// Apply similarity transformation to remaining columns.
for ($j = 0; $j < $i; ++$j) {
$f = $this->d[$j];
$this->V[$j][$i] = $f;
$g = $this->e[$j] + $this->V[$j][$j] * $f;
for ($k = $j + 1; $k <= $i_; ++$k) {
$g += $this->V[$k][$j] * $this->d[$k];
$this->e[$k] += $this->V[$k][$j] * $f;
}
$this->e[$j] = $g;
}
$f = 0.0;
// Keeps the divisions by $h below away from a zero divisor.
if ($h == 0.0) {
$h = 1e-32;
}
for ($j = 0; $j < $i; ++$j) {
$this->e[$j] /= $h;
$f += $this->e[$j] * $this->d[$j];
}
$hh = $f / (2 * $h);
for ($j = 0; $j < $i; ++$j) {
$this->e[$j] -= $hh * $this->d[$j];
}
for ($j = 0; $j < $i; ++$j) {
$f = $this->d[$j];
$g = $this->e[$j];
for ($k = $j; $k <= $i_; ++$k) {
$this->V[$k][$j] -= ($f * $this->e[$k] + $g * $this->d[$k]);
}
// Load row i-1 into d for the next iteration and clear row i.
$this->d[$j] = $this->V[$i - 1][$j];
$this->V[$i][$j] = 0.0;
}
}
// Keep h for this step; it is read back in the accumulation loop below.
$this->d[$i] = $h;
}
// Accumulate transformations.
for ($i = 0; $i < $this->n - 1; ++$i) {
// The last row of V temporarily holds the diagonal entries.
$this->V[$this->n - 1][$i] = $this->V[$i][$i];
$this->V[$i][$i] = 1.0;
$h = $this->d[$i + 1];
if ($h != 0.0) {
for ($k = 0; $k <= $i; ++$k) {
$this->d[$k] = $this->V[$k][$i + 1] / $h;
}
for ($j = 0; $j <= $i; ++$j) {
$g = 0.0;
for ($k = 0; $k <= $i; ++$k) {
$g += $this->V[$k][$i + 1] * $this->V[$k][$j];
}
for ($k = 0; $k <= $i; ++$k) {
$this->V[$k][$j] -= $g * $this->d[$k];
}
}
}
for ($k = 0; $k <= $i; ++$k) {
$this->V[$k][$i + 1] = 0.0;
}
}
// Move the stored diagonal into d and turn the last row of V into a unit row.
$this->d = $this->V[$this->n - 1];
$this->V[$this->n - 1] = array_fill(0, $this->n, 0.0);
$this->V[$this->n - 1][$this->n - 1] = 1.0;
$this->e[0] = 0.0;
}
/**
* Symmetric tridiagonal QL algorithm.
*
* This is derived from the Algol procedures tql2, by
* Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
* Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
* Fortran subroutine in EISPACK.
*
* Works in place on $this->d, $this->e and $this->V as left by tred2().
* On return every entry of e is 0.0, d is sorted in ascending order and the
* columns of V have been swapped along with the entries of d.
*/
private function tql2(): void
{
// Shift e one position towards index 0 and clear the last entry.
for ($i = 1; $i < $this->n; ++$i) {
$this->e[$i - 1] = $this->e[$i];
}
$this->e[$this->n - 1] = 0.0;
$f = 0.0;
$tst1 = 0.0;
// Tolerance factor used by the "small element" tests below.
$eps = 2.0 ** -52.0;
for ($l = 0; $l < $this->n; ++$l) {
// Find small subdiagonal element
$tst1 = max($tst1, abs($this->d[$l]) + abs($this->e[$l]));
$m = $l;
while ($m < $this->n) {
if (abs($this->e[$m]) <= $eps * $tst1) {
break;
}
++$m;
}
// If m == l, $this->d[l] is an eigenvalue,
// otherwise, iterate.
if ($m > $l) {
do {
// Compute implicit shift
$g = $this->d[$l];
$p = ($this->d[$l + 1] - $g) / (2.0 * $this->e[$l]);
$r = hypot($p, 1.0);
// Give r the same sign as p.
if ($p < 0) {
$r *= -1;
}
$this->d[$l] = $this->e[$l] / ($p + $r);
$this->d[$l + 1] = $this->e[$l] * ($p + $r);
$dl1 = $this->d[$l + 1];
$h = $g - $this->d[$l];
for ($i = $l + 2; $i < $this->n; ++$i) {
$this->d[$i] -= $h;
}
// f accumulates the shifts; it is added back to d[l] after the loop.
$f += $h;
// Implicit QL transformation.
$p = $this->d[$m];
$c = 1.0;
$c2 = $c3 = $c;
$el1 = $this->e[$l + 1];
$s = $s2 = 0.0;
for ($i = $m - 1; $i >= $l; --$i) {
$c3 = $c2;
$c2 = $c;
$s2 = $s;
$g = $c * $this->e[$i];
$h = $c * $p;
$r = hypot($p, $this->e[$i]);
$this->e[$i + 1] = $s * $r;
$s = $this->e[$i] / $r;
$c = $p / $r;
$p = $c * $this->d[$i] - $s * $g;
$this->d[$i + 1] = $h + $s * ($c * $g + $s * $this->d[$i]);
// Accumulate transformation.
for ($k = 0; $k < $this->n; ++$k) {
$h = $this->V[$k][$i + 1];
$this->V[$k][$i + 1] = $s * $this->V[$k][$i] + $c * $h;
$this->V[$k][$i] = $c * $this->V[$k][$i] - $s * $h;
}
}
$p = -$s * $s2 * $c3 * $el1 * $this->e[$l] / $dl1;
$this->e[$l] = $s * $p;
$this->d[$l] = $c * $p;
// Check for convergence.
} while (abs($this->e[$l]) > $eps * $tst1);
}
$this->d[$l] += $f;
$this->e[$l] = 0.0;
}
// Sort eigenvalues and corresponding vectors.
// Selection sort: find the smallest remaining d[j], then swap both the
// value and the matching column of V into position i.
for ($i = 0; $i < $this->n - 1; ++$i) {
$k = $i;
$p = $this->d[$i];
for ($j = $i + 1; $j < $this->n; ++$j) {
if ($this->d[$j] < $p) {
$k = $j;
$p = $this->d[$j];
}
}
if ($k != $i) {
$this->d[$k] = $this->d[$i];
$this->d[$i] = $p;
for ($j = 0; $j < $this->n; ++$j) {
$p = $this->V[$j][$i];
$this->V[$j][$i] = $this->V[$j][$k];
$this->V[$j][$k] = $p;
}
}
}
}
/**
* Nonsymmetric reduction to Hessenberg form.
*
* This is derived from the Algol procedures orthes and ortran,
* by Martin and Wilkinson, Handbook for Auto. Comp.,
* Vol.ii-Linear Algebra, and the corresponding
* Fortran subroutines in EISPACK.
*
* Works in place on $this->H (the input stored by the constructor), uses
* $this->ort as scratch space and builds $this->V from an identity matrix.
*/
private function orthes(): void
{
// The whole index range 0 .. n-1 is processed.
$low = 0;
$high = $this->n - 1;
for ($m = $low + 1; $m <= $high - 1; ++$m) {
// Scale column.
$scale = 0.0;
for ($i = $m; $i <= $high; ++$i) {
$scale += abs($this->H[$i][$m - 1]);
}
// A zero scale means column m-1 is already zero from row m down.
if ($scale != 0.0) {
// Compute Householder transformation.
$h = 0.0;
for ($i = $high; $i >= $m; --$i) {
$this->ort[$i] = $this->H[$i][$m - 1] / $scale;
$h += $this->ort[$i] * $this->ort[$i];
}
$g = $h ** .5;
// Give g the sign opposite to ort[m].
if ($this->ort[$m] > 0) {
$g *= -1;
}
$h -= $this->ort[$m] * $g;
$this->ort[$m] -= $g;
// Apply Householder similarity transformation
// H = (I -u * u' / h) * H * (I -u * u') / h)
// First from the left ...
for ($j = $m; $j < $this->n; ++$j) {
$f = 0.0;
for ($i = $high; $i >= $m; --$i) {
$f += $this->ort[$i] * $this->H[$i][$j];
}
$f /= $h;
for ($i = $m; $i <= $high; ++$i) {
$this->H[$i][$j] -= $f * $this->ort[$i];
}
}
// ... then from the right.
for ($i = 0; $i <= $high; ++$i) {
$f = 0.0;
for ($j = $high; $j >= $m; --$j) {
$f += $this->ort[$j] * $this->H[$i][$j];
}
$f /= $h;
for ($j = $m; $j <= $high; ++$j) {
$this->H[$i][$j] -= $f * $this->ort[$j];
}
}
$this->ort[$m] = $scale * $this->ort[$m];
$this->H[$m][$m - 1] = $scale * $g;
}
}
// Accumulate transformations (Algol's ortran).
// V starts out as the identity matrix.
for ($i = 0; $i < $this->n; ++$i) {
for ($j = 0; $j < $this->n; ++$j) {
$this->V[$i][$j] = ($i == $j ? 1.0 : 0.0);
}
}
for ($m = $high - 1; $m >= $low + 1; --$m) {
if ($this->H[$m][$m - 1] != 0.0) {
// ort[m] is still the value stored above; the remaining entries are
// read back from column m-1 of H.
for ($i = $m + 1; $i <= $high; ++$i) {
$this->ort[$i] = $this->H[$i][$m - 1];
}
for ($j = $m; $j <= $high; ++$j) {
$g = 0.0;
for ($i = $m; $i <= $high; ++$i) {
$g += $this->ort[$i] * $this->V[$i][$j];
}
// Double division avoids possible underflow
$g = ($g / $this->ort[$m]) / $this->H[$m][$m - 1];
for ($i = $m; $i <= $high; ++$i) {
$this->V[$i][$j] += $g * $this->ort[$i];
}
}
}
}
}
/**
 * Performs complex division.
 *
 * Divides (xr + i*xi) by (yr + i*yi) and leaves the real part of the
 * quotient in $this->cdivr and the imaginary part in $this->cdivi. The
 * formula that divides by the larger of |yr| and |yi| is chosen.
 *
 * @param int|float $xr
 * @param int|float $xi
 * @param int|float $yr
 * @param int|float $yi
 */
private function cdiv($xr, $xi, $yr, $yi): void
{
    if (abs($yr) > abs($yi)) {
        $ratio = $yi / $yr;
        $denominator = $yr + $ratio * $yi;
        $real = ($xr + $ratio * $xi) / $denominator;
        $imaginary = ($xi - $ratio * $xr) / $denominator;
    } else {
        $ratio = $yr / $yi;
        $denominator = $yi + $ratio * $yr;
        $real = ($ratio * $xr + $xi) / $denominator;
        $imaginary = ($ratio * $xi - $xr) / $denominator;
    }

    $this->cdivr = $real;
    $this->cdivi = $imaginary;
}
/**
 * Nonsymmetric reduction from Hessenberg to real Schur form.
 *
 * Code is derived from the Algol procedure hqr2,
 * by Martin and Wilkinson, Handbook for Auto. Comp.,
 * Vol.ii-Linear Algebra, and the corresponding
 * Fortran subroutine in EISPACK.
 *
 * Works on $this->H and $this->V as left by orthes(). The real and
 * imaginary parts of the eigenvalues are written to $this->d and $this->e,
 * and $this->V is overwritten with the eigenvectors.
 */
private function hqr2(): void
{
    // Initialize
    $nn = $this->n;
    $n = $nn - 1;
    $low = 0;
    $high = $nn - 1;
    $eps = 2.0 ** -52.0;
    $exshift = 0.0;
    $p = $q = $r = $s = $z = 0;

    // Store roots isolated by balanc and compute matrix norm
    $norm = 0.0;
    for ($i = 0; $i < $nn; ++$i) {
        if (($i < $low) || ($i > $high)) {
            $this->d[$i] = $this->H[$i][$i];
            $this->e[$i] = 0.0;
        }
        for ($j = max($i - 1, 0); $j < $nn; ++$j) {
            $norm += abs($this->H[$i][$j]);
        }
    }

    // Outer loop over eigenvalue index
    $iter = 0;
    while ($n >= $low) {
        // Look for single small sub-diagonal element
        $l = $n;
        while ($l > $low) {
            $s = abs($this->H[$l - 1][$l - 1]) + abs($this->H[$l][$l]);
            if ($s == 0.0) {
                $s = $norm;
            }
            if (abs($this->H[$l][$l - 1]) < $eps * $s) {
                break;
            }
            --$l;
        }

        // Check for convergence
        if ($l == $n) {
            // One root found
            $this->H[$n][$n] += $exshift;
            $this->d[$n] = $this->H[$n][$n];
            $this->e[$n] = 0.0;
            --$n;
            $iter = 0;
        } elseif ($l == $n - 1) {
            // Two roots found
            $w = $this->H[$n][$n - 1] * $this->H[$n - 1][$n];
            $p = ($this->H[$n - 1][$n - 1] - $this->H[$n][$n]) / 2.0;
            $q = $p * $p + $w;
            $z = abs($q) ** .5;
            $this->H[$n][$n] += $exshift;
            $this->H[$n - 1][$n - 1] += $exshift;
            $x = $this->H[$n][$n];
            if ($q >= 0) {
                // Real pair
                if ($p >= 0) {
                    $z = $p + $z;
                } else {
                    $z = $p - $z;
                }
                $this->d[$n - 1] = $x + $z;
                $this->d[$n] = $this->d[$n - 1];
                if ($z != 0.0) {
                    $this->d[$n] = $x - $w / $z;
                }
                $this->e[$n - 1] = 0.0;
                $this->e[$n] = 0.0;
                $x = $this->H[$n][$n - 1];
                $s = abs($x) + abs($z);
                $p = $x / $s;
                $q = $z / $s;
                $r = ($p * $p + $q * $q) ** .5;
                $p /= $r;
                $q /= $r;
                // Row modification
                for ($j = $n - 1; $j < $nn; ++$j) {
                    $z = $this->H[$n - 1][$j];
                    $this->H[$n - 1][$j] = $q * $z + $p * $this->H[$n][$j];
                    $this->H[$n][$j] = $q * $this->H[$n][$j] - $p * $z;
                }
                // Column modification
                for ($i = 0; $i <= $n; ++$i) {
                    $z = $this->H[$i][$n - 1];
                    $this->H[$i][$n - 1] = $q * $z + $p * $this->H[$i][$n];
                    $this->H[$i][$n] = $q * $this->H[$i][$n] - $p * $z;
                }
                // Accumulate transformations
                for ($i = $low; $i <= $high; ++$i) {
                    $z = $this->V[$i][$n - 1];
                    $this->V[$i][$n - 1] = $q * $z + $p * $this->V[$i][$n];
                    $this->V[$i][$n] = $q * $this->V[$i][$n] - $p * $z;
                }
            } else {
                // Complex pair
                $this->d[$n - 1] = $x + $p;
                $this->d[$n] = $x + $p;
                $this->e[$n - 1] = $z;
                $this->e[$n] = -$z;
            }
            $n -= 2;
            $iter = 0;
        } else {
            // No convergence yet
            // Form shift
            $x = $this->H[$n][$n];
            $y = 0.0;
            $w = 0.0;
            if ($l < $n) {
                $y = $this->H[$n - 1][$n - 1];
                $w = $this->H[$n][$n - 1] * $this->H[$n - 1][$n];
            }

            // Wilkinson's original ad hoc shift
            if ($iter == 10) {
                $exshift += $x;
                for ($i = $low; $i <= $n; ++$i) {
                    $this->H[$i][$i] -= $x;
                }
                $s = abs($this->H[$n][$n - 1]) + abs($this->H[$n - 1][$n - 2]);
                $x = $y = 0.75 * $s;
                $w = -0.4375 * $s * $s;
            }

            // MATLAB's new ad hoc shift
            if ($iter == 30) {
                $s = ($y - $x) / 2.0;
                // s^2 + w, as in the reference algorithm. This must not be
                // shortened to "$s *= $s + $w", which evaluates s * (s + w).
                $s = $s * $s + $w;
                if ($s > 0) {
                    $s **= .5;
                    if ($y < $x) {
                        $s = -$s;
                    }
                    $s = $x - $w / (($y - $x) / 2.0 + $s);
                    for ($i = $low; $i <= $n; ++$i) {
                        $this->H[$i][$i] -= $s;
                    }
                    $exshift += $s;
                    $x = $y = $w = 0.964;
                }
            }

            // Could check iteration count here.
            ++$iter;

            // Look for two consecutive small sub-diagonal elements
            $m = $n - 2;
            while ($m >= $l) {
                $z = $this->H[$m][$m];
                $r = $x - $z;
                $s = $y - $z;
                $p = ($r * $s - $w) / $this->H[$m + 1][$m] + $this->H[$m][$m + 1];
                $q = $this->H[$m + 1][$m + 1] - $z - $r - $s;
                $r = $this->H[$m + 2][$m + 1];
                $s = abs($p) + abs($q) + abs($r);
                $p /= $s;
                $q /= $s;
                $r /= $s;
                if ($m == $l) {
                    break;
                }
                if (abs($this->H[$m][$m - 1]) * (abs($q) + abs($r)) <
                    $eps * (abs($p) * (abs($this->H[$m - 1][$m - 1]) + abs($z) + abs($this->H[$m + 1][$m + 1])))) {
                    break;
                }
                --$m;
            }

            for ($i = $m + 2; $i <= $n; ++$i) {
                $this->H[$i][$i - 2] = 0.0;
                if ($i > $m + 2) {
                    $this->H[$i][$i - 3] = 0.0;
                }
            }

            // Double QR step involving rows l:n and columns m:n
            for ($k = $m; $k <= $n - 1; ++$k) {
                $notlast = ($k != $n - 1);
                if ($k != $m) {
                    $p = $this->H[$k][$k - 1];
                    $q = $this->H[$k + 1][$k - 1];
                    $r = ($notlast ? $this->H[$k + 2][$k - 1] : 0.0);
                    $x = abs($p) + abs($q) + abs($r);
                    if ($x != 0.0) {
                        $p /= $x;
                        $q /= $x;
                        $r /= $x;
                    }
                }
                if ($x == 0.0) {
                    break;
                }
                $s = ($p * $p + $q * $q + $r * $r) ** .5;
                if ($p < 0) {
                    $s = -$s;
                }
                if ($s != 0) {
                    if ($k != $m) {
                        $this->H[$k][$k - 1] = -$s * $x;
                    } elseif ($l != $m) {
                        $this->H[$k][$k - 1] = -$this->H[$k][$k - 1];
                    }
                    $p += $s;
                    $x = $p / $s;
                    $y = $q / $s;
                    $z = $r / $s;
                    $q /= $p;
                    $r /= $p;
                    // Row modification
                    for ($j = $k; $j < $nn; ++$j) {
                        $p = $this->H[$k][$j] + $q * $this->H[$k + 1][$j];
                        if ($notlast) {
                            $p += $r * $this->H[$k + 2][$j];
                            $this->H[$k + 2][$j] -= $p * $z;
                        }
                        $this->H[$k][$j] -= $p * $x;
                        $this->H[$k + 1][$j] -= $p * $y;
                    }
                    // Column modification
                    for ($i = 0; $i <= min($n, $k + 3); ++$i) {
                        $p = $x * $this->H[$i][$k] + $y * $this->H[$i][$k + 1];
                        if ($notlast) {
                            $p += $z * $this->H[$i][$k + 2];
                            $this->H[$i][$k + 2] -= $p * $r;
                        }
                        $this->H[$i][$k] -= $p;
                        $this->H[$i][$k + 1] -= $p * $q;
                    }
                    // Accumulate transformations
                    for ($i = $low; $i <= $high; ++$i) {
                        $p = $x * $this->V[$i][$k] + $y * $this->V[$i][$k + 1];
                        if ($notlast) {
                            $p += $z * $this->V[$i][$k + 2];
                            $this->V[$i][$k + 2] -= $p * $r;
                        }
                        $this->V[$i][$k] -= $p;
                        $this->V[$i][$k + 1] -= $p * $q;
                    }
                } // ($s != 0)
            } // k loop
        } // check convergence
    } // while ($n >= $low)

    // Backsubstitute to find vectors of upper triangular form
    if ($norm == 0.0) {
        return;
    }

    for ($n = $nn - 1; $n >= 0; --$n) {
        $p = $this->d[$n];
        $q = $this->e[$n];
        if ($q == 0) {
            // Real vector
            $l = $n;
            $this->H[$n][$n] = 1.0;
            for ($i = $n - 1; $i >= 0; --$i) {
                $w = $this->H[$i][$i] - $p;
                $r = 0.0;
                for ($j = $l; $j <= $n; ++$j) {
                    $r += $this->H[$i][$j] * $this->H[$j][$n];
                }
                if ($this->e[$i] < 0.0) {
                    $z = $w;
                    $s = $r;
                } else {
                    $l = $i;
                    if ($this->e[$i] == 0.0) {
                        if ($w != 0.0) {
                            $this->H[$i][$n] = -$r / $w;
                        } else {
                            $this->H[$i][$n] = -$r / ($eps * $norm);
                        }
                    } else {
                        // Solve real equations
                        $x = $this->H[$i][$i + 1];
                        $y = $this->H[$i + 1][$i];
                        $q = ($this->d[$i] - $p) * ($this->d[$i] - $p) + $this->e[$i] * $this->e[$i];
                        $t = ($x * $s - $z * $r) / $q;
                        $this->H[$i][$n] = $t;
                        if (abs($x) > abs($z)) {
                            $this->H[$i + 1][$n] = (-$r - $w * $t) / $x;
                        } else {
                            $this->H[$i + 1][$n] = (-$s - $y * $t) / $z;
                        }
                    }
                    // Overflow control
                    $t = abs($this->H[$i][$n]);
                    if (($eps * $t) * $t > 1) {
                        for ($j = $i; $j <= $n; ++$j) {
                            $this->H[$j][$n] /= $t;
                        }
                    }
                }
            }
        } elseif ($q < 0) {
            // Complex vector
            $l = $n - 1;
            // Last vector component imaginary so matrix is triangular
            if (abs($this->H[$n][$n - 1]) > abs($this->H[$n - 1][$n])) {
                $this->H[$n - 1][$n - 1] = $q / $this->H[$n][$n - 1];
                $this->H[$n - 1][$n] = -($this->H[$n][$n] - $p) / $this->H[$n][$n - 1];
            } else {
                $this->cdiv(0.0, -$this->H[$n - 1][$n], $this->H[$n - 1][$n - 1] - $p, $q);
                $this->H[$n - 1][$n - 1] = $this->cdivr;
                $this->H[$n - 1][$n] = $this->cdivi;
            }
            $this->H[$n][$n - 1] = 0.0;
            $this->H[$n][$n] = 1.0;
            for ($i = $n - 2; $i >= 0; --$i) {
                // double ra,sa,vr,vi;
                $ra = 0.0;
                $sa = 0.0;
                for ($j = $l; $j <= $n; ++$j) {
                    $ra += $this->H[$i][$j] * $this->H[$j][$n - 1];
                    $sa += $this->H[$i][$j] * $this->H[$j][$n];
                }
                $w = $this->H[$i][$i] - $p;
                if ($this->e[$i] < 0.0) {
                    $z = $w;
                    $r = $ra;
                    $s = $sa;
                } else {
                    $l = $i;
                    if ($this->e[$i] == 0) {
                        $this->cdiv(-$ra, -$sa, $w, $q);
                        $this->H[$i][$n - 1] = $this->cdivr;
                        $this->H[$i][$n] = $this->cdivi;
                    } else {
                        // Solve complex equations
                        $x = $this->H[$i][$i + 1];
                        $y = $this->H[$i + 1][$i];
                        $vr = ($this->d[$i] - $p) * ($this->d[$i] - $p) + $this->e[$i] * $this->e[$i] - $q * $q;
                        $vi = ($this->d[$i] - $p) * 2.0 * $q;
                        if ($vr == 0.0 && $vi == 0.0) {
                            $vr = $eps * $norm * (abs($w) + abs($q) + abs($x) + abs($y) + abs($z));
                        }
                        $this->cdiv($x * $r - $z * $ra + $q * $sa, $x * $s - $z * $sa - $q * $ra, $vr, $vi);
                        $this->H[$i][$n - 1] = $this->cdivr;
                        $this->H[$i][$n] = $this->cdivi;
                        if (abs($x) > (abs($z) + abs($q))) {
                            $this->H[$i + 1][$n - 1] = (-$ra - $w * $this->H[$i][$n - 1] + $q * $this->H[$i][$n]) / $x;
                            $this->H[$i + 1][$n] = (-$sa - $w * $this->H[$i][$n] - $q * $this->H[$i][$n - 1]) / $x;
                        } else {
                            $this->cdiv(-$r - $y * $this->H[$i][$n - 1], -$s - $y * $this->H[$i][$n], $z, $q);
                            $this->H[$i + 1][$n - 1] = $this->cdivr;
                            $this->H[$i + 1][$n] = $this->cdivi;
                        }
                    }
                    // Overflow control
                    $t = max(abs($this->H[$i][$n - 1]), abs($this->H[$i][$n]));
                    if (($eps * $t) * $t > 1) {
                        for ($j = $i; $j <= $n; ++$j) {
                            $this->H[$j][$n - 1] /= $t;
                            $this->H[$j][$n] /= $t;
                        }
                    }
                } // end else
            } // end for
        } // end else for complex case
    } // end for

    // Vectors of isolated roots
    for ($i = 0; $i < $nn; ++$i) {
        if ($i < $low || $i > $high) {
            for ($j = $i; $j < $nn; ++$j) {
                $this->V[$i][$j] = $this->H[$i][$j];
            }
        }
    }

    // Back transformation to get eigenvectors of original matrix
    for ($j = $nn - 1; $j >= $low; --$j) {
        for ($i = $low; $i <= $high; ++$i) {
            $z = 0.0;
            for ($k = $low; $k <= min($j, $high); ++$k) {
                $z += $this->V[$i][$k] * $this->H[$k][$j];
            }
            $this->V[$i][$j] = $z;
        }
    }
}
}

View File

@ -0,0 +1,299 @@
<?php
declare(strict_types=1);
/**
* @package JAMA
*
* For an m-by-n matrix A with m >= n, the LU decomposition is an m-by-n
* unit lower triangular matrix L, an n-by-n upper triangular matrix U,
* and a permutation vector piv of length m so that A(piv,:) = L*U.
* If m < n, then L is m-by-m and U is m-by-n.
*
* The LU decompostion with pivoting always exists, even if the matrix is
* singular, so the constructor will never fail. The primary use of the
* LU decomposition is in the solution of square systems of simultaneous
* linear equations. This will fail if isNonsingular() returns false.
*
* @author Paul Meagher
* @author Bartosz Matosiuk
* @author Michael Bommarito
*
* @version 1.1
*
* @license PHP v3.0
*
* Slightly changed to adapt the original code to PHP-ML library
* @date 2017/04/24
*
* @author Mustafa Karabulut
*/
namespace Phpml\Math\LinearAlgebra;
use Phpml\Exception\MatrixException;
use Phpml\Math\Matrix;
class LUDecomposition
{
/**
* Decomposition storage
*
* @var array
*/
private $LU = [];
/**
* Row dimension.
*
* @var int
*/
private $m;
/**
* Column dimension.
*
* @var int
*/
private $n;
/**
* Pivot sign.
*
* @var int
*/
private $pivsign;
/**
* Internal storage of pivot vector.
*
* @var array
*/
private $piv = [];
/**
* Constructs Structure to access L, U and piv.
*
* The decomposition is stored in a single array LU; getL() and getU() read
* the two factors back out of it. piv records the row exchanges and pivsign
* changes sign on every exchange.
*
* @param Matrix $A Matrix to decompose; it must be square, otherwise a
*                  MatrixException is thrown
*
* @throws MatrixException
*/
public function __construct(Matrix $A)
{
if ($A->getRows() !== $A->getColumns()) {
throw new MatrixException('Matrix is not square matrix');
}
// Use a "left-looking", dot-product, Crout/Doolittle algorithm.
$this->LU = $A->toArray();
$this->m = $A->getRows();
$this->n = $A->getColumns();
// Start from the identity permutation.
for ($i = 0; $i < $this->m; ++$i) {
$this->piv[$i] = $i;
}
$this->pivsign = 1;
$LUcolj = [];
// Outer loop.
for ($j = 0; $j < $this->n; ++$j) {
// Make a copy of the j-th column to localize references.
// The entries are bound by reference, so writing to $LUcolj[$i] below
// writes straight into $this->LU[$i][$j].
for ($i = 0; $i < $this->m; ++$i) {
$LUcolj[$i] = &$this->LU[$i][$j];
}
// Apply previous transformations.
for ($i = 0; $i < $this->m; ++$i) {
$LUrowi = $this->LU[$i];
// Most of the time is spent in the following dot product.
$kmax = min($i, $j);
$s = 0.0;
for ($k = 0; $k < $kmax; ++$k) {
$s += $LUrowi[$k] * $LUcolj[$k];
}
$LUrowi[$j] = $LUcolj[$i] -= $s;
}
// Find pivot and exchange if necessary.
// The pivot is the entry of largest absolute value at or below the diagonal.
$p = $j;
for ($i = $j + 1; $i < $this->m; ++$i) {
if (abs($LUcolj[$i] ?? 0) > abs($LUcolj[$p] ?? 0)) {
$p = $i;
}
}
if ($p != $j) {
// Swap rows p and j of LU, the matching piv entries, and flip the sign.
for ($k = 0; $k < $this->n; ++$k) {
$t = $this->LU[$p][$k];
$this->LU[$p][$k] = $this->LU[$j][$k];
$this->LU[$j][$k] = $t;
}
$k = $this->piv[$p];
$this->piv[$p] = $this->piv[$j];
$this->piv[$j] = $k;
$this->pivsign *= -1;
}
// Compute multipliers.
// Skipped when the pivot is zero, which avoids a division by zero.
if (($j < $this->m) && ($this->LU[$j][$j] != 0.0)) {
for ($i = $j + 1; $i < $this->m; ++$i) {
$this->LU[$i][$j] /= $this->LU[$j][$j];
}
}
}
}
/**
 * Get lower triangular factor.
 *
 * Entries below the diagonal are read from the stored decomposition, the
 * diagonal is 1.0 and everything above it is 0.0.
 *
 * @return Matrix Lower triangular factor
 */
public function getL(): Matrix
{
    $lower = [];
    for ($row = 0; $row < $this->m; ++$row) {
        for ($col = 0; $col < $this->n; ++$col) {
            if ($row === $col) {
                $lower[$row][$col] = 1.0;

                continue;
            }

            $lower[$row][$col] = $row > $col ? $this->LU[$row][$col] : 0.0;
        }
    }

    return new Matrix($lower);
}
/**
 * Get upper triangular factor.
 *
 * Entries on and above the diagonal are read from the stored decomposition;
 * everything below the diagonal is 0.0.
 *
 * @return Matrix Upper triangular factor
 */
public function getU(): Matrix
{
    $upper = [];
    for ($row = 0; $row < $this->n; ++$row) {
        for ($col = 0; $col < $this->n; ++$col) {
            $upper[$row][$col] = $row <= $col ? $this->LU[$row][$col] : 0.0;
        }
    }

    return new Matrix($upper);
}
/**
* Return pivot permutation vector.
*
* Entry i holds the index of the original row that ended up in row i after
* the row exchanges performed by the constructor.
*
* @return array Pivot vector
*/
public function getPivot(): array
{
return $this->piv;
}
/**
* Alias for getPivot
*
* Returns exactly what getPivot() returns.
*
* @see getPivot
*/
public function getDoublePivot(): array
{
return $this->getPivot();
}
/**
 * Is the matrix nonsingular?
 *
 * Walks the diagonal of the stored decomposition and stops at the first
 * entry that equals zero.
 *
 * @return bool true if U, and hence A, is nonsingular.
 */
public function isNonsingular(): bool
{
    $pivot = 0;
    while ($pivot < $this->n && $this->LU[$pivot][$pivot] != 0) {
        ++$pivot;
    }

    // The whole diagonal was walked only when no zero entry was met.
    return $pivot === $this->n;
}
/**
 * Returns the pivot sign multiplied by every diagonal entry of the stored
 * decomposition, cast to float.
 */
public function det(): float
{
    $determinant = $this->pivsign;
    for ($diagonal = 0; $diagonal < $this->n; ++$diagonal) {
        $determinant = $determinant * $this->LU[$diagonal][$diagonal];
    }

    return (float) $determinant;
}
/**
 * Solve A*X = B
 *
 * @param Matrix $B A Matrix with as many rows as A and any number of columns.
 *
 * @return array X so that L*U*X = B(piv,:)
 *
 * @throws MatrixException when B has a different number of rows than A,
 *                         or when A is singular
 */
public function solve(Matrix $B): array
{
    // B only has to match A in its row count; it does not have to be square.
    if ($B->getRows() != $this->m) {
        throw new MatrixException('Matrix row dimensions must agree');
    }

    if (!$this->isNonsingular()) {
        throw new MatrixException('Matrix is singular');
    }

    // Copy right hand side with pivoting
    $nx = $B->getColumns();
    $X = $this->getSubMatrix($B->toArray(), $this->piv, 0, $nx - 1);

    // Solve L*Y = B(piv,:)
    for ($k = 0; $k < $this->n; ++$k) {
        for ($i = $k + 1; $i < $this->n; ++$i) {
            for ($j = 0; $j < $nx; ++$j) {
                $X[$i][$j] -= $X[$k][$j] * $this->LU[$i][$k];
            }
        }
    }

    // Solve U*X = Y;
    for ($k = $this->n - 1; $k >= 0; --$k) {
        for ($j = 0; $j < $nx; ++$j) {
            $X[$k][$j] /= $this->LU[$k][$k];
        }
        for ($i = 0; $i < $k; ++$i) {
            for ($j = 0; $j < $nx; ++$j) {
                $X[$i][$j] -= $X[$k][$j] * $this->LU[$i][$k];
            }
        }
    }

    return $X;
}
/**
 * Copies columns $j0 to $jF (inclusive) of the rows listed in $RL.
 *
 * Row i of the result is row $RL[i] of $matrix, and its columns are
 * renumbered from 0.
 */
protected function getSubMatrix(array $matrix, array $RL, int $j0, int $jF): array
{
    $rowCount = count($RL);
    $width = $jF - $j0 + 1;
    $selection = array_fill(0, $rowCount, array_fill(0, $width, 0.0));

    for ($target = 0; $target < $rowCount; ++$target) {
        for ($offset = 0; $offset < $width; ++$offset) {
            $selection[$target][$offset] = $matrix[$RL[$target]][$j0 + $offset];
        }
    }

    return $selection;
}
}

View File

@ -0,0 +1,327 @@
<?php
declare(strict_types=1);
namespace Phpml\Math;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\MatrixException;
use Phpml\Math\LinearAlgebra\LUDecomposition;
class Matrix
{
/**
* @var array
*/
private $matrix = [];
/**
* @var int
*/
private $rows;
/**
* @var int
*/
private $columns;
/**
* @var float
*/
private $determinant;
/**
 * Builds the matrix from an array of rows, or from a flat array which is
 * then stored as a single row.
 *
 * @param bool $validate when true, every row must have as many entries as
 *                       the first one
 *
 * @throws InvalidArgumentException
 */
public function __construct(array $matrix, bool $validate = true)
{
    // When a row vector is given, wrap it so that it becomes the only row
    if (!is_array($matrix[0])) {
        $matrix = [$matrix];
    }

    $this->rows = count($matrix);
    $this->columns = count($matrix[0]);

    for ($row = 0; $validate && $row < $this->rows; ++$row) {
        if (count($matrix[$row]) !== $this->columns) {
            throw new InvalidArgumentException('Matrix dimensions did not match');
        }
    }

    $this->matrix = $matrix;
}
/**
 * Builds a single-column matrix: every value of $array becomes one row.
 */
public static function fromFlatArray(array $array): self
{
    $singleColumn = array_map(
        function ($value) {
            return [$value];
        },
        array_values($array)
    );

    return new self($singleColumn);
}
/**
* Returns the matrix as an array of rows.
*/
public function toArray(): array
{
return $this->matrix;
}
/**
* Returns the entry in the first row and first column.
*/
public function toScalar(): float
{
return $this->matrix[0][0];
}
/**
* Returns the number of rows determined by the constructor.
*/
public function getRows(): int
{
return $this->rows;
}
/**
* Returns the number of columns determined by the constructor.
*/
public function getColumns(): int
{
return $this->columns;
}
/**
 * Returns the values of one column, from the first row to the last.
 *
 * @param int $column zero-based column index
 *
 * @throws MatrixException when the index is negative or not below the
 *                         column count
 */
public function getColumnValues(int $column): array
{
    // A negative index is rejected as well; array_column() would otherwise
    // find no such key and silently return an empty array.
    if ($column < 0 || $column >= $this->columns) {
        throw new MatrixException('Column out of range');
    }

    return array_column($this->matrix, $column);
}
/**
 * Returns the determinant, computed through an LU decomposition on the
 * first call and served from the cached value afterwards.
 *
 * @return float|int
 *
 * @throws MatrixException when the matrix is not square
 */
public function getDeterminant()
{
    if ($this->determinant === null) {
        if (!$this->isSquare()) {
            throw new MatrixException('Matrix is not square matrix');
        }

        $this->determinant = (new LUDecomposition($this))->det();
    }

    return $this->determinant;
}
/**
* Tells whether the matrix has as many columns as rows.
*/
public function isSquare(): bool
{
return $this->columns === $this->rows;
}
/**
 * Returns a new matrix whose rows are the columns of this one.
 */
public function transpose(): self
{
    if ($this->rows !== 1) {
        return new self(array_map(null, ...$this->matrix), false);
    }

    // array_map(null, ...) with one single array would hand that array
    // back unchanged, so a one-row matrix is turned into a column by hand.
    $asColumn = array_map(function ($el) {
        return [$el];
    }, $this->matrix[0]);

    return new self($asColumn, false);
}
/**
 * Returns the matrix product of this matrix and the given one.
 *
 * @throws InvalidArgumentException when the column count of this matrix
 *                                  differs from the row count of $matrix
 */
public function multiply(self $matrix): self
{
    if ($this->columns !== $matrix->getRows()) {
        throw new InvalidArgumentException('Inconsistent matrix supplied');
    }

    /*
     - To speed-up multiplication, we need to avoid use of array index operator [ ] as much as possible( See #255 for details)
     - A combination of "foreach" and "array_column" works much faster then accessing the array via index operator
     */
    // The columns of the right-hand matrix do not depend on the left-hand
    // row, so they are extracted once instead of once per row.
    $rightRows = $matrix->toArray();
    $colCount = $matrix->columns;
    $rightColumns = [];
    for ($col = 0; $col < $colCount; ++$col) {
        $rightColumns[$col] = array_column($rightRows, $col);
    }

    $product = [];
    foreach ($this->toArray() as $row => $rowData) {
        foreach ($rightColumns as $col => $columnData) {
            $sum = 0;
            foreach ($rowData as $key => $valueData) {
                $sum += $valueData * $columnData[$key];
            }

            $product[$row][$col] = $sum;
        }
    }

    return new self($product, false);
}
/**
 * Returns a new matrix in which every entry is divided by $value.
 *
 * @param float|int $value
 */
public function divideByScalar($value): self
{
    $quotients = [];
    for ($row = 0; $row < $this->rows; ++$row) {
        $source = $this->matrix[$row];
        for ($col = 0; $col < $this->columns; ++$col) {
            $quotients[$row][$col] = $source[$col] / $value;
        }
    }

    return new self($quotients, false);
}
/**
 * Returns a new matrix in which every entry is multiplied by $value.
 *
 * @param float|int $value
 */
public function multiplyByScalar($value): self
{
    $products = [];
    for ($row = 0; $row < $this->rows; ++$row) {
        $source = $this->matrix[$row];
        for ($col = 0; $col < $this->columns; ++$col) {
            $products[$row][$col] = $source[$col] * $value;
        }
    }

    return new self($products, false);
}
/**
* Element-wise addition of the matrix with another one
*
* Delegates to _add() with its default sign.
*/
public function add(self $other): self
{
return $this->_add($other);
}
/**
* Element-wise subtracting of another matrix from this one
*
* Delegates to _add() with a sign of -1.
*/
public function subtract(self $other): self
{
return $this->_add($other, -1);
}
/**
 * Computes the inverse by handing the identity matrix to LUDecomposition::solve().
 *
 * @throws MatrixException when the matrix is not square
 */
public function inverse(): self
{
    if ($this->isSquare() === false) {
        throw new MatrixException('Matrix is not square matrix');
    }

    $decomposition = new LUDecomposition($this);

    return new self($decomposition->solve($this->getIdentity()), false);
}
/**
 * Copies this matrix into a new one, leaving out one row and one column.
 *
 * @param int $row zero-based index of the row to drop
 * @param int $column zero-based index of the column to drop
 */
public function crossOut(int $row, int $column): self
{
    $remaining = [];
    $targetRow = 0;

    for ($i = 0; $i < $this->rows; ++$i) {
        if ($i === $row) {
            continue;
        }

        $targetColumn = 0;
        for ($j = 0; $j < $this->columns; ++$j) {
            if ($j === $column) {
                continue;
            }

            $remaining[$targetRow][$targetColumn] = $this->matrix[$i][$j];
            ++$targetColumn;
        }

        // The target row advances for every kept source row
        ++$targetRow;
    }

    return new self($remaining, false);
}
/**
 * Tells whether the matrix is singular, i.e. whether its determinant equals zero.
 */
public function isSingular(): bool
{
    $determinant = $this->getDeterminant();

    // Loose comparison: holds for int 0 as well as float 0.0
    return $determinant == 0;
}
/**
 * Frobenius norm (Hilbert-Schmidt norm, Euclidean norm) (‖A‖F):
 * the square root of the sum of the squares of all elements.
 *
 * https://en.wikipedia.org/wiki/Matrix_norm#Frobenius_norm
 *
 *          _____________
 *         /ᵐ   ⁿ
 * ‖A‖F = √  Σ   Σ  |aᵢⱼ|²
 *         ᵢ₌₁ ⱼ₌₁
 */
public function frobeniusNorm(): float
{
    $total = 0;

    for ($r = 0; $r < $this->rows; ++$r) {
        for ($c = 0; $c < $this->columns; ++$c) {
            $total = $total + $this->matrix[$r][$c] ** 2;
        }
    }

    return $total ** 0.5;
}
/**
 * Transposes a plain array by wrapping it in a matrix, transposing that and unwrapping the result.
 */
public static function transposeArray(array $array): array
{
    $wrapped = new self($array, false);
    $transposed = $wrapped->transpose();

    return $transposed->toArray();
}
/**
 * Returns the dot product of two arrays<br>
 * Matrix::dot(x, y) ==> x.y'
 *
 * Both arrays are wrapped in matrices, the first is multiplied by the
 * transpose of the second, and the first row of that product is returned.
 */
public static function dot(array $array1, array $array2): array
{
    $left = new self($array1, false);
    $right = new self($array2, false);

    $product = $left->multiply($right->transpose());
    $rows = $product->toArray();

    return $rows[0];
}
/**
 * Combines this matrix with another one element by element: this + $sign * other.
 *
 * NOTE(review): the loops run over this matrix's dimensions and the size of
 * $other is not checked here - confirm callers always pass a matching shape.
 *
 * @param int $sign multiplier applied to each element of $other (1 adds, -1 subtracts)
 */
private function _add(self $other, int $sign = 1): self
{
    $left = $this->toArray();
    $right = $other->toArray();

    $result = [];
    for ($r = 0; $r < $this->rows; ++$r) {
        for ($c = 0; $c < $this->columns; ++$c) {
            $result[$r][$c] = $left[$r][$c] + $sign * $right[$r][$c];
        }
    }

    return new self($result, false);
}
/**
 * Returns a matrix with this matrix's dimensions that holds 1 on the main diagonal and 0 elsewhere.
 */
private function getIdentity(): self
{
    $identity = [];

    for ($r = 0; $r < $this->rows; ++$r) {
        // Start from a row of zeros, then mark the diagonal position
        $identity[$r] = array_fill(0, $this->columns, 0);
        $identity[$r][$r] = 1;
    }

    return new self($identity, false);
}
}

View File

@ -0,0 +1,23 @@
<?php
declare(strict_types=1);
namespace Phpml\Math;
/**
 * Scalar (dot) product helper for plain PHP arrays.
 */
class Product
{
    /**
     * Sums the products of the entries of $a and $b that share a key.
     *
     * The keys of $a drive the iteration. A pair contributes only when both
     * entries are numeric; a key that is missing from $b is skipped.
     *
     * @return float|int int 0 when no pair contributed, a float otherwise
     */
    public static function scalar(array $a, array $b)
    {
        $product = 0;
        foreach ($a as $index => $value) {
            // isset() keeps a shorter or differently keyed $b from raising an undefined index/offset error
            if (isset($b[$index]) && is_numeric($value) && is_numeric($b[$index])) {
                $product += (float) $value * (float) $b[$index];
            }
        }

        return $product;
    }
}

View File

@ -0,0 +1,173 @@
<?php
declare(strict_types=1);
namespace Phpml\Math;
use ArrayIterator;
use IteratorAggregate;
/**
 * Mutable set of scalar values, stored as a sorted, duplicate-free, zero-indexed list.
 *
 * Every path that writes the element list goes through sanitize().
 */
class Set implements IteratorAggregate
{
    /**
     * @var string[]|int[]|float[]|bool[]
     */
    private $elements = [];

    /**
     * @param string[]|int[]|float[]|bool[] $elements initial members; duplicates are dropped
     */
    public function __construct(array $elements = [])
    {
        $this->elements = self::sanitize($elements);
    }

    /**
     * Creates the union of A and B.
     */
    public static function union(self $a, self $b): self
    {
        return new self(array_merge($a->toArray(), $b->toArray()));
    }

    /**
     * Creates the intersection of A and B.
     */
    public static function intersection(self $a, self $b): self
    {
        return new self(array_intersect($a->toArray(), $b->toArray()));
    }

    /**
     * Creates the difference of A and B (the elements of A that are not in B).
     */
    public static function difference(self $a, self $b): self
    {
        return new self(array_diff($a->toArray(), $b->toArray()));
    }

    /**
     * Creates the Cartesian product of A and B.
     *
     * Each pair is itself stored as a Set, so it is sorted and a pair whose
     * two values compare equal collapses to a single element.
     *
     * @return Set[]
     */
    public static function cartesian(self $a, self $b): array
    {
        $cartesian = [];

        foreach ($a as $multiplier) {
            foreach ($b as $multiplicand) {
                $cartesian[] = new self(array_merge([$multiplicand], [$multiplier]));
            }
        }

        return $cartesian;
    }

    /**
     * Creates the power set of A.
     *
     * Starts from the empty set and, for every element of A, appends a copy of
     * each subset collected so far with that element added.
     *
     * @return Set[]
     */
    public static function power(self $a): array
    {
        $power = [new self()];

        foreach ($a as $multiplicand) {
            // foreach by value walks the array as it was when the loop began,
            // so the subsets appended below are not revisited in the same pass
            foreach ($power as $multiplier) {
                $power[] = new self(array_merge([$multiplicand], $multiplier->toArray()));
            }
        }

        return $power;
    }

    /**
     * Adds a single element and returns this set.
     *
     * @param string|int|float|bool $element
     */
    public function add($element): self
    {
        return $this->addAll([$element]);
    }

    /**
     * Adds several elements and returns this set.
     *
     * @param string[]|int[]|float[]|bool[] $elements
     */
    public function addAll(array $elements): self
    {
        $this->elements = self::sanitize(array_merge($this->elements, $elements));

        return $this;
    }

    /**
     * Removes a single element and returns this set.
     *
     * @param string|int|float|bool $element
     */
    public function remove($element): self
    {
        return $this->removeAll([$element]);
    }

    /**
     * Removes several elements and returns this set.
     *
     * @param string[]|int[]|float[]|bool[] $elements
     */
    public function removeAll(array $elements): self
    {
        $this->elements = self::sanitize(array_diff($this->elements, $elements));

        return $this;
    }

    /**
     * Tells whether the given element is a member of this set.
     *
     * @param string|int|float|bool $element
     */
    public function contains($element): bool
    {
        return $this->containsAll([$element]);
    }

    /**
     * Tells whether every given element is a member of this set.
     *
     * @param string[]|int[]|float[]|bool[] $elements
     */
    public function containsAll(array $elements): bool
    {
        // array_diff() yields the requested elements that are absent from the set
        return count(array_diff($elements, $this->elements)) === 0;
    }

    /**
     * @return string[]|int[]|float[]|bool[]
     */
    public function toArray(): array
    {
        return $this->elements;
    }

    /**
     * Lets the set be used directly in foreach.
     */
    public function getIterator(): ArrayIterator
    {
        return new ArrayIterator($this->elements);
    }

    public function isEmpty(): bool
    {
        return $this->cardinality() === 0;
    }

    /**
     * Number of elements in the set.
     */
    public function cardinality(): int
    {
        return count($this->elements);
    }

    /**
     * Removes duplicates and rewrites index.
     *
     * @param string[]|int[]|float[]|bool[] $elements
     *
     * @return string[]|int[]|float[]|bool[]
     */
    private static function sanitize(array $elements): array
    {
        // SORT_REGULAR is named explicitly: SORT_ASC is an ordering flag for
        // array_multisort(), not a comparison flag for sort() or array_unique()
        sort($elements, SORT_REGULAR);

        return array_values(array_unique($elements, SORT_REGULAR));
    }
}

Some files were not shown because too many files have changed in this diff Show More