-
-
Notifications
You must be signed in to change notification settings - Fork 185
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* RBX file format (#158) * Initial commit * Fix coding style * Shorten revision number and header checksum * Filesystem test uses RBX format * Recurse into object tree * Check for header/body class match * Fix coding style * Variable compression levels * Remove length header * Outline of specification * Convert checksums to HMACs * Spiff up a little bit * Tidy up * Use Native base serializer under the hood * Better error messages * Base serializer now injectable * Introduce encrypted variant * Appease Stan * Add beef to RBXP payload hash * Rename RBX portable to RBX standard * Use password digests by default * Appease Stan * More appeasement * Unrestricted digest length * Benchmark serializers * Change default base serializers * Switch payload HMAC to sha256 * Fix hmac * Fix mkdocs nav * RBX use checksums instead of HMACs * No default password * Tidy up * Remove PHP 8.0 from CI due to 3rd party incompatibility * Move RBXE to Extras package * Appease Stan * Dynamic column width in console output (#149) * Added dynamic column size in console Refactoring Console.php * Cast columnSize to int * Replace array_reduce with foreach * Mark tests as skipped * GitHub CLA * GitHub CLA check * GitHub CLA check with other username * Embed library version in RBX format * Appease Stan * Added custom class revision mismatch exception * Add RBX stuff to the user guide * Deprecate Igbinary serializer * New Transformer: Boolean Converter (#159) * add a boolean converter which converts true to 1 and false to 0. * updating the BooleanConverter too accept a customizable true/false value. Also updated docs to include the BooleanConverter * fix up the PHPdoc. Failed static analysis. * working through static analysis failures. * using single quotes * improvements per Andrew's comments on the PR * Add windows latest to CI build environments * Add fileinfo to required CI extensions * add a boolean converter which converts true to 1 and false to 0. 
* updating the BooleanConverter too accept a customizable true/false value. Also updated docs to include the BooleanConverter * fix up the PHPdoc. Failed static analysis. * working through static analysis failures. * using single quotes * improvements per Andrew's comments on the PR Co-authored-by: Andrew DalPino <[email protected]> * Appease Stan * Remove debug.log that should have been ignored by Git * Appease Stan * Update changelog * Tighten up RBX format * Initial commit (#162) * Deprecate explainedVar() and noiseVar() methods on PCA and LDA * Add missing extension specification and exception * Rename Autotrack Revisions * No need to sort singular values * Implement transformer conduits * Add return transformers method to Conduit * Clean up * Revert conduits * Single-threaded by default * Bump Tensor version requirement * Polish up for release Co-authored-by: Vladimir Stepanov <[email protected]> Co-authored-by: Zachary Vander Velden <[email protected]>
- Loading branch information
1 parent
61de61f
commit 4320e3f
Showing
128 changed files
with
1,604 additions
and
362 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
<?php | ||
|
||
namespace Rubix\ML\Benchmarks\Persisters\Serializers; | ||
|
||
use Rubix\ML\Datasets\Generators\Blob; | ||
use Rubix\ML\Classifiers\KNearestNeighbors; | ||
use Rubix\ML\Datasets\Generators\Agglomerate; | ||
use Rubix\ML\Persisters\Serializers\Gzip; | ||
|
||
/**
 * Benchmarks a serialize/unserialize round trip of a trained K Nearest Neighbors
 * estimator through the Gzip serializer.
 *
 * @Groups({"Serializers"})
 * @BeforeMethods({"setUp"})
 */
class GzipBench
{
    /**
     * The number of samples generated to train the estimator.
     */
    protected const TRAINING_SIZE = 2500;

    /**
     * The serializer under test.
     *
     * @var \Rubix\ML\Persisters\Serializers\Gzip
     */
    protected $serializer;

    /**
     * The trained estimator that is encoded and decoded by the subject.
     *
     * @var \Rubix\ML\Persistable
     */
    protected $persistable;

    /**
     * Generate a three-class synthetic dataset, train the estimator on it, and
     * instantiate the serializer so that none of this work is timed.
     */
    public function setUp() : void
    {
        $generator = new Agglomerate([
            'Iris-setosa' => new Blob([5.0, 3.42, 1.46, 0.24], [0.35, 0.38, 0.17, 0.1]),
            'Iris-versicolor' => new Blob([5.94, 2.77, 4.26, 1.33], [0.51, 0.31, 0.47, 0.2]),
            'Iris-virginica' => new Blob([6.59, 2.97, 5.55, 2.03], [0.63, 0.32, 0.55, 0.27]),
        ]);

        $training = $generator->generate(self::TRAINING_SIZE);

        $estimator = new KNearestNeighbors(5, true);

        $estimator->train($training);

        $this->persistable = $estimator;

        $this->serializer = new Gzip();
    }

    /**
     * Encode the trained estimator and immediately decode the resulting encoding.
     *
     * @Subject
     * @Revs(10)
     * @Iterations(5)
     * @OutputTimeUnit("milliseconds", precision=3)
     */
    public function serializeUnserialize() : void
    {
        $encoding = $this->serializer->serialize($this->persistable);

        // The decoded object is not needed; only the round trip is being measured.
        $this->serializer->unserialize($encoding);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
<?php | ||
|
||
namespace Rubix\ML\Benchmarks\Persisters\Serializers; | ||
|
||
use Rubix\ML\Datasets\Generators\Blob; | ||
use Rubix\ML\Classifiers\KNearestNeighbors; | ||
use Rubix\ML\Datasets\Generators\Agglomerate; | ||
use Rubix\ML\Persisters\Serializers\Native; | ||
|
||
/**
 * Benchmarks a serialize/unserialize round trip of a trained K Nearest Neighbors
 * estimator through the Native serializer.
 *
 * @Groups({"Serializers"})
 * @BeforeMethods({"setUp"})
 */
class NativeBench
{
    /**
     * The number of samples generated to train the estimator.
     */
    protected const TRAINING_SIZE = 2500;

    /**
     * The serializer under test.
     *
     * @var \Rubix\ML\Persisters\Serializers\Native
     */
    protected $serializer;

    /**
     * The trained estimator that is encoded and decoded by the subject.
     *
     * @var \Rubix\ML\Persistable
     */
    protected $persistable;

    /**
     * Generate a three-class synthetic dataset, train the estimator on it, and
     * instantiate the serializer so that none of this work is timed.
     */
    public function setUp() : void
    {
        $generator = new Agglomerate([
            'Iris-setosa' => new Blob([5.0, 3.42, 1.46, 0.24], [0.35, 0.38, 0.17, 0.1]),
            'Iris-versicolor' => new Blob([5.94, 2.77, 4.26, 1.33], [0.51, 0.31, 0.47, 0.2]),
            'Iris-virginica' => new Blob([6.59, 2.97, 5.55, 2.03], [0.63, 0.32, 0.55, 0.27]),
        ]);

        $training = $generator->generate(self::TRAINING_SIZE);

        $estimator = new KNearestNeighbors(5, true);

        $estimator->train($training);

        $this->persistable = $estimator;

        $this->serializer = new Native();
    }

    /**
     * Encode the trained estimator and immediately decode the resulting encoding.
     *
     * @Subject
     * @Revs(10)
     * @Iterations(5)
     * @OutputTimeUnit("milliseconds", precision=3)
     */
    public function serializeUnserialize() : void
    {
        $encoding = $this->serializer->serialize($this->persistable);

        // The decoded object is not needed; only the round trip is being measured.
        $this->serializer->unserialize($encoding);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
<?php | ||
|
||
namespace Rubix\ML\Benchmarks\Persisters\Serializers; | ||
|
||
use Rubix\ML\Datasets\Generators\Blob; | ||
use Rubix\ML\Classifiers\KNearestNeighbors; | ||
use Rubix\ML\Datasets\Generators\Agglomerate; | ||
use Rubix\ML\Persisters\Serializers\RBX; | ||
|
||
/**
 * Benchmarks a serialize/unserialize round trip of a trained K Nearest Neighbors
 * estimator through the RBX serializer.
 *
 * @Groups({"Serializers"})
 * @BeforeMethods({"setUp"})
 */
class RBXBench
{
    /**
     * The number of samples generated to train the estimator.
     */
    protected const TRAINING_SIZE = 2500;

    /**
     * The serializer under test.
     *
     * @var \Rubix\ML\Persisters\Serializers\RBX
     */
    protected $serializer;

    /**
     * The trained estimator that is encoded and decoded by the subject.
     *
     * @var \Rubix\ML\Persistable
     */
    protected $persistable;

    /**
     * Generate a three-class synthetic dataset, train the estimator on it, and
     * instantiate the serializer so that none of this work is timed.
     */
    public function setUp() : void
    {
        $generator = new Agglomerate([
            'Iris-setosa' => new Blob([5.0, 3.42, 1.46, 0.24], [0.35, 0.38, 0.17, 0.1]),
            'Iris-versicolor' => new Blob([5.94, 2.77, 4.26, 1.33], [0.51, 0.31, 0.47, 0.2]),
            'Iris-virginica' => new Blob([6.59, 2.97, 5.55, 2.03], [0.63, 0.32, 0.55, 0.27]),
        ]);

        $training = $generator->generate(self::TRAINING_SIZE);

        $estimator = new KNearestNeighbors(5, true);

        $estimator->train($training);

        $this->persistable = $estimator;

        $this->serializer = new RBX();
    }

    /**
     * Encode the trained estimator and immediately decode the resulting encoding.
     *
     * @Subject
     * @Revs(10)
     * @Iterations(5)
     * @OutputTimeUnit("milliseconds", precision=3)
     */
    public function serializeUnserialize() : void
    {
        $encoding = $this->serializer->serialize($this->persistable);

        // The decoded object is not needed; only the round trip is being measured.
        $this->serializer->unserialize($encoding);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
<?php | ||
|
||
namespace Rubix\ML\Benchmarks\Transformers; | ||
|
||
use Rubix\ML\Datasets\Generators\Blob; | ||
use Rubix\ML\Transformers\NumericStringConverter; | ||
|
||
/**
 * Benchmarks applying the Numeric String Converter to a generated dataset in
 * which two of the four columns have been cast to strings.
 *
 * @Groups({"Transformers"})
 * @BeforeMethods({"setUp"})
 */
class NumericStringConverterBench
{
    /**
     * The number of samples generated for the benchmark dataset.
     */
    protected const DATASET_SIZE = 100000;

    /**
     * The dataset the transformer is applied to.
     *
     * @var \Rubix\ML\Datasets\Dataset
     */
    public $dataset;

    /**
     * The transformer under test.
     *
     * @var \Rubix\ML\Transformers\NumericStringConverter
     */
    protected $transformer;

    /**
     * Generate the samples, run the columns at offsets 1 and 3 through strval,
     * and instantiate the transformer.
     */
    public function setUp() : void
    {
        $blob = new Blob([0.0, 0.0, 0.0, 0.0]);

        $samples = $blob->generate(self::DATASET_SIZE);

        $this->dataset = $samples->transformColumn(1, 'strval')->transformColumn(3, 'strval');

        $this->transformer = new NumericStringConverter();
    }

    /**
     * Apply the transformer to the prepared dataset.
     *
     * @Subject
     * @Iterations(3)
     * @OutputTimeUnit("milliseconds", precision=3)
     */
    public function apply() : void
    {
        $converter = $this->transformer;

        $this->dataset->apply($converter);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
<?php | ||
|
||
namespace Rubix\ML\Benchmarks\Transformers; | ||
|
||
use Rubix\ML\Datasets\Generators\Blob; | ||
use Rubix\ML\Datasets\Generators\Agglomerate; | ||
use Rubix\ML\Transformers\TruncatedSVD; | ||
|
||
/**
 * Benchmarks applying Truncated SVD to a generated three-class dataset.
 *
 * @Groups({"Transformers"})
 * @BeforeMethods({"setUp"})
 */
class TruncatedSVDBench
{
    /**
     * The number of samples generated for the benchmark dataset.
     */
    protected const DATASET_SIZE = 10000;

    /**
     * The dataset the transformer is applied to.
     *
     * @var \Rubix\ML\Datasets\Labeled
     */
    public $dataset;

    /**
     * The transformer under test.
     *
     * @var \Rubix\ML\Transformers\TruncatedSVD
     */
    protected $transformer;

    /**
     * Build one Blob generator per class label, generate the dataset from
     * their agglomerate, and instantiate the transformer.
     */
    public function setUp() : void
    {
        $blobs = [
            'Iris-setosa' => new Blob([5.0, 3.42, 1.46, 0.24], [0.35, 0.38, 0.17, 0.1]),
            'Iris-versicolor' => new Blob([5.94, 2.77, 4.26, 1.33], [0.51, 0.31, 0.47, 0.2]),
            'Iris-virginica' => new Blob([6.59, 2.97, 5.55, 2.03], [0.63, 0.32, 0.55, 0.27]),
        ];

        $agglomerate = new Agglomerate($blobs);

        $this->dataset = $agglomerate->generate(self::DATASET_SIZE);

        $this->transformer = new TruncatedSVD(1);
    }

    /**
     * Apply the transformer to the generated dataset.
     *
     * @Subject
     * @Iterations(3)
     * @OutputTimeUnit("milliseconds", precision=3)
     */
    public function apply() : void
    {
        $svd = $this->transformer;

        $this->dataset->apply($svd);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.