├── .asf.yaml ├── .gitignore ├── LICENSE ├── NOTICE ├── README.md ├── pom.xml ├── src ├── main │ └── java │ │ └── org │ │ └── apache │ │ └── datasketches │ │ └── vector │ │ ├── MatrixFamily.java │ │ ├── decomposition │ │ ├── FrequentDirections.java │ │ ├── MatrixOps.java │ │ ├── MatrixOpsImplOjAlgo.java │ │ ├── PreambleUtil.java │ │ ├── SVDAlgo.java │ │ └── package-info.java │ │ ├── matrix │ │ ├── Matrix.java │ │ ├── MatrixBuilder.java │ │ ├── MatrixImplOjAlgo.java │ │ ├── MatrixPreambleUtil.java │ │ ├── MatrixType.java │ │ └── package-info.java │ │ └── package-info.java └── test │ └── java │ └── org │ └── apache │ └── datasketches │ └── vector │ ├── decomposition │ ├── FrequentDirectionsTest.java │ └── MatrixOpsTest.java │ └── matrix │ ├── MatrixBuilderTest.java │ ├── MatrixImplOjAlgoTest.java │ └── MatrixTest.java └── tools ├── FindBugsExcludeFilter.xml └── SketchesCheckstyle.xml /.asf.yaml: -------------------------------------------------------------------------------- 1 | github: 2 | homepage: https://datasketches.apache.org -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Eclipse project files 2 | .classpath 3 | .project 4 | .settings/ 5 | .checkstyle 6 | 7 | # IntelliJ project files 8 | *.iml 9 | *.ipr 10 | *.iws 11 | 12 | # Additional tools 13 | .clover/ 14 | 15 | # OSX files 16 | .DS_Store 17 | 18 | # Compiler output, class files 19 | *.class 20 | bin/ 21 | 22 | # Log file 23 | *.log 24 | 25 | # BlueJ files 26 | *.ctxt 27 | 28 | # Mobile Tools for Java (J2ME) 29 | .mtj.tmp/ 30 | 31 | # Package Files # 32 | *.jar 33 | *.war 34 | *.ear 35 | *.zip 36 | *.tar.gz 37 | *.rar 38 | 39 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 40 | hs_err_pid* 41 | 42 | #Test config and output 43 | /target/ 44 | test-output/ 45 | local/ 46 | reports/ 47 | .pmd 48 | tmp/ 49 | 50 | # Build artifacts 51 | 
out/ 52 | build/ 53 | jarsIn/ 54 | build.xml 55 | .idea 56 | *.properties 57 | *.releaseBackup 58 | *.next 59 | *.tag 60 | 61 | # Jekyll 62 | Gemfile 63 | Gemfile.lock 64 | _site/ 65 | _* 66 | _*/ 67 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | 180 | 181 | APPENDIX A: How to apply the Apache License to your work. 182 | 183 | To apply the Apache License to your work, attach the following 184 | boilerplate notice, with the fields enclosed by brackets "[]" 185 | replaced with your own identifying information. (Don't include 186 | the brackets!) The text should be enclosed in the appropriate 187 | comment syntax for the file format. 
We also recommend that a 188 | file or class name and description of purpose be included on the 189 | same "printed page" as the copyright notice for easier 190 | identification within third-party archives. 191 | ------------------------------------------------------------- 192 | Copyright [yyyy] [name of copyright owner] 193 | 194 | Licensed under the Apache License, Version 2.0 (the "License"); 195 | you may not use this file except in compliance with the License. 196 | You may obtain a copy of the License at 197 | 198 | http://www.apache.org/licenses/LICENSE-2.0 199 | 200 | Unless required by applicable law or agreed to in writing, software 201 | distributed under the License is distributed on an "AS IS" BASIS, 202 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 203 | See the License for the specific language governing permissions and 204 | limitations under the License. 205 | ------------------------------------------------------------- 206 | 207 | 208 | 209 | APPENDIX B: Additional licenses relevant to this product: 210 | (none) 211 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache DataSketches Vector 2 | Copyright 2020 - The Apache Software Foundation 3 | 4 | Copyright 2015-2018 Yahoo 5 | Copyright 2019 Verizon Media 6 | 7 | This product includes software developed at 8 | The Apache Software Foundation (http://www.apache.org/). 9 | 10 | Prior to moving to ASF, the software for this project was developed at 11 | Yahoo (now Verizon Media) (https://developer.yahoo.com). 
12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # DataSketches Java Vector Library (Experimental) 21 | This code is offered "as is" and may not be up to the quality of the code in, for example, the core datasketches-java repository. 22 | 23 | Please visit the main [DataSketches website](https://datasketches.apache.org) for more information. 24 | 25 | If you are interested in making contributions to this project, please see our [Community](https://datasketches.apache.org/docs/Community/) page for how to contact us. 26 | 27 | 28 | ## Build Instructions 29 | __NOTE:__ This component accesses resource files for testing. As a result, the directory elements of the full absolute path of the target installation directory must qualify as Java identifiers. In other words, the directory elements must not have any space characters (or non-Java identifier characters) in any of the path elements. This is required by the Oracle Java Specification in order to ensure location-independent access to resources: [See Oracle Location-Independent Access to Resources](https://docs.oracle.com/javase/8/docs/technotes/guides/lang/resources.html) 30 | 31 | ### JDK8 is required to compile 32 | This DataSketches component is pure Java and you must compile using JDK 8. 33 | 34 | ### Recommended Build Tool 35 | This DataSketches component is structured as a Maven project and Maven is the recommended Build Tool. 36 | 37 | There are two types of tests: normal unit tests and tests run by the strict profile.
38 | 39 | To run normal unit tests: 40 | 41 | $ mvn clean test 42 | 43 | To run the strict profile tests: 44 | 45 | $ mvn clean test -P strict 46 | 47 | To install jars built from the downloaded source: 48 | 49 | $ mvn clean install -DskipTests=true 50 | 51 | This will create the following jars: 52 | 53 | * datasketches-vector-X.Y.Z.jar The compiled main class files. 54 | * datasketches-vector-X.Y.Z-tests.jar The compiled test class files. 55 | * datasketches-vector-X.Y.Z-sources.jar The main source files. 56 | * datasketches-vector-X.Y.Z-test-sources.jar The test source files 57 | * datasketches-vector-X.Y.Z-javadoc.jar The compressed Javadocs. 58 | 59 | ### Dependencies 60 | 61 | #### Run-time 62 | There are two run-time dependencies: 63 | 64 | * org.ojalgo : ojalgo 65 | * org.apache.datasketches : datasketches-memory 66 | 67 | #### Testing 68 | See the pom.xml file for test dependencies. 69 | 70 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 24 | 4.0.0 25 | 26 | 27 | org.apache 28 | apache 29 | 23 30 | 31 | 32 | org.apache.datasketches 33 | 34 | 35 | datasketches-vector 36 | 1.0.0-SNAPSHOT 37 | Vector sketch algorithms for the DataSketches library. 
38 | 39 | 40 | https://datasketches.apache.org/ 41 | ${project.artifactId} 42 | 2015 43 | jar 44 | 45 | 46 | 47 | DataSketches Developers 48 | dev-subscribe@datasketches.apache.org 49 | dev-unsubscribe@datasketches.apache.org 50 | dev@datasketches.apache.org 51 | https://mail-archives.apache.org/mod_mbox/datasketches-dev 52 | 53 | 54 | sketches-user 55 | https://groups.google.com/forum/#!forum/sketches-user 56 | mailto:sketches-user%2Bsubscribe@googlegroups.com 57 | mailto:sketches-user%2Bunsubscribe@googlegroups.com 58 | mailto:sketches-user@googlegroups.com 59 | 60 | 61 | 62 | 63 | scm:git:ssh://git@github.com/apache/${project.artifactId}.git 64 | scm:git:ssh://git@github.com/apache/${project.artifactId}.git 65 | https://github.com/apache/${project.artifactId} 66 | HEAD 67 | 68 | 69 | 70 | 72 | GitHub 73 | https://github.com/apache/${project.artifactId}/issues 74 | 75 | 76 | 77 | 78 | The Apache DataSketches Team 79 | dev@datasketches.apache.org 80 | https://datasketches.apache.org 81 | Apache Software Foundation 82 | http://www.apache.org 83 | 84 | 85 | 86 | 87 | 88 | 48.0.0 89 | 1.2.0-incubating 90 | 91 | 92 | 93 | 7.5.1 94 | 95 | 96 | 3.0.0 97 | 1.8 98 | ${java.version} 99 | ${java.version} 100 | -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 101 | UTF-8 102 | ${charset.encoding} 103 | ${charset.encoding} 104 | ${charset.encoding} 105 | 106 | 107 | 2.8.8 108 | 109 | 3.3.0 110 | 3.8.1 111 | 3.0.0-M1 112 | 3.0.0-M2 113 | 1.6 114 | 3.2.0 115 | 3.2.0 116 | 3.0.0-M1 117 | [1.7.0,) 118 | 3.2.1 119 | 3.0.0-M5 120 | 121 | 0.13 122 | 123 | 0.8.6 124 | 125 | 4.3.0 126 | 127 | 1.0.0 128 | 3.0.0 129 | 130 | 131 | 132 | 133 | apache.snapshots 134 | Apache Snapshot Repository 135 | https://repository.apache.org/content/groups/snapshots/org/apache/datasketches/ 136 | 137 | false 138 | 139 | 140 | true 141 | 142 | 143 | 144 | apache 145 | Apache Releases Repository 146 | https://repository.apache.org/content/repositories/releases/org/apache/datasketches/ 
147 | 148 | true 149 | 150 | 151 | false 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | org.ojalgo 160 | ojalgo 161 | ${ojalgo.version} 162 | 163 | 164 | org.apache.datasketches 165 | datasketches-memory 166 | ${datasketches-memory.version} 167 | 168 | 169 | 170 | 171 | 172 | org.testng 173 | testng 174 | ${testng.version} 175 | test 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | org.apache.maven.plugins 186 | maven-deploy-plugin 187 | ${maven-deploy-plugin.version} 188 | 189 | true 190 | 191 | 192 | 193 | 194 | 195 | org.apache.maven.plugins 196 | maven-release-plugin 197 | ${maven-release-plugin.version} 198 | 199 | 200 | 201 | org.apache.maven.plugins 202 | maven-jar-plugin 203 | ${maven-jar-plugin.version} 204 | 205 | 206 | default-jar 207 | 208 | jar 209 | 210 | 211 | 212 | default-test-jar 213 | 214 | test-jar 215 | 216 | 217 | 218 | 219 | 220 | 221 | org.apache.maven.plugins 222 | maven-javadoc-plugin 223 | ${maven-javadoc-plugin.version} 224 | 225 | true 226 | 227 | 228 | 229 | attach-javadocs 230 | 231 | jar 232 | 233 | 234 | 235 | 236 | 237 | org.apache.maven.plugins 238 | maven-enforcer-plugin 239 | ${maven-enforcer-plugin.version} 240 | 241 | 242 | enforce-banned-dependencies 243 | 244 | enforce 245 | 246 | 247 | 248 | 249 | ${java.version} 250 | 251 | 252 | ${maven.version}, 253 | 254 | 255 | 256 | 257 | com.google.code.findbugs:annotations 258 | 259 | 260 | 261 | true 262 | 263 | 264 | 265 | 266 | 267 | 268 | org.apache.rat 269 | apache-rat-plugin 270 | ${apache-rat-plugin.version} 271 | 272 | 273 | verify 274 | 275 | check 276 | 277 | 278 | 279 | 280 | ${project.basedir}/rat 281 | true 282 | true 283 | 284 | 285 | **/test/resources/**/*.txt 286 | .asf.yaml 287 | LICENSE 288 | NOTICE 289 | 290 | 291 | 292 | 293 | 294 | org.apache.maven.plugins 295 | maven-source-plugin 296 | ${maven-source-plugin.version} 297 | 298 | 299 | attach-sources 300 | package 301 | 302 | jar-no-fork 303 | 304 | 305 | 306 | attach-test-sources 307 | package 308 | 
309 | test-jar-no-fork 310 | 311 | 312 | 313 | 314 | 315 | 316 | org.apache.maven.plugins 317 | maven-surefire-plugin 318 | ${maven-surefire-plugin.version} 319 | 320 | false 321 | false 322 | true 323 | 324 | 325 | 326 | 327 | org.jacoco 328 | jacoco-maven-plugin 329 | ${jacoco-maven-plugin.version} 330 | 331 | 332 | prepare-agent 333 | 334 | prepare-agent 335 | 336 | 337 | 338 | 339 | 340 | 341 | org.eluder.coveralls 342 | coveralls-maven-plugin 343 | ${coveralls-maven-plugin.version} 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | org.apache.maven.plugins 353 | maven-deploy-plugin 354 | 355 | 356 | org.apache.maven.plugins 357 | maven-release-plugin 358 | 359 | 360 | org.apache.maven.plugins 361 | maven-jar-plugin 362 | 363 | 364 | org.apache.maven.plugins 365 | maven-javadoc-plugin 366 | 367 | 368 | org.apache.maven.plugins 369 | maven-enforcer-plugin 370 | 371 | 372 | org.apache.rat 373 | apache-rat-plugin 374 | 375 | 376 | org.apache.maven.plugins 377 | maven-source-plugin 378 | 379 | 380 | org.apache.maven.plugins 381 | maven-surefire-plugin 382 | 383 | 384 | org.jacoco 385 | jacoco-maven-plugin 386 | 387 | 388 | org.eluder.coveralls 389 | coveralls-maven-plugin 390 | 391 | 392 | 393 | 394 | 401 | 402 | m2e 403 | 404 | 405 | m2e.version 406 | 407 | 408 | 409 | 410 | 411 | 412 | org.eclipse.m2e 413 | lifecycle-mapping 414 | ${lifecycle-mapping.version} 415 | 416 | 417 | 418 | 419 | 420 | org.apache.maven.plugins 421 | maven-remote-resources-plugin 422 | ${maven-remote-resources-plugin.version} 423 | 424 | process 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | strict 442 | 443 | 444 | 445 | 446 | org.apache.maven.plugins 447 | maven-compiler-plugin 448 | ${maven-compiler-plugin.version} 449 | 450 | 451 | org.codehaus.plexus 452 | plexus-compiler-javac-errorprone 453 | ${plexus-compiler-javac-errorprone.version} 454 | 455 | 456 | 457 | ${maven.compiler.source} 458 | ${maven.compiler.target} 459 | 
javac-with-errorprone 460 | true 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 477 | 478 | nexus-jars 479 | 480 | 481 | 482 | 483 | pl.project13.maven 484 | git-commit-id-plugin 485 | ${git-commit-id-plugin.version} 486 | 487 | 488 | 489 | revision 490 | 491 | initialize 492 | 493 | 494 | 495 | ${project.basedir}/.git 496 | UTC 497 | false 498 | false 499 | json 500 | true 501 | ${project.build.directory}/git.properties 502 | true 503 | true 504 | full 505 | 506 | git.branch 507 | git.commit.id.full 508 | git.commit.time 509 | git.commit.user.email 510 | git.tags 511 | 512 | 513 | false 514 | true 515 | 7 516 | -dirty 517 | true 518 | true 519 | 520 | 521 | 522 | 523 | 524 | 525 | org.apache.maven.plugins 526 | maven-jar-plugin 527 | ${maven-jar-plugin.version} 528 | 529 | 530 | default-jar 531 | 532 | jar 533 | 534 | 535 | 536 | default-test-jar 537 | 538 | test-jar 539 | 540 | 541 | 542 | 543 | true 544 | 545 | 546 | false 547 | false 548 | false 549 | 550 | 551 | ${java.version} (${java.vendor} ${java.vm.version}) 552 | ${os.name} ${os.arch} ${os.version} 553 | The Apache Software Foundation 554 | ${project.groupId}:${project.artifactId} 555 | ${git.branch} 556 | ${git.commit.id.full} 557 | ${git.commit.time} 558 | ${git.commit.user.email} 559 | ${git.tags} 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | org.apache.maven.plugins 568 | maven-gpg-plugin 569 | ${maven-gpg-plugin.version} 570 | 571 | 572 | sign-artifacts 573 | verify 574 | 575 | sign 576 | 577 | 578 | 579 | 580 | 581 | --verbose 582 | 583 | --personal-digest-preferences=SHA512 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | pl.project13.maven 593 | git-commit-id-plugin 594 | 595 | 596 | org.apache.maven.plugins 597 | maven-jar-plugin 598 | 599 | 600 | org.apache.maven.plugins 601 | maven-gpg-plugin 602 | 603 | 604 | 605 | 606 | 609 | 610 | apache-release 611 | 612 | 613 | 614 | maven-assembly-plugin 615 | ${maven-assembly-plugin.version} 616 | 617 | 618 | source-release-assembly 619 | none 
620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/MatrixFamily.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.vector; 21 | 22 | import java.util.HashMap; 23 | import java.util.Map; 24 | 25 | /** 26 | * Defines the various families of sketch and set operation classes. A family defines a set of 27 | * classes that share fundamental algorithms and behaviors. The classes within a family may 28 | * still differ by how they are stored and accessed. For example, internally there may be separate 29 | * classes for algorithms that operate on the Java heap and off-heap. 30 | * Not all of these families have parallel forms on and off-heap but are included for completeness. 31 | * 32 | *

Family IDs start at 128 to allow separation from sketches-core for as long as possible without 33 | * inducing a mutual dependency between packages.

34 | * 35 | * @author Lee Rhodes 36 | * @author Jon Malkin 37 | */ 38 | public enum MatrixFamily { 39 | /** 40 | * The Matrix family: ID 128, with a preamble of at least 2 and at most 3 longs 41 | * (the constructor's minPreLongs and maxPreLongs arguments). 42 | */ 43 | MATRIX(128, "Matrix", 2, 3), 44 | /** 45 | * The Frequent Directions family, used for approximate Singular Value Decomposition of a matrix: ID 129, with a preamble of at least 2 and at most 4 longs. 46 | */ 47 | FREQUENTDIRECTIONS(129, "FrequentDirections", 2, 4); 48 | 49 | 50 | private static final Map lookupID = new HashMap<>(); // NOTE(review): raw Map; type arguments (presumably Integer to MatrixFamily) appear to be missing -- confirm 51 | private static final Map lookupFamName = new HashMap<>(); // NOTE(review): raw Map; presumably String to MatrixFamily -- confirm 52 | private int id_; 53 | private String famName_; 54 | private int minPreLongs_; 55 | private int maxPreLongs_; 56 | 57 | static { // index every family by its ID and by its upper-cased name 58 | for (MatrixFamily f : values()) { 59 | lookupID.put(f.getID(), f); 60 | lookupFamName.put(f.getFamilyName().toUpperCase(), f); 61 | } 62 | } 63 | 64 | MatrixFamily(final int id, final String famName, final int minPreLongs, final int maxPreLongs) { 65 | id_ = id; 66 | famName_ = famName.toUpperCase(); // stored upper-cased; NOTE(review): toUpperCase() uses the default locale -- consider Locale.ROOT 67 | minPreLongs_ = minPreLongs; 68 | maxPreLongs_ = maxPreLongs; 69 | } 70 | 71 | /** 72 | * Returns the ID for this family 73 | * @return the ID for this family, as an int 74 | */ 75 | public int getID() { 76 | return id_; 77 | } 78 | 79 | /** 80 | * Checks that the given ID equals the ID of this family and throws IllegalArgumentException if it does not. 81 | * @param id the given id, a value ≥ 128.
82 | */ 83 | public void checkFamilyID(final int id) { 84 | if (id != id_) { 85 | throw new IllegalArgumentException( 86 | "Possible Corruption: This Family " + toString() 87 | + " does not match the ID of the given Family: " + idToFamily(id).toString()); // idToFamily throws its own IllegalArgumentException when id is not a known family ID 88 | } 89 | } 90 | 91 | /** 92 | * Returns the name for this family, in the upper-cased form stored by the constructor 93 | * @return the upper-cased name for this family 94 | */ 95 | public String getFamilyName() { 96 | return famName_; 97 | } 98 | 99 | /** 100 | * Returns the minimum preamble size for this family in longs 101 | * @return the minimum preamble size for this family in longs 102 | */ 103 | public int getMinPreLongs() { 104 | return minPreLongs_; 105 | } 106 | 107 | /** 108 | * Returns the maximum preamble size for this family in longs 109 | * @return the maximum preamble size for this family in longs 110 | */ 111 | public int getMaxPreLongs() { 112 | return maxPreLongs_; 113 | } 114 | 115 | @Override 116 | public String toString() { 117 | return famName_; 118 | } 119 | 120 | /** 121 | * Returns the Family given the ID, or throws IllegalArgumentException if the ID is not registered 122 | * @param id the given ID 123 | * @return the Family given the ID 124 | */ 125 | public static MatrixFamily idToFamily(final int id) { 126 | final MatrixFamily f = lookupID.get(id); 127 | if (f == null) { 128 | throw new IllegalArgumentException("Possible Corruption: Illegal Family ID: " + id); 129 | } 130 | return f; 131 | } 132 | 133 | /** 134 | * Returns the Family given the family name (upper-cased before lookup), or throws IllegalArgumentException if the name is not registered 135 | * @param famName the family name 136 | * @return the Family given the family name 137 | */ 138 | public static MatrixFamily stringToFamily(final String famName) { 139 | final MatrixFamily f = lookupFamName.get(famName.toUpperCase()); // NOTE(review): default-locale upper-casing; consider Locale.ROOT 140 | if (f == null) { 141 | throw new IllegalArgumentException("Possible Corruption: Illegal Family Name: " + famName); 142 | } 143 | return f; 144 | } 145 | 146 | /** 147 | * Returns the first Family, in declaration order, whose upper-cased name is contained in the upper-cased simple class name of the given object; throws IllegalArgumentException if none matches 148 | * @param obj a recognized Family class object 149 | *
@return the Family given one of the recognized class objects on one of the Families 150 | */ 151 | public static MatrixFamily objectToFamily(final Object obj) { 152 | final String sname = obj.getClass().getSimpleName().toUpperCase(); 153 | for (MatrixFamily f : values()) { 154 | if (sname.contains(f.toString())) { 155 | return f; 156 | } 157 | } 158 | throw new IllegalArgumentException("Possible Corruption: Unknown object"); 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/decomposition/FrequentDirections.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | package org.apache.datasketches.vector.decomposition; 21 | 22 | import static org.apache.datasketches.memory.UnsafeUtil.LS; 23 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.EMPTY_FLAG_MASK; 24 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.SER_VER; 25 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.extractFamilyID; 26 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.extractFlags; 27 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.extractK; 28 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.extractN; 29 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.extractNumColumns; 30 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.extractNumRows; 31 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.extractSVAdjustment; 32 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.extractSerVer; 33 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.getAndCheckPreLongs; 34 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.insertFamilyID; 35 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.insertFlags; 36 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.insertK; 37 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.insertN; 38 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.insertNumColumns; 39 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.insertNumRows; 40 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.insertPreLongs; 41 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.insertSVAdjustment; 42 | import static org.apache.datasketches.vector.decomposition.PreambleUtil.insertSerVer; 43 | 44 | import 
org.apache.datasketches.memory.Memory; 45 | import org.apache.datasketches.memory.WritableMemory; 46 | import org.apache.datasketches.vector.MatrixFamily; 47 | import org.apache.datasketches.vector.matrix.Matrix; 48 | import org.apache.datasketches.vector.matrix.MatrixBuilder; 49 | import org.apache.datasketches.vector.matrix.MatrixType; 50 | 51 | /** 52 | * This class implements the Frequent Directions algorithm proposed by Edo Liberty in "Simple and 53 | * Deterministic Matrix Sketches," KDD 2013. The sketch provides an approximation to the singular 54 | * value decomposition of a matrix with deterministic error bounds on the error between the 55 | * approximation and the optimal rank-k matrix decomposition. 56 | * 57 | * @author Jon Malkin 58 | */ 59 | public final class FrequentDirections { 60 | private static final MatrixType DEFAULT_MATRIX_TYPE = MatrixType.OJALGO; 61 | private static final SVDAlgo DEFAULT_SVD_ALGO = SVDAlgo.SYM; 62 | 63 | private final int k_; 64 | private final int l_; 65 | private final int d_; 66 | private long n_; 67 | 68 | private double svAdjustment_; 69 | 70 | private Matrix B_; 71 | 72 | private SVDAlgo algo_ = DEFAULT_SVD_ALGO; 73 | 74 | transient private int nextZeroRow_; 75 | transient private MatrixOps svd_; // avoids re-initializing 76 | 77 | /** 78 | * Creates a new instance of a Frequent Directions sketch using the default Linear Algebra backing library 79 | * @param k Number of dimensions (rows) in the sketch output 80 | * @param d Number of dimensions per input vector (columns) 81 | * @return An empty Frequent Directions sketch 82 | */ 83 | public static FrequentDirections newInstance(final int k, final int d) { 84 | return newInstance(k, d, DEFAULT_MATRIX_TYPE); 85 | } 86 | 87 | /** 88 | * Creates a new instance of a Frequent Directions sketch using a specific MatrixType 89 | * Package-private until (if ever) MTJ works properly. 
90 | * @param k Number of dimensions (rows) in the sketch output 91 | * @param d Number of dimensions per input vector (columns) 92 | * @param type MatrixType to use for backing matrix. Impacts choice of SVD library. 93 | * @return An empty Frequent Directions sketch 94 | */ 95 | static FrequentDirections newInstance(final int k, final int d, final MatrixType type) { 96 | return new FrequentDirections(k, d, null, type); 97 | } 98 | 99 | /** 100 | * Instantiates a Frequent Directions sketch from a serialized image. 101 | * @param srcMem Memory containing the serialized image of a Frequent Directions sketch 102 | * @return A Frequent Directions sketch 103 | */ 104 | public static FrequentDirections heapify(final Memory srcMem) { 105 | return heapify(srcMem, DEFAULT_MATRIX_TYPE); 106 | } 107 | 108 | /** 109 | * Instantiates a Frequent Directions sketch from a serialized image using a specific MatrixType. 110 | * Package-private until (if ever) MTJ works properly. 111 | * @param srcMem Memory containing the serialized image of a Frequent Directions sketch 112 | * @param type The MatrixType to use with this instance 113 | * @return A Frequent Directions sketch 114 | */ 115 | static FrequentDirections heapify(final Memory srcMem, final MatrixType type) { 116 | final int preLongs = getAndCheckPreLongs(srcMem); 117 | final int serVer = extractSerVer(srcMem); 118 | if (serVer != SER_VER) { 119 | throw new IllegalArgumentException("Invalid serialization version: " + serVer); 120 | } 121 | 122 | final int family = extractFamilyID(srcMem); 123 | if (family != MatrixFamily.FREQUENTDIRECTIONS.getID()) { 124 | throw new IllegalArgumentException("Possible corruption: Family id (" + family + ") " 125 | + "is not a FrequentDirections sketch"); 126 | } 127 | 128 | final int k = extractK(srcMem); 129 | final int numRows = extractNumRows(srcMem); 130 | final int d = extractNumColumns(srcMem); 131 | final boolean empty = (extractFlags(srcMem) & EMPTY_FLAG_MASK) > 0; 132 | 133 | if 
(empty) { 134 | return new FrequentDirections(k, d); 135 | } 136 | 137 | final long offsetBytes = (long)preLongs * Long.BYTES; 138 | final long mtxBytes = srcMem.getCapacity() - offsetBytes; 139 | final Matrix B = Matrix.heapify(srcMem.region(offsetBytes, mtxBytes), type); 140 | assert B != null; 141 | 142 | final FrequentDirections fd = new FrequentDirections(k, d, B, B.getMatrixType()); 143 | fd.n_ = extractN(srcMem); 144 | fd.nextZeroRow_ = numRows; 145 | fd.svAdjustment_ = extractSVAdjustment(srcMem); 146 | 147 | return fd; 148 | } 149 | 150 | private FrequentDirections(final int k, final int d) { 151 | this(k, d, null, DEFAULT_MATRIX_TYPE); 152 | } 153 | 154 | // uses MatrixType of B, if present, otherwise falls back to type input 155 | private FrequentDirections(final int k, final int d, final Matrix B, final MatrixType type) { 156 | if (k < 1) { 157 | throw new IllegalArgumentException("Number of projected dimensions must be at least 1"); 158 | } 159 | if (d < 1) { 160 | throw new IllegalArgumentException("Number of feature dimensions must be at least 1"); 161 | } 162 | 163 | k_ = k; 164 | l_ = 2 * k; 165 | d_ = d; 166 | 167 | if (d_ < l_) { 168 | throw new IllegalArgumentException("Running with d < 2k not yet supported"); 169 | } 170 | 171 | svAdjustment_ = 0.0; 172 | 173 | nextZeroRow_ = 0; 174 | n_ = 0; 175 | 176 | if (B == null) { 177 | B_ = new MatrixBuilder().setType(type).build(l_, d_); 178 | } else { 179 | B_ = B; 180 | } 181 | } 182 | 183 | /** 184 | * Update sketch with a dense input vector of exactly d dimensions. 185 | * @param vector A dense input vector representing one row of the input matrix 186 | */ 187 | public void update(final double[] vector) { 188 | if (vector == null) { 189 | return; 190 | } 191 | 192 | if (vector.length != d_) { 193 | throw new IllegalArgumentException("Input vector has wrong number of dimensions. 
Expected " 194 | + d_ + "; found " + vector.length); 195 | } 196 | 197 | if (nextZeroRow_ == l_) { 198 | reduceRank(); 199 | } 200 | 201 | B_.setRow(nextZeroRow_, vector); 202 | 203 | ++n_; 204 | ++nextZeroRow_; 205 | } 206 | 207 | /** 208 | * Merge a Frequent Directions sketch into the current one. 209 | * @param fd A Frequent Direction sketch to be merged. 210 | */ 211 | public void update(final FrequentDirections fd) { 212 | if ((fd == null) || (fd.nextZeroRow_ == 0)) { 213 | return; 214 | } 215 | 216 | if ((fd.d_ != d_) || (fd.k_ < k_)) { 217 | throw new IllegalArgumentException("Incoming sketch must have same number of dimensions " 218 | + "and no smaller a value of k"); 219 | } 220 | 221 | for (int m = 0; m < fd.nextZeroRow_; ++m) { 222 | if (nextZeroRow_ == l_) { 223 | reduceRank(); 224 | } 225 | 226 | B_.setRow(nextZeroRow_, fd.B_.getRow(m)); 227 | ++nextZeroRow_; 228 | } 229 | 230 | n_ += fd.n_; 231 | svAdjustment_ += fd.svAdjustment_; 232 | } 233 | 234 | /** 235 | * Checks if the sketch is empty, specifically whether it has processed any input data. 236 | * @return True if the sketch has not yet processed any input 237 | */ 238 | public boolean isEmpty() { 239 | return n_ == 0; 240 | } 241 | 242 | /** 243 | * Returns the target number of dimensions, k, for this sketch. 244 | * @return The sketch's configured k value 245 | */ 246 | public int getK() { return k_; } 247 | 248 | /** 249 | * Returns the number of dimensions per input vector, d, for this sketch. 250 | * @return The sketch's configured number of dimensions per input 251 | */ 252 | public int getD() { return d_; } 253 | 254 | /** 255 | * Returns the total number of items this sketch has seen. 256 | * @return The number of items processed by the sketch. 257 | */ 258 | public long getN() { return n_; } 259 | 260 | /** 261 | * Sets the SVD algorithm to use, allowing exact or approximate computation. @see SVDAlgo for details. 
262 | * @param algo The SVDAlgo type to use 263 | */ 264 | public void setSVDAlgo(final SVDAlgo algo) { 265 | algo_ = algo; 266 | } 267 | 268 | /** 269 | * Returns the singular values of the sketch, adjusted for the mass subtracted off during the 270 | * algorithm. 271 | * @return An array of singular values. 272 | */ 273 | public double[] getSingularValues() { 274 | return getSingularValues(false); 275 | } 276 | 277 | /** 278 | * Returns the singular values of the sketch, optionally adjusting for any mass subtracted off 279 | * during the algorithm. 280 | * @param compensative If true, adjusts for mass subtracted during the algorithm, otherwise 281 | * uses raw singular values. 282 | * @return An array of singular values. 283 | */ 284 | public double[] getSingularValues(final boolean compensative) { 285 | if (svd_ == null) { 286 | svd_ = MatrixOps.newInstance(B_, algo_, k_); 287 | } 288 | 289 | final double[] sv = svd_.getSingularValues(B_); 290 | 291 | double medianSVSq = sv[k_ - 1]; // (l_/2)th item, not yet squared 292 | medianSVSq *= medianSVSq; 293 | final double tmpSvAdj = svAdjustment_ + medianSVSq; 294 | final double[] svList = new double[k_]; 295 | 296 | for (int i = 0; i < (k_ - 1); ++i) { 297 | final double val = sv[i]; 298 | double adjSqSV = (val * val) - medianSVSq; 299 | if (compensative) { adjSqSV += tmpSvAdj; } 300 | svList[i] = adjSqSV < 0 ? 0.0 : Math.sqrt(adjSqSV); 301 | } 302 | 303 | return svList; 304 | } 305 | 306 | /** 307 | * Returns an orthonormal projection Matrix V^T that can be used to project input vectors into the 308 | * k-dimensional space represented by the sketch. 309 | * @return An orthonormal Matrix object 310 | */ 311 | public Matrix getProjectionMatrix() { 312 | if (svd_ == null) { 313 | svd_ = MatrixOps.newInstance(B_, algo_, k_); 314 | } 315 | 316 | return svd_.getVt(); 317 | } 318 | 319 | /** 320 | * Reduces matrix rank to no more than k, regardless of whether the sketch has reached its 321 | * internal capacity. 
Has no effect if there are no more than k active rows. 322 | */ 323 | public void forceReduceRank() { 324 | if (nextZeroRow_ > k_) { 325 | reduceRank(); 326 | } 327 | } 328 | 329 | /** 330 | * Returns a Matrix with the current state of the sketch. Call trim() first to ensure 331 | * no more than k rows. Equivalent to calling getResult(false). 332 | * @return A Matrix representing the data in this sketch 333 | */ 334 | public Matrix getResult() { 335 | return getResult(false); 336 | } 337 | 338 | /** 339 | * Returns a Matrix with the current state of the sketch. Call trim() first to ensure 340 | * no more than k rows. If compensative, uses only the top k singular values. If not applying compensation 341 | * factor, this method returns the actual data object meaning any changes to the result data will corrupt 342 | * the sketch. 343 | * @param compensative If true, returns a copy of the data matrix after applying adjustment to singular 344 | * values based on the cumulative weight subtracted off. If false, returns the actual 345 | * data matrix. 346 | * @return A Matrix of the data in this sketch, possibly adjusted by compensating for subtracted weight. 347 | */ 348 | public Matrix getResult(final boolean compensative) { 349 | if (isEmpty()) { 350 | return null; 351 | } 352 | 353 | if (compensative) { 354 | if (svd_ == null) { 355 | svd_ = MatrixOps.newInstance(B_, algo_, k_); 356 | } 357 | 358 | return svd_.applyAdjustment(B_, svAdjustment_); 359 | } else { 360 | return B_; 361 | } 362 | } 363 | 364 | /** 365 | * Resets the sketch to its virgin state. 366 | */ 367 | public void reset() { 368 | n_ = 0; 369 | nextZeroRow_ = 0; 370 | svAdjustment_ = 0.0; 371 | } 372 | 373 | /** 374 | * Returns a serialized representation of the sketch. 375 | * @return A serialized representation of the sketch. 
376 | */ 377 | public byte[] toByteArray() { 378 | final boolean empty = isEmpty(); 379 | final int familyId = MatrixFamily.FREQUENTDIRECTIONS.getID(); 380 | 381 | final int preLongs = empty 382 | ? MatrixFamily.FREQUENTDIRECTIONS.getMinPreLongs() 383 | : MatrixFamily.FREQUENTDIRECTIONS.getMaxPreLongs(); 384 | 385 | final int mtxBytes = empty ? 0 : B_.getCompactSizeBytes(nextZeroRow_, d_); 386 | final int outBytes = (preLongs * Long.BYTES) + mtxBytes; 387 | 388 | final byte[] outArr = new byte[outBytes]; 389 | final WritableMemory memOut = WritableMemory.wrap(outArr); 390 | final Object memObj = memOut.getArray(); 391 | final long memAddr = memOut.getCumulativeOffset(0L); 392 | 393 | insertPreLongs(memObj, memAddr, preLongs); 394 | insertSerVer(memObj, memAddr, SER_VER); 395 | insertFamilyID(memObj, memAddr, familyId); 396 | insertFlags(memObj, memAddr, (empty ? EMPTY_FLAG_MASK : 0)); 397 | insertK(memObj, memAddr, k_); 398 | insertNumRows(memObj, memAddr, nextZeroRow_); 399 | insertNumColumns(memObj, memAddr, d_); 400 | 401 | if (empty) { 402 | return outArr; 403 | } 404 | 405 | insertN(memObj, memAddr, n_); 406 | insertSVAdjustment(memObj, memAddr, svAdjustment_); 407 | 408 | memOut.putByteArray((long)preLongs * Long.BYTES, 409 | B_.toCompactByteArray(nextZeroRow_, d_), 0, mtxBytes); 410 | 411 | return outArr; 412 | } 413 | 414 | @Override 415 | public String toString() { 416 | return toString(false, false, false); 417 | } 418 | 419 | /** 420 | * Returns a human-readable summary of the sketch and, optionally, prints the singular values. 421 | * @param printSingularValues If true, prints sketch's data matrix 422 | * @return A String representation of the sketch. 
423 | */ 424 | public String toString(final boolean printSingularValues) { 425 | return toString(printSingularValues, false, false); 426 | } 427 | 428 | /** 429 | * Returns a human-readable summary of the sketch, optionally printing either the filled 430 | * or complete sketch matrix, and also optionally adjusting the singular values based on the 431 | * total weight subtacted during the algorithm. 432 | * @param printSingularValues If true, prints the sketch's singular values 433 | * @param printMatrix If true, prints the sketch's data matrix 434 | * @param applyCompensation If true, prints adjusted singular values 435 | * @return A String representation of the sketch. 436 | */ 437 | public String toString(final boolean printSingularValues, 438 | final boolean printMatrix, 439 | final boolean applyCompensation) { 440 | final StringBuilder sb = new StringBuilder(); 441 | 442 | final String thisSimpleName = this.getClass().getSimpleName(); 443 | 444 | sb.append(LS); 445 | sb.append("### ").append(thisSimpleName).append(" INFO: ").append(LS); 446 | if (applyCompensation) { 447 | sb.append("Applying compensative adjustments to matrix values").append(LS); 448 | } 449 | sb.append(" k : ").append(k_).append(LS); 450 | sb.append(" d : ").append(d_).append(LS); 451 | sb.append(" l : ").append(l_).append(LS); 452 | sb.append(" n : ").append(n_).append(LS); 453 | sb.append(" numRows : ").append(nextZeroRow_).append(LS); 454 | sb.append(" SV adjustment: ").append(svAdjustment_).append(LS); 455 | 456 | if (printSingularValues) { 457 | sb.append(" Singular Vals: ") 458 | .append(applyCompensation ? 
"(adjusted)" : "(unadjusted)").append(LS); 459 | final double[] sv = getSingularValues(applyCompensation); 460 | for (int i = 0; i < Math.min(k_, n_); ++i) { 461 | if (sv[i] > 0.0) { 462 | double val = sv[i]; 463 | if ((val > 0.0) && applyCompensation) { 464 | val = Math.sqrt((val * val) + svAdjustment_); 465 | } 466 | 467 | sb.append(" \t").append(i).append(":\t").append(val).append(LS); 468 | } 469 | } 470 | } 471 | 472 | if (!printMatrix) { 473 | return sb.toString(); 474 | } 475 | 476 | final int tmpColDim = (int) B_.getNumColumns(); 477 | 478 | sb.append(" Matrix data :").append(LS); 479 | sb.append(B_.getClass().getName()); 480 | sb.append(" < ").append(nextZeroRow_).append(" x ").append(tmpColDim).append(" >"); 481 | 482 | // First element 483 | sb.append("\n{ { ").append(String.format("%.3f", B_.getElement(0, 0))); 484 | 485 | // Rest of the first row 486 | for (int j = 1; j < tmpColDim; j++) { 487 | sb.append(",\t").append(String.format("%.3f", B_.getElement(0, j))); 488 | } 489 | 490 | // For each of the remaining rows 491 | for (int i = 1; i < nextZeroRow_; i++) { 492 | 493 | // First column 494 | sb.append(" },\n{ ").append(String.format("%.3f", B_.getElement(i, 0))); 495 | 496 | // Remaining columns 497 | for (int j = 1; j < tmpColDim; j++) { 498 | sb.append(",\t").append(String.format("%.3f", B_.getElement(i, j))); 499 | } 500 | } 501 | 502 | // Finish 503 | sb.append(" } }").append(LS); 504 | sb.append("### END SKETCH SUMMARY").append(LS); 505 | 506 | return sb.toString(); 507 | } 508 | 509 | int getNumRows() { return nextZeroRow_; } 510 | 511 | // exists for testing 512 | double getSvAdjustment() { return svAdjustment_; } 513 | 514 | private void reduceRank() { 515 | if (svd_ == null) { 516 | svd_ = MatrixOps.newInstance(B_, algo_, k_); 517 | } 518 | 519 | final double newSvAdjustment = svd_.reduceRank(B_); 520 | svAdjustment_ += newSvAdjustment; 521 | nextZeroRow_ = (int) Math.min(k_ - 1, n_); 522 | } 523 | } 524 | 
-------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/decomposition/MatrixOps.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package org.apache.datasketches.vector.decomposition; 20 | 21 | import org.apache.datasketches.vector.matrix.Matrix; 22 | 23 | /** 24 | * Abstract class to wrap libraries to compute singular value 25 | * decompositions and related Matrix operations needed by Frequent 26 | * Directions. May return as many singular values as exist, but other 27 | * operations will limit output to k dimensions. 
28 | */ 29 | abstract class MatrixOps { 30 | 31 | // iterations for SISVD 32 | private static final int DEFAULT_NUM_ITER = 8; 33 | 34 | /** 35 | * Matrix dimensions 36 | */ 37 | final int n_; // rows 38 | final int d_; // columns 39 | 40 | /** 41 | * Target number of dimensions 42 | */ 43 | final int k_; 44 | 45 | /** 46 | * Singular value decomposition method to use 47 | */ 48 | final SVDAlgo algo_; 49 | 50 | int numSISVDIter_; 51 | 52 | /** 53 | * Creates an empty MatrixOps object to support Frequent Directions matrix operations 54 | * 55 | * @param A Matrix of the required type and correct dimensions 56 | * @param algo Enum indicating method to use for SVD 57 | * @param k Target number of dimensions for results 58 | * @return an empty MatrixOps object 59 | */ 60 | public static MatrixOps newInstance(final Matrix A, final SVDAlgo algo, final int k) { 61 | final int n = (int) A.getNumRows(); 62 | final int d = (int) A.getNumColumns(); 63 | 64 | final MatrixOps mo; 65 | 66 | switch (A.getMatrixType()) { 67 | case OJALGO: 68 | mo = new MatrixOpsImplOjAlgo(n, d, algo, k); 69 | break; 70 | 71 | default: 72 | throw new IllegalArgumentException("Unknown MatrixType: " + A.getMatrixType().toString()); 73 | } 74 | 75 | if (algo == SVDAlgo.SISVD) { 76 | mo.setNumSISVDIter((int) Math.ceil(Math.log(d))); 77 | } 78 | return mo; 79 | } 80 | 81 | MatrixOps(final int n, final int d, final SVDAlgo algo, final int k) { 82 | // TODO: make these actual checks 83 | assert n > 0; 84 | assert d > 0; 85 | assert n < d; 86 | assert k > 0; 87 | assert k < n; 88 | 89 | n_ = n; 90 | d_ = d; 91 | algo_ = algo; 92 | k_ = k; 93 | 94 | numSISVDIter_ = DEFAULT_NUM_ITER; 95 | } 96 | 97 | /** 98 | * Computes and returns the singular values, in descending order. May modify the internal state 99 | * of this object. 
100 | * @param A Matrix to decompose 101 | * @return Array of singular values 102 | */ 103 | public double[] getSingularValues(final Matrix A) { 104 | svd(A, false); 105 | return getSingularValues(); 106 | } 107 | 108 | /** 109 | * Returns pre-computed singular values (stored in descending order). Does not perform new computation. 110 | * @return Singular values from the last computation 111 | */ 112 | abstract double[] getSingularValues(); 113 | 114 | /** 115 | * Computes and returns the right singular vectors of A. May modify the internal state of this object. 116 | * @param A Matrix to decompose 117 | * @return Matrix of size d x k 118 | */ 119 | public Matrix getVt(final Matrix A) { 120 | svd(A, true); 121 | return getVt(); 122 | } 123 | 124 | /** 125 | * Returns pre-computed right singular vectors (row-wise?). Does not perform new computation. 126 | * 127 | * @return Matrix of size d x k 128 | */ 129 | abstract Matrix getVt(); 130 | 131 | /** 132 | * Performs a Frequent Directions rank reduction with the SVDAlgo used when obtaining the instance. 133 | * Modifies internal state, with results queried via getVt() and getSingularValues(). 134 | * @return The amount of weight subtracted from the singular values 135 | */ 136 | abstract double reduceRank(final Matrix A); 137 | 138 | /** 139 | * Returns Matrix object reconstructed using the provided singular value adjustment. Requires first 140 | * decomposing the matrix. 141 | * @param A Matrix to decompose and adjust 142 | * @param adjustment Amount by which to adjust the singular values 143 | * @return A new Matrix based on A with singular values adjusted by adjustment 144 | */ 145 | abstract Matrix applyAdjustment(final Matrix A, final double adjustment); 146 | 147 | /** 148 | * Computes a singular value decomposition of the provided Matrix. 149 | * 150 | * @param A Matrix to decompose. Size must conform, and it may be overwritten on return. Pass a copy to 151 | * avoid this. 
152 | * @param computeVectors True to compute Vt, false if only need singular values/ 153 | */ 154 | abstract void svd(final Matrix A, final boolean computeVectors); 155 | 156 | void setNumSISVDIter(final int numSISVDIter) { 157 | numSISVDIter_ = numSISVDIter; 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/decomposition/MatrixOpsImplOjAlgo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.vector.decomposition; 21 | 22 | import java.util.Optional; 23 | 24 | import org.ojalgo.matrix.decomposition.Eigenvalue; 25 | import org.ojalgo.matrix.decomposition.QR; 26 | import org.ojalgo.matrix.decomposition.SingularValue; 27 | import org.ojalgo.matrix.store.MatrixStore; 28 | import org.ojalgo.matrix.store.Primitive64Store; 29 | import org.ojalgo.matrix.store.SparseStore; 30 | import org.ojalgo.random.Normal; 31 | 32 | import org.apache.datasketches.vector.matrix.Matrix; 33 | import org.apache.datasketches.vector.matrix.MatrixType; 34 | 35 | class MatrixOpsImplOjAlgo extends MatrixOps { 36 | private double[] sv_; 37 | private Primitive64Store Vt_; 38 | 39 | // work objects for SISVD 40 | private Primitive64Store block_; 41 | private Primitive64Store T_; // also used in SymmetricEVD 42 | private QR qr_; 43 | 44 | // work objects for Symmetric EVD 45 | private Eigenvalue evd_; 46 | 47 | // work object for full SVD 48 | private SingularValue svd_; 49 | 50 | transient private SparseStore S_; // to hold singular value matrix 51 | 52 | MatrixOpsImplOjAlgo(final int n, final int d, final SVDAlgo algo, final int k) { 53 | super(n, d, algo, k); 54 | 55 | // Allocate space for the decomposition 56 | sv_ = new double[Math.min(n_, d_)]; 57 | Vt_ = null; // lazy allocation 58 | } 59 | 60 | @Override 61 | void svd(final Matrix A, final boolean computeVectors) { 62 | assert A.getMatrixType() == MatrixType.OJALGO; 63 | 64 | if (A.getNumRows() != n_) { 65 | throw new IllegalArgumentException("A.numRows() != n_"); 66 | } else if (A.getNumColumns() != d_) { 67 | throw new IllegalArgumentException("A.numColumns() != d_"); 68 | } 69 | 70 | if (computeVectors && (Vt_ == null)) { 71 | Vt_ = Primitive64Store.FACTORY.make(n_, d_); 72 | S_ = SparseStore.makePrimitive(sv_.length, sv_.length); 73 | } 74 | 75 | switch (algo_) { 76 | case FULL: 77 | computeFullSVD((Primitive64Store) A.getRawObject(), computeVectors); 78 | return; 79 | 80 | 
case SISVD: 81 | computeSISVD((Primitive64Store) A.getRawObject(), computeVectors); 82 | return; 83 | 84 | case SYM: 85 | computeSymmEigSVD((Primitive64Store) A.getRawObject(), computeVectors); 86 | return; 87 | 88 | default: 89 | throw new RuntimeException("SVDAlgo type not (yet?) supported: " + algo_.toString()); 90 | } 91 | } 92 | 93 | @Override 94 | double[] getSingularValues() { 95 | return sv_; 96 | } 97 | 98 | @Override 99 | Matrix getVt() { 100 | return Matrix.wrap(Vt_); 101 | } 102 | 103 | @Override 104 | double reduceRank(final Matrix A) { 105 | svd(A, true); 106 | 107 | double svAdjustment = 0.0; 108 | 109 | if (sv_.length >= k_) { 110 | double medianSVSq = sv_[k_ - 1]; // (l_/2)th item, not yet squared 111 | medianSVSq *= medianSVSq; 112 | svAdjustment += medianSVSq; // always track, even if not using compensative mode 113 | for (int i = 0; i < (k_ - 1); ++i) { 114 | final double val = sv_[i]; 115 | final double adjSqSV = (val * val) - medianSVSq; 116 | S_.set(i, i, adjSqSV < 0 ? 0.0 : Math.sqrt(adjSqSV)); // just to be safe 117 | } 118 | for (int i = k_ - 1; i < S_.countColumns(); ++i) { 119 | S_.set(i, i, 0.0); 120 | } 121 | } else { 122 | throw new RuntimeException("Running with d < 2k not (yet?) 
supported"); 123 | /* 124 | for (int i = 0; i < sv_.length; ++i) { 125 | S_.set(i, i, sv_[i]); 126 | } 127 | for (int i = sv_.length; i < S_.countColumns(); ++i) { 128 | S_.set(i, i, 0.0); 129 | } 130 | */ 131 | } 132 | 133 | // store the result back in A 134 | S_.multiply(Vt_, (Primitive64Store) A.getRawObject()); 135 | 136 | return svAdjustment; 137 | } 138 | 139 | @Override 140 | Matrix applyAdjustment(final Matrix A, final double svAdjustment) { 141 | // copy A before decomposing 142 | final Primitive64Store result 143 | = Primitive64Store.FACTORY.copy((Primitive64Store) A.getRawObject()); 144 | svd(Matrix.wrap(result), true); 145 | 146 | for (int i = 0; i < (k_ - 1); ++i) { 147 | final double val = sv_[i]; 148 | final double adjSV = Math.sqrt((val * val) + svAdjustment); 149 | S_.set(i, i, adjSV); 150 | } 151 | for (int i = k_ - 1; i < S_.countColumns(); ++i) { 152 | S_.set(i, i, 0.0); 153 | } 154 | 155 | S_.multiply(Vt_, result); 156 | 157 | return Matrix.wrap(result); 158 | } 159 | 160 | private void computeFullSVD(final MatrixStore A, final boolean computeVectors) { 161 | if (svd_ == null) { 162 | svd_ = SingularValue.PRIMITIVE.make(A); 163 | } 164 | 165 | if (computeVectors) { 166 | svd_.decompose(A); 167 | svd_.getV().transpose().supplyTo(Vt_); 168 | } else { 169 | svd_.computeValuesOnly(A); 170 | } 171 | svd_.getSingularValues(sv_); 172 | } 173 | 174 | private void computeSISVD(final MatrixStore A, final boolean computeVectors) { 175 | // want to iterate on smaller dimension of A (n x d) 176 | // currently, error in constructor if d < n, so n is always the smaller dimension 177 | if (block_ == null) { 178 | block_ = Primitive64Store.FACTORY.makeFilled(d_, k_, new Normal(0.0, 1.0)); 179 | qr_ = QR.PRIMITIVE.make(block_); 180 | T_ = Primitive64Store.FACTORY.make(n_, k_); 181 | } else { 182 | block_.fillAll(new Normal(0.0, 1.0)); 183 | } 184 | 185 | // orthogonalize for numeric stability 186 | qr_.decompose(block_); 187 | qr_.getQ().supplyTo(block_); 188 | 
189 | for (int i = 0; i < numSISVDIter_; ++i) { 190 | A.multiply(block_, T_); 191 | 192 | // again, just for stability 193 | qr_.decompose(T_.premultiply(A.transpose())); 194 | qr_.getQ().supplyTo(block_); 195 | } 196 | 197 | // Rayleigh-Ritz postprocessing 198 | 199 | final SingularValue svd = SingularValue.PRIMITIVE.make(T_); 200 | svd.compute(block_.premultiply(A)); 201 | 202 | svd.getSingularValues(sv_); 203 | 204 | if (computeVectors) { 205 | // V = block * Q2^T so V^T = Q2 * block^T 206 | // and ojAlgo figures out that it only needs to fill the first k_ rows of Vt_ 207 | svd.getV().multiply(block_.transpose()).supplyTo(Vt_); 208 | } 209 | } 210 | 211 | private void computeSymmEigSVD(final MatrixStore A, final boolean computeVectors) { 212 | if (evd_ == null) { 213 | evd_ = Eigenvalue.PRIMITIVE.make(n_, true); 214 | } 215 | 216 | // want left singular vectors U, aka eigenvectors of AA^T -- so compute that 217 | evd_.decompose(A.transpose().premultiply(A)); 218 | 219 | // TODO: can we only use k_ values? 220 | final double[] ev = new double[n_]; 221 | evd_.getEigenvalues(ev, Optional.empty()); 222 | for (int i = 0; i < ev.length; ++i) { 223 | final double val = Math.sqrt(ev[i]); 224 | sv_[i] = val; 225 | if (computeVectors && (val > 0)) { S_.set(i, i, 1 / val); } 226 | } 227 | 228 | if (computeVectors) { 229 | S_.multiply(evd_.getV().transpose()).multiply(A, Vt_); 230 | } 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/decomposition/PreambleUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.vector.decomposition; 21 | 22 | import static org.apache.datasketches.memory.UnsafeUtil.unsafe; 23 | 24 | import org.apache.datasketches.memory.Memory; 25 | import org.apache.datasketches.vector.MatrixFamily; 26 | 27 | /** 28 | * This class defines the preamble items structure and provides basic utilities for some of the key fields. 29 | * 30 | *

31 | * The low significance bytes of this long items structure are on the right. Multi-byte 32 | * integers (int and long) are stored in native byte order. All byte 33 | * values are treated as unsigned.

34 | * 35 | *

An empty Frequent Directions sketch requires 16 bytes. A non-empty sketch requires 32 bytes 36 | * of preamble. The matrix is dense and is expected to dominate storage.

37 | * 38 | *
 39 |  * Long || Start Byte Adr:
 40 |  * Adr:
 41 |  *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
 42 |  *  0   ||----------Sketch Size (k)----------|  Flags | FamID  | SerVer |   Preamble_Longs   |
 43 |  *
 44 |  *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
 45 |  *  1   ||-----------Num. Columns------------|------Current Num Rows-------------------------|
 46 |  *
 47 |  *      ||   23   |   22   |   21   |   20   |   19   |   18   |   17   |    16              |
 48 |  *  2   ||-----------------------------Total Records Seen (n)--------------------------------|
 49 |  *
 50 |  *      ||   31   |   30   |   29   |   28   |   27   |   26   |   25   |    24              |
 51 |  *  3   ||------------------------------Total SV Adjustment----------------------------------|
 52 |  *  
53 | * 54 | * @author Jon Malkin 55 | */ 56 | @SuppressWarnings("restriction") 57 | public final class PreambleUtil { 58 | 59 | /** 60 | * The java line separator character as a String. 61 | */ 62 | private static final String LS = System.getProperty("line.separator"); 63 | 64 | private PreambleUtil() {} 65 | 66 | // ###### DO NOT MESS WITH THIS FROM HERE ... 67 | // Preamble byte Addresses 68 | static final int PREAMBLE_LONGS_BYTE = 0; 69 | static final int SER_VER_BYTE = 1; 70 | static final int FAMILY_BYTE = 2; 71 | static final int FLAGS_BYTE = 3; 72 | static final int K_INT = 4; 73 | static final int NUM_ROWS_INT = 8; 74 | static final int NUM_COLUMNS_INT = 12; 75 | static final int N_LONG = 16; 76 | static final int SV_ADJUSTMENT_DOUBLE = 24; 77 | 78 | // flag bit masks 79 | static final int EMPTY_FLAG_MASK = 4; 80 | 81 | // Other constants 82 | static final int SER_VER = 1; 83 | 84 | 85 | /** 86 | * Returns a human readable string summary of the preamble state of the given Memory. 87 | * Note: other than making sure that the given Memory size is large 88 | * enough for just the preamble, this does not do much value checking of the contents of the 89 | * preamble as this is primarily a tool for debugging the preamble visually. 90 | * 91 | * @param mem the given Memory. 92 | * @return the summary preamble string. 
93 | */ 94 | public static String preambleToString(final Memory mem) { 95 | 96 | final int preLongs = getAndCheckPreLongs(mem); // make sure we can get the assumed preamble 97 | final MatrixFamily family = MatrixFamily.idToFamily(extractFamilyID(mem)); 98 | 99 | final int serVer = extractSerVer(mem); 100 | final int flags = extractFlags(mem); 101 | final String flagsStr = Integer.toBinaryString(flags) + ", " + flags; 102 | final boolean isEmpty = (flags & EMPTY_FLAG_MASK) > 0; 103 | 104 | final int k = extractK(mem); 105 | final int d = extractNumColumns(mem); 106 | final int numRows = extractNumRows(mem); 107 | 108 | long n = 0; 109 | double svAdjustment = 0.0; 110 | if (!isEmpty) { 111 | n = extractN(mem); 112 | svAdjustment = extractSVAdjustment(mem); 113 | } 114 | 115 | final StringBuilder sb = new StringBuilder(); 116 | sb.append(LS) 117 | .append("### START ") 118 | .append(family.getFamilyName().toUpperCase()) 119 | .append(" PREAMBLE SUMMARY").append(LS) 120 | .append("Byte 0: Preamble Longs : ").append(preLongs).append(LS) 121 | .append("Byte 1: Serialization Version: ").append(serVer).append(LS) 122 | .append("Byte 2: Family : ").append(family.toString()).append(LS) 123 | .append("Byte 3: Flags Field : ").append(flagsStr).append(LS) 124 | .append(" EMPTY : ").append(isEmpty).append(LS) 125 | .append("Bytes 4-7 : Sketch Size (k) : ").append(k).append(LS) 126 | .append("Bytes 8-11: Num Rows : ").append(numRows).append(LS) 127 | .append("Bytes 12-15: Num Dimensions : ").append(d).append(LS); 128 | 129 | if (!isEmpty) { 130 | sb.append("Bytes 16-23: Items Seen(n) : ").append(n).append(LS); 131 | sb.append("Bytes 24-31: SV Adjustment : ").append(svAdjustment).append(LS); 132 | } 133 | 134 | final long numBytes = (long)numRows * d * Double.BYTES; 135 | sb.append("TOTAL Sketch Bytes : ").append(mem.getCapacity()).append(LS) 136 | .append(" Preamble Bytes : ").append(preLongs << 3).append(LS) 137 | .append(" Data Bytes : ").append(numBytes).append(LS) 138 | 
.append("### END ") 139 | .append(family.getFamilyName().toUpperCase()) 140 | .append(" PREAMBLE SUMMARY").append(LS); 141 | return sb.toString(); 142 | } 143 | 144 | // Extraction methods 145 | 146 | static int extractPreLongs(final Memory mem) { 147 | return mem.getInt(PREAMBLE_LONGS_BYTE) & 0xFF; 148 | } 149 | 150 | static int extractSerVer(final Memory mem) { 151 | return mem.getInt(SER_VER_BYTE) & 0xFF; 152 | } 153 | 154 | static int extractFamilyID(final Memory mem) { 155 | return mem.getByte(FAMILY_BYTE) & 0xFF; 156 | } 157 | 158 | static int extractFlags(final Memory mem) { 159 | return mem.getByte(FLAGS_BYTE) & 0xFF; 160 | } 161 | 162 | static int extractK(final Memory mem) { 163 | return mem.getInt(K_INT); 164 | } 165 | 166 | static int extractNumRows(final Memory mem) { 167 | return mem.getInt(NUM_ROWS_INT); 168 | } 169 | 170 | static int extractNumColumns(final Memory mem) { 171 | return mem.getInt(NUM_COLUMNS_INT); 172 | } 173 | 174 | static long extractN(final Memory mem) { 175 | return mem.getLong(N_LONG); 176 | } 177 | 178 | static double extractSVAdjustment(final Memory mem) { 179 | return mem.getDouble(SV_ADJUSTMENT_DOUBLE); 180 | } 181 | 182 | 183 | // Insertion methods 184 | 185 | static void insertPreLongs(final Object memObj, final long memAddr, final int preLongs) { 186 | unsafe.putByte(memObj, memAddr + PREAMBLE_LONGS_BYTE, (byte) preLongs); 187 | } 188 | 189 | static void insertSerVer(final Object memObj, final long memAddr, final int serVer) { 190 | unsafe.putByte(memObj, memAddr + SER_VER_BYTE, (byte) serVer); 191 | } 192 | 193 | static void insertFamilyID(final Object memObj, final long memAddr, final int matrixFamId) { 194 | unsafe.putByte(memObj, memAddr + FAMILY_BYTE, (byte) matrixFamId); 195 | } 196 | 197 | static void insertFlags(final Object memObj, final long memAddr, final int flags) { 198 | unsafe.putByte(memObj, memAddr + FLAGS_BYTE, (byte) flags); 199 | } 200 | 201 | static void insertK(final Object memObj, final long memAddr, 
final int k) { 202 | unsafe.putInt(memObj, memAddr + K_INT, k); 203 | } 204 | 205 | static void insertNumRows(final Object memObj, final long memAddr, final int numRows) { 206 | unsafe.putInt(memObj, memAddr + NUM_ROWS_INT, numRows); 207 | } 208 | 209 | static void insertNumColumns(final Object memObj, final long memAddr, final int numColumns) { 210 | unsafe.putInt(memObj, memAddr + NUM_COLUMNS_INT, numColumns); 211 | } 212 | 213 | static void insertN(final Object memObj, final long memAddr, final long n) { 214 | unsafe.putLong(memObj, memAddr + N_LONG, n); 215 | } 216 | 217 | static void insertSVAdjustment(final Object memObj, final long memAddr, final double adj) { 218 | unsafe.putDouble(memObj, memAddr + SV_ADJUSTMENT_DOUBLE, adj); 219 | } 220 | 221 | 222 | /** 223 | * Checks Memory for capacity to hold the preamble and returns the extracted preLongs. 224 | * @param mem the given Memory 225 | * @return the extracted prelongs value. 226 | */ 227 | static int getAndCheckPreLongs(final Memory mem) { 228 | final long cap = mem.getCapacity(); 229 | if (cap < Long.BYTES) { throwNotBigEnough(cap, Long.BYTES); } 230 | final int preLongs = extractPreLongs(mem); 231 | final int required = Math.max(preLongs << 3, Long.BYTES); 232 | if (cap < required) { throwNotBigEnough(cap, required); } 233 | return preLongs; 234 | } 235 | 236 | private static void throwNotBigEnough(final long cap, final int required) { 237 | throw new IllegalArgumentException( 238 | "Possible Corruption: Size of byte array or Memory not large enough: Size: " + cap 239 | + ", Required: " + required); 240 | } 241 | } 242 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/decomposition/SVDAlgo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.vector.decomposition; 21 | 22 | /** 23 | * This class allows a choice of algorithms for Singular Value Decomposition. The options are: 24 | *
    25 | *
  • FULL: The matrix library's default SVD implementation.
  • 26 | *
  • SISVD: Simultaneous iteration, an approximate method likely to be more efficient only with sparse 27 | * matrices or when k is significantly smaller than the number of rows in the sketch.
  • 28 | *
  • SYM: Takes advantage of matrix dimensionality, first computing the eigenvalues of AA^T and then deriving the 29 | * intended results from them. Squaring A alters the condition number and may cause numeric stability issues, 30 | * but this is unlikely to matter for Frequent Directions since the smaller singular values/vectors are discarded.
  • 31 | *
/**
 * Selects the algorithm used for Singular Value Decomposition. Each constant carries a fixed
 * integer ID and a display name; both are assigned once at construction and never change.
 */
public enum SVDAlgo {

  /**
   * The matrix library's default SVD implementation.
   */
  FULL(1, "Full"),

  /**
   * Simultaneous iteration, an approximate method likely to be more efficient only with sparse
   * matrices or when k is significantly smaller than the number of rows in the sketch.
   */
  SISVD(2, "SISVD"),

  /**
   * Takes advantage of matrix dimensionality, first computing the eigenvalues of AA^T and then
   * deriving the intended results. Squaring A alters the condition number and may cause numeric
   * stability issues, but this is unlikely to matter for Frequent Directions since the smaller
   * singular values/vectors are discarded.
   */
  SYM(3, "Symmetrized");

  // immutable per-constant state
  private final int id_;
  private final String name_;

  SVDAlgo(final int id, final String name) {
    id_ = id;
    name_ = name;
  }

  /**
   * Returns the ID.
   * @return the ID.
   */
  public int getId() { return id_; }

  /**
   * Gets the name
   * @return the name
   */
  public String getName() { return name_; }

  /**
   * Returns the display name, identical to {@link #getName()}.
   * @return the name
   */
  @Override
  public String toString() { return name_; }
}
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | *

This package is dedicated to streaming algorithms that enable approximate matrix 22 | * decompositions.

23 | * 24 | *

These sketches are mergeable and can be serialized and deserialized to/from a compact 25 | * form.

26 | */ 27 | package org.apache.datasketches.vector.decomposition; 28 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/matrix/Matrix.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.vector.matrix; 21 | 22 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.LS; 23 | 24 | import org.ojalgo.matrix.store.Primitive64Store; 25 | 26 | import org.apache.datasketches.memory.Memory; 27 | import org.apache.datasketches.vector.MatrixFamily; 28 | 29 | /** 30 | * Provides an implementation-agnostic wrapper around Matrix classes. 31 | * 32 | * @author Jon Malkin 33 | */ 34 | public abstract class Matrix { 35 | int numRows_; 36 | int numCols_; 37 | 38 | /** 39 | * Loads matrix from srcMem, assuming storage in column-major order to ensure portability. 40 | * Does not necessarily encode matrix size; do not expect size checks based on passed-in 41 | * parameters. 
42 | * 43 | * @param srcMem Memory wrapping the matrix 44 | * @param type Matrix implementation type to use 45 | * @return The heapified matrix 46 | */ 47 | public static Matrix heapify(final Memory srcMem, final MatrixType type) { 48 | switch (type) { 49 | case OJALGO: 50 | return MatrixImplOjAlgo.heapifyInstance(srcMem); 51 | default: 52 | return null; 53 | } 54 | } 55 | 56 | /** 57 | * Wraps an object without allocating memory. This method will throw an exception if the mtx 58 | * Object is not of the same type as the implementing class's native format. 59 | * @param mtx Matrix object to wrap 60 | * @return A Matrix object 61 | */ 62 | public static Matrix wrap(final Object mtx) { 63 | if (mtx == null) { 64 | return null; 65 | } else if (mtx instanceof Primitive64Store) { 66 | return MatrixImplOjAlgo.wrap((Primitive64Store) mtx); 67 | } 68 | else { 69 | throw new IllegalArgumentException("wrap() does not currently support " 70 | + mtx.getClass().toString()); 71 | } 72 | } 73 | 74 | /** 75 | * Gets a builder to be able to create instances of Matrix objects 76 | * @return a MatrixBuilder object 77 | */ 78 | public static MatrixBuilder builder() { 79 | return new MatrixBuilder(); 80 | } 81 | 82 | /** 83 | * Returns the raw data object backing this Matrix, as an Object. Must be cast to the 84 | * appropriate type (assuming knowledge of the implementation) to be used. 85 | * @return An Object pointing to the raw data backing this Matrix 86 | */ 87 | public abstract Object getRawObject(); 88 | 89 | /** 90 | * Serializes the Matrix in a custom format as a byte array 91 | * @return A byte[] conttaining a serialized Matrix 92 | */ 93 | public abstract byte[] toByteArray(); 94 | 95 | /** 96 | * Serializes a sub-Matrix by storing only the first numRows and numCols rows and columns, 97 | * respsectively. 98 | * @param numRows Number of rows to write 99 | * @param numCols Number of columns to write 100 | * @return A byte[] containing the serialized sub-Matrix. 
101 | */ 102 | public abstract byte[] toCompactByteArray(int numRows, int numCols); 103 | 104 | /** 105 | * Returns a single element from the Matrix 106 | * @param row Row index of target element (0-based) 107 | * @param col Column index of target elemtn (0-based) 108 | * @return Matrix value at (row, column) 109 | */ 110 | public abstract double getElement(int row, int col); 111 | 112 | /** 113 | * Returns a copy of an entire row of the Matrix 114 | * @param row Row index to return (0-based) 115 | * @return A double[] representing the Matrix row 116 | */ 117 | public abstract double[] getRow(int row); 118 | 119 | /** 120 | * Returns a copy of an entire column of the Matrix 121 | * @param col Column index to return (0-based) 122 | * @return A double[] representing the Matrix column 123 | */ 124 | public abstract double[] getColumn(int col); 125 | 126 | /** 127 | * Sets a single element inthe Matrix 128 | * @param row Row index of target element (0-based) 129 | * @param col Column index of target element (0-based) 130 | * @param value The value to insert into the Matrix at (row, column) 131 | */ 132 | public abstract void setElement(int row, int col, double value); 133 | 134 | /** 135 | * Sets an entire row of the Matrix, by copying data from the input 136 | * @param row Target row index (0-based) 137 | * @param values Array of values to write into the Matrix 138 | */ 139 | public abstract void setRow(int row, double[] values); 140 | 141 | /** 142 | * Sets an entire column of the Matrix, by copying data from the input 143 | * @param column Target column index (0-based) 144 | * @param values Array of values to write into the Matrix 145 | */ 146 | public abstract void setColumn(int column, double[] values); 147 | 148 | /** 149 | * Gets the number of rows in the Matrix 150 | * @return Configured number of rows in the Matrix 151 | */ 152 | public long getNumRows() { return numRows_; } 153 | 154 | /** 155 | * Gets the number of columns in the Matrix 156 | * @return 
Configured number of columns in the Matrix 157 | */ 158 | public long getNumColumns() { return numCols_; } 159 | 160 | /** 161 | * Gets serialized size of the Matrix, in bytes. 162 | * @return Number of bytes needed for a serialized Matrix 163 | */ 164 | public int getSizeBytes() { 165 | final int preBytes = MatrixFamily.MATRIX.getMinPreLongs() * Long.BYTES; 166 | final int mtxBytes = (numRows_ * numCols_) * Double.BYTES; 167 | return preBytes + mtxBytes; 168 | } 169 | 170 | /** 171 | * Gets serialized size of the Matrix in compact form, in bytes. 172 | * @param rows Number of rows to select for writing 173 | * @param cols Number of columns to select for writing 174 | * @return Number of bytes needed to serialize the first (rows, cols) of this Matrix 175 | */ 176 | public int getCompactSizeBytes(final int rows, final int cols) { 177 | final int nRows = Math.min(rows, numRows_); 178 | final int nCols = Math.min(cols, numCols_); 179 | 180 | if ((nRows < 1) || (nCols < 1)) { 181 | return MatrixFamily.MATRIX.getMinPreLongs() * Long.BYTES; 182 | } else if ((nRows == numRows_) && (nCols == numCols_)) { 183 | return getSizeBytes(); 184 | } 185 | 186 | final int preBytes = MatrixFamily.MATRIX.getMaxPreLongs() * Long.BYTES; 187 | final int mtxBytes = (nRows * nCols) * Double.BYTES; 188 | return preBytes + mtxBytes; 189 | } 190 | 191 | /** 192 | * Writes information about this Matrix to a String. 
193 | * @return A human-readable representation of a Matrix 194 | */ 195 | @Override 196 | public String toString() { 197 | final StringBuilder sb = new StringBuilder(); 198 | 199 | sb.append(" Matrix data :").append(LS); 200 | sb.append(this.getClass().getName()); 201 | sb.append(" < ").append(numRows_).append(" x ").append(numCols_).append(" >"); 202 | 203 | // First element 204 | sb.append("\n{ { ").append(getElement(0, 0)); 205 | 206 | // Rest of the first row 207 | for (int j = 1; j < numCols_; j++) { 208 | sb.append(",\t").append(getElement(0, j)); 209 | } 210 | 211 | // For each of the remaining rows 212 | for (int i = 1; i < numRows_; i++) { 213 | 214 | // First column 215 | sb.append(" },\n{ ").append(getElement(i, 0)); 216 | 217 | // Remaining columns 218 | for (int j = 1; j < numCols_; j++) { 219 | sb.append(",\t").append(getElement(i, j)); 220 | } 221 | } 222 | 223 | // Finish 224 | sb.append(" } }").append(LS); 225 | 226 | return sb.toString(); 227 | } 228 | 229 | /** 230 | * Gets the matrix type 231 | * @return the matrix type 232 | */ 233 | public abstract MatrixType getMatrixType(); 234 | } 235 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/matrix/MatrixBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.vector.matrix; 21 | 22 | /** 23 | * Provides a builder for Matrix objects. 24 | */ 25 | public class MatrixBuilder { 26 | 27 | private MatrixType type_ = MatrixType.OJALGO; // default type 28 | 29 | /** 30 | * Default no-op constructor. 31 | */ 32 | public MatrixBuilder() {} 33 | 34 | /** 35 | * Sets the underlying type of object to use with any Matrix objects created. 36 | * @param type One of the supported types 37 | * @return This MatrixBuilder object 38 | */ 39 | public MatrixBuilder setType(final MatrixType type) { 40 | type_ = type; 41 | return this; 42 | } 43 | 44 | /** 45 | * Returns a value from an enum defining the type of object backing any Matrix objects created. 46 | * @return An item from the MatrixType enum. 
47 | */ 48 | public MatrixType getBackingType() { 49 | return type_; 50 | } 51 | 52 | /** 53 | * Instantiates a new, empty matrix of the target size 54 | * 55 | * @param numRows Number of rows in matrix 56 | * @param numCols Number of columns in matrix 57 | * @return An empty matrix of the requested size 58 | */ 59 | public Matrix build(final int numRows, final int numCols) { 60 | switch (type_) { 61 | case OJALGO: 62 | return MatrixImplOjAlgo.newInstance(numRows, numCols); 63 | 64 | default: 65 | throw new IllegalArgumentException("OJALGO is currently the only supported MatrixTypes"); 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/matrix/MatrixImplOjAlgo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.vector.matrix; 21 | 22 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.COMPACT_FLAG_MASK; 23 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractFamilyID; 24 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractFlags; 25 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractNumColumns; 26 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractNumColumnsUsed; 27 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractNumRows; 28 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractNumRowsUsed; 29 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractPreLongs; 30 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractSerVer; 31 | 32 | import org.ojalgo.matrix.store.Primitive64Store; 33 | 34 | import org.apache.datasketches.memory.Memory; 35 | import org.apache.datasketches.memory.WritableMemory; 36 | import org.apache.datasketches.vector.MatrixFamily; 37 | 38 | /** 39 | * Implements the ojAlgo Matrix operations. 
40 | */ 41 | public final class MatrixImplOjAlgo extends Matrix { 42 | private Primitive64Store mtx_; 43 | 44 | private MatrixImplOjAlgo(final int numRows, final int numCols) { 45 | mtx_ = Primitive64Store.FACTORY.make(numRows, numCols); 46 | numRows_ = numRows; 47 | numCols_ = numCols; 48 | } 49 | 50 | private MatrixImplOjAlgo(final Primitive64Store mtx) { 51 | mtx_ = mtx; 52 | numRows_ = (int) mtx.countRows(); 53 | numCols_ = (int) mtx.countColumns(); 54 | } 55 | 56 | static Matrix newInstance(final int numRows, final int numCols) { 57 | return new MatrixImplOjAlgo(numRows, numCols); 58 | } 59 | 60 | static Matrix heapifyInstance(final Memory srcMem) { 61 | final int minBytes = MatrixFamily.MATRIX.getMinPreLongs() * Long.BYTES; 62 | final long memCapBytes = srcMem.getCapacity(); 63 | if (memCapBytes < minBytes) { 64 | throw new IllegalArgumentException("Source Memory too small: " + memCapBytes 65 | + " < " + minBytes); 66 | } 67 | 68 | final int preLongs = extractPreLongs(srcMem); 69 | final int serVer = extractSerVer(srcMem); 70 | final int familyID = extractFamilyID(srcMem); 71 | 72 | if (serVer != 1) { 73 | throw new IllegalArgumentException("Invalid SerVer reading srcMem. 
Expected 1, found: " 74 | + serVer); 75 | } 76 | if (familyID != MatrixFamily.MATRIX.getID()) { 77 | throw new IllegalArgumentException("srcMem does not point to a Matrix"); 78 | } 79 | 80 | final int flags = extractFlags(srcMem); 81 | final boolean isCompact = (flags & COMPACT_FLAG_MASK) > 0; 82 | 83 | int nRows = extractNumRows(srcMem); 84 | int nCols = extractNumColumns(srcMem); 85 | 86 | final MatrixImplOjAlgo matrix = new MatrixImplOjAlgo(nRows, nCols); 87 | if (isCompact) { 88 | nRows = extractNumRowsUsed(srcMem); 89 | nCols = extractNumColumnsUsed(srcMem); 90 | } 91 | 92 | int memOffset = preLongs * Long.BYTES; 93 | for (int c = 0; c < nCols; ++c) { 94 | for (int r = 0; r < nRows; ++r) { 95 | matrix.mtx_.set(r, c, srcMem.getDouble(memOffset)); 96 | memOffset += Double.BYTES; 97 | } 98 | } 99 | 100 | return matrix; 101 | } 102 | 103 | static Matrix wrap(final Primitive64Store mtx) { 104 | return new MatrixImplOjAlgo(mtx); 105 | } 106 | 107 | @Override 108 | public Object getRawObject() { 109 | return mtx_; 110 | } 111 | 112 | @Override 113 | public byte[] toByteArray() { 114 | final int preLongs = 2; 115 | final long numElements = mtx_.count(); 116 | assert numElements == (mtx_.countColumns() * mtx_.countRows()); 117 | 118 | final int outBytes = (int) (((long)preLongs * Long.BYTES) + (numElements * Double.BYTES)); 119 | final byte[] outByteArr = new byte[outBytes]; 120 | final WritableMemory memOut = WritableMemory.wrap(outByteArr); 121 | final Object memObj = memOut.getArray(); 122 | final long memAddr = memOut.getCumulativeOffset(0L); 123 | 124 | MatrixPreambleUtil.insertPreLongs(memObj, memAddr, preLongs); 125 | MatrixPreambleUtil.insertSerVer(memObj, memAddr, MatrixPreambleUtil.SER_VER); 126 | MatrixPreambleUtil.insertFamilyID(memObj, memAddr, MatrixFamily.MATRIX.getID()); 127 | MatrixPreambleUtil.insertFlags(memObj, memAddr, 0); 128 | MatrixPreambleUtil.insertNumRows(memObj, memAddr, (int) mtx_.countRows()); 129 | 
MatrixPreambleUtil.insertNumColumns(memObj, memAddr, (int) mtx_.countColumns()); 130 | memOut.putDoubleArray(preLongs << 3, mtx_.data, 0, (int) numElements); 131 | 132 | return outByteArr; 133 | } 134 | /* Serializes the leading numRows x numCols sub-matrix with the COMPACT flag set; a request covering the entire matrix is delegated to toByteArray(). Throws IllegalArgumentException if the requested size is negative or exceeds the matrix dimensions. */ 135 | @Override 136 | public byte[] toCompactByteArray(final int numRows, final int numCols) { 137 | // limit checks: the copy loop below calls doubleValue(r, c) for every r < numRows, c < numCols 138 | if ((numRows < 0) || (numCols < 0) || (numRows > mtx_.countRows()) || (numCols > mtx_.countColumns())) { throw new IllegalArgumentException("Invalid sub-matrix size. Requested " + numRows + " x " + numCols + ", matrix is " + mtx_.countRows() + " x " + mtx_.countColumns()); } 139 | final int preLongs = 3; 140 | 141 | // for non-compact we can do an array copy, so save as non-compact if using the entire matrix 142 | final long numElements = (long) numRows * numCols; 143 | final boolean isCompact = numElements < mtx_.count(); 144 | if (!isCompact) { 145 | return toByteArray(); 146 | } 147 | 148 | assert numElements < mtx_.count(); 149 | 150 | final int outBytes = (int) (((long)preLongs * Long.BYTES) + (numElements * Double.BYTES)); 151 | final byte[] outByteArr = new byte[outBytes]; 152 | final WritableMemory memOut = WritableMemory.wrap(outByteArr); 153 | final Object memObj = memOut.getArray(); 154 | final long memAddr = memOut.getCumulativeOffset(0L); 155 | 156 | MatrixPreambleUtil.insertPreLongs(memObj, memAddr, preLongs); 157 | MatrixPreambleUtil.insertSerVer(memObj, memAddr, MatrixPreambleUtil.SER_VER); 158 | MatrixPreambleUtil.insertFamilyID(memObj, memAddr, MatrixFamily.MATRIX.getID()); 159 | MatrixPreambleUtil.insertFlags(memObj, memAddr, COMPACT_FLAG_MASK); 160 | MatrixPreambleUtil.insertNumRows(memObj, memAddr, (int) mtx_.countRows()); 161 | MatrixPreambleUtil.insertNumColumns(memObj, memAddr, (int) mtx_.countColumns()); 162 | MatrixPreambleUtil.insertNumRowsUsed(memObj, memAddr, numRows); 163 | MatrixPreambleUtil.insertNumColumnsUsed(memObj, memAddr, numCols); 164 | 165 | // write elements in column-major order 166 | long offsetBytes = (long)preLongs * Long.BYTES; 167 | for (int c = 0; c < numCols; ++c) { 168 | for (int r = 0; r < numRows; ++r) { 169 | memOut.putDouble(offsetBytes, mtx_.doubleValue(r, c)); 170 | offsetBytes += Double.BYTES; 171 | } 172 | } 173 | 174 | return outByteArr; 175 | 
} 176 | 177 | @Override 178 | public double getElement(final int row, final int col) { 179 | return mtx_.doubleValue(row, col); 180 | } 181 | 182 | @Override 183 | public double[] getRow(final int row) { 184 | final int cols = (int) mtx_.countColumns(); 185 | final double[] result = new double[cols]; 186 | for (int c = 0; c < cols; ++c) { 187 | result[c] = mtx_.doubleValue(row, c); 188 | } 189 | return result; 190 | } 191 | 192 | @Override 193 | public double[] getColumn(final int col) { 194 | final int rows = (int) mtx_.countRows(); 195 | final double[] result = new double[rows]; 196 | for (int r = 0; r < rows; ++r) { 197 | result[r] = mtx_.doubleValue(r, col); 198 | } 199 | return result; 200 | } 201 | 202 | @Override 203 | public void setElement(final int row, final int col, final double value) { 204 | mtx_.set(row, col, value); 205 | } 206 | 207 | @Override 208 | public void setRow(final int row, final double[] values) { 209 | if (values.length != mtx_.countColumns()) { 210 | throw new IllegalArgumentException("Invalid number of elements for row. Expected " 211 | + mtx_.countColumns() + ", found " + values.length); 212 | } 213 | 214 | for (int i = 0; i < values.length; ++i) { 215 | mtx_.set(row, i, values[i]); 216 | } 217 | } 218 | 219 | @Override 220 | public void setColumn(final int column, final double[] values) { 221 | if (values.length != mtx_.countRows()) { 222 | throw new IllegalArgumentException("Invalid number of elements for column. 
Expected " 223 | + mtx_.countRows() + ", found " + values.length); 224 | } 225 | 226 | for (int i = 0; i < values.length; ++i) { 227 | mtx_.set(i, column, values[i]); 228 | } 229 | } 230 | 231 | @Override 232 | public MatrixType getMatrixType() { 233 | return MatrixType.OJALGO; 234 | } 235 | 236 | } 237 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/matrix/MatrixPreambleUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.vector.matrix; 21 | 22 | import static org.apache.datasketches.memory.UnsafeUtil.unsafe; 23 | 24 | import org.apache.datasketches.memory.Memory; 25 | import org.apache.datasketches.vector.MatrixFamily; 26 | 27 | /** 28 | * This class defines the preamble items structure and provides basic utilities for some of the 29 | * key fields for a Matrix 30 | * 31 | *

32 | * The low significance bytes of this {@code long} items structure are on the right. Multi-byte 33 | * integers ({@code int} and {@code long}) are stored in native byte order. All {@code byte} 34 | * values are treated as unsigned.

35 | * 36 | *

An empty or non-compact Matrix requires 16 bytes. A compact under-full matrix requires 37 | * 24 bytes of preamble.

38 | * 39 | *
 40 |  * Long || Start Byte Adr:
 41 |  * Adr:
 42 |  *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
 43 |  *  0   ||-------------Reserved--------------|  Flags | FamID  | SerVer |   Preamble_Longs   |
 44 |  *
 45 |  *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
 46 |  *  1   ||-----------Num. Columns------------|-------------Num. Rows-------------------------|
 47 |  *
 48 |  *      ||   23   |   22   |   21   |   20   |   19   |   18   |   17   |    16              |
 49 |  *  2   ||---------Num. Columns Used---------|----------Num. Rows Used-----------------------|
 50 |  *  
51 | * 52 | * @author Jon Malkin 53 | */ 54 | @SuppressWarnings("restriction") 55 | public final class MatrixPreambleUtil { 56 | 57 | /** 58 | * The java line separator character as a String. 59 | */ 60 | static final String LS = System.getProperty("line.separator"); 61 | 62 | private MatrixPreambleUtil() {} 63 | 64 | // ###### DO NOT MESS WITH THIS FROM HERE ... 65 | // Preamble byte Addresses 66 | private static final int PREAMBLE_LONGS_BYTE = 0; 67 | private static final int SER_VER_BYTE = 1; 68 | private static final int FAMILY_BYTE = 2; 69 | private static final int FLAGS_BYTE = 3; 70 | private static final int NUM_ROWS_INT = 8; 71 | private static final int NUM_COLUMNS_INT = 12; 72 | private static final int ROWS_USED_INT = 16; 73 | private static final int COLS_USED_INT = 20; 74 | 75 | // flag bit masks 76 | //static final int EMPTY_FLAG_MASK = 4; 77 | static final int COMPACT_FLAG_MASK = 8; 78 | 79 | // Other constants 80 | static final int SER_VER = 1; 81 | 82 | /** 83 | * Returns a human readable string summary of the preamble state of the given Memory. 84 | * Note: other than making sure that the given Memory size is large 85 | * enough for just the preamble, this does not do much value checking of the contents of the 86 | * preamble as this is primarily a tool for debugging the preamble visually. 87 | * 88 | * @param mem the given Memory. 89 | * @return the summary preamble string. 90 | */ 91 | public static String preambleToString(final Memory mem) { 92 | 93 | final int preLongs = getAndCheckPreLongs(mem); // make sure we can get the assumed preamble 94 | final MatrixFamily family = MatrixFamily.idToFamily(extractFamilyID(mem)); 95 | 96 | final int serVer = extractSerVer(mem); 97 | if (serVer != SER_VER) { 98 | throw new IllegalArgumentException("Invalid serialization version in memory region. 
" 99 | + "Found: " + serVer); 100 | } 101 | 102 | final int flags = extractFlags(mem); 103 | final String flagsStr = Integer.toBinaryString(flags) + ", " + flags; 104 | //final boolean isEmpty = (flags & EMPTY_FLAG_MASK) > 0; 105 | final boolean isCompact = (flags & COMPACT_FLAG_MASK) > 0; 106 | 107 | final int numRows = extractNumRows(mem); 108 | final int numCols = extractNumColumns(mem); 109 | 110 | int numRowsUsed = numRows; 111 | int numColsUsed = numCols; 112 | if (isCompact) { 113 | numRowsUsed = extractNumRowsUsed(mem); 114 | numColsUsed = extractNumColumnsUsed(mem); 115 | } 116 | 117 | final StringBuilder sb = new StringBuilder(); 118 | sb.append(LS) 119 | .append("### START ") 120 | .append(family.getFamilyName().toUpperCase()) 121 | .append(" PREAMBLE SUMMARY").append(LS) 122 | .append("Byte 0: Preamble Longs : ").append(preLongs).append(LS) 123 | .append("Byte 1: Serialization Version: ").append(serVer).append(LS) 124 | .append("Byte 2: Family : ").append(family.toString()).append(LS) 125 | .append("Byte 3: Flags Field : ").append(flagsStr).append(LS) 126 | //.append(" EMPTY : ").append(isEmpty).append(LS) 127 | .append(" COMPACT : ").append(isCompact).append(LS) 128 | .append("Bytes 8-11: Num Rows : ").append(numRows).append(LS) 129 | .append("Bytes 12-15: Num Columns : ").append(numCols).append(LS); 130 | 131 | if (isCompact) { 132 | sb.append("Bytes 16-19: Num Rows Used : ").append(numRowsUsed).append(LS); 133 | sb.append("Bytes 20-23: Num Columns Used : ").append(numColsUsed).append(LS); 134 | } 135 | 136 | return sb.toString(); 137 | } 138 | 139 | // Extraction methods: each single-byte field is read with getByte() and masked to an unsigned value 140 | 141 | static int extractPreLongs(final Memory mem) { 142 | return mem.getByte(PREAMBLE_LONGS_BYTE) & 0xFF; 143 | } 144 | 145 | static int extractSerVer(final Memory mem) { 146 | return mem.getByte(SER_VER_BYTE) & 0xFF; 147 | } 148 | 149 | static int extractFamilyID(final Memory mem) { 150 | return mem.getByte(FAMILY_BYTE) & 0xFF; 151 | } 152 | 153 | static int extractFlags(final 
Memory mem) { 154 | return mem.getByte(FLAGS_BYTE) & 0xFF; 155 | } 156 | 157 | static int extractNumRows(final Memory mem) { 158 | return mem.getInt(NUM_ROWS_INT); 159 | } 160 | 161 | static int extractNumColumns(final Memory mem) { 162 | return mem.getInt(NUM_COLUMNS_INT); 163 | } 164 | 165 | static int extractNumRowsUsed(final Memory mem) { 166 | return mem.getInt(ROWS_USED_INT); 167 | } 168 | 169 | static int extractNumColumnsUsed(final Memory mem) { 170 | return mem.getInt(COLS_USED_INT); 171 | } 172 | 173 | // Insertion methods 174 | 175 | static void insertPreLongs(final Object memObj, final long memAddr, final int preLongs) { 176 | unsafe.putByte(memObj, memAddr + PREAMBLE_LONGS_BYTE, (byte) preLongs); 177 | } 178 | 179 | static void insertSerVer(final Object memObj, final long memAddr, final int serVer) { 180 | unsafe.putByte(memObj, memAddr + SER_VER_BYTE, (byte) serVer); 181 | } 182 | 183 | static void insertFamilyID(final Object memObj, final long memAddr, final int matrixFamId) { 184 | unsafe.putByte(memObj, memAddr + FAMILY_BYTE, (byte) matrixFamId); 185 | } 186 | 187 | static void insertFlags(final Object memObj, final long memAddr, final int flags) { 188 | unsafe.putByte(memObj, memAddr + FLAGS_BYTE, (byte) flags); 189 | } 190 | 191 | static void insertNumRows(final Object memObj, final long memAddr, final int numRows) { 192 | unsafe.putInt(memObj, memAddr + NUM_ROWS_INT, numRows); 193 | } 194 | 195 | static void insertNumColumns(final Object memObj, final long memAddr, final int numColumns) { 196 | unsafe.putInt(memObj, memAddr + NUM_COLUMNS_INT, numColumns); 197 | } 198 | 199 | static void insertNumRowsUsed(final Object memObj, final long memAddr, final int rowsUsed) { 200 | unsafe.putInt(memObj, memAddr + ROWS_USED_INT, rowsUsed); 201 | } 202 | 203 | static void insertNumColumnsUsed(final Object memObj, final long memAddr, final int columnsUsed) { 204 | unsafe.putInt(memObj, memAddr + COLS_USED_INT, columnsUsed); 205 | } 206 | 207 | 208 | /** 209 
| * Checks Memory for capacity to hold the preamble and returns the extracted preLongs. 210 | * @param mem the given Memory 211 | * @return the extracted prelongs value. 212 | */ 213 | private static int getAndCheckPreLongs(final Memory mem) { 214 | final long cap = mem.getCapacity(); 215 | if (cap < Long.BYTES) { throwNotBigEnough(cap, Long.BYTES); } 216 | final int preLongs = extractPreLongs(mem); 217 | final int required = Math.max(preLongs << 3, Long.BYTES); 218 | if (cap < required) { throwNotBigEnough(cap, required); } 219 | return preLongs; 220 | } 221 | 222 | private static void throwNotBigEnough(final long cap, final int required) { 223 | throw new IllegalArgumentException( 224 | "Possible Corruption: Size of byte array or Memory not large enough: Size: " + cap 225 | + ", Required: " + required); 226 | } 227 | } 228 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/matrix/MatrixType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.vector.matrix; 21 | 22 | /** 23 | * Identifies the underlying implementation backing a Matrix; each constant carries a numeric ID and a display name. 24 | */ 25 | public enum MatrixType { 26 | /** 27 | * The ojAlgo implementation (ID 1, name "ojAlgo") 28 | */ 29 | OJALGO(1, "ojAlgo"); 30 | 31 | private final int id_; 32 | private final String name_; 33 | 34 | MatrixType(final int id, final String name) { 35 | id_ = id; 36 | name_ = name; 37 | } 38 | 39 | /** 40 | * Gets the numeric ID of this matrix type 41 | * @return the ID 42 | */ 43 | public int getId() { return id_; } 44 | 45 | /** 46 | * Gets the display name of this matrix type, which is also what toString() returns 47 | * @return the name 48 | */ 49 | public String getName() { return name_; } 50 | 51 | @Override 52 | public String toString() { return name_; } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/matrix/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | *

This package contains a Matrix class that wraps one of several underlying matrix 22 | * implementations. It can be used to provide a stable public API independent of the 23 | * specific linear algebra package used for computation.

24 | * 25 | *

These Matrix objects can be serialized and deserialized to/from a compact form.

26 | */ 27 | package org.apache.datasketches.vector.matrix; 28 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/vector/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This is the parent package for all vector sketch algorithms. Any classes at this level are 22 | * used by more than one sub-package. 23 | */ 24 | package org.apache.datasketches.vector; 25 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/vector/decomposition/FrequentDirectionsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.vector.decomposition; 21 | 22 | import static org.testng.Assert.assertEquals; 23 | import static org.testng.Assert.assertNotNull; 24 | import static org.testng.Assert.assertNull; 25 | import static org.testng.Assert.assertTrue; 26 | import static org.testng.Assert.fail; 27 | 28 | import java.util.Arrays; 29 | 30 | import org.testng.annotations.Test; 31 | 32 | import org.apache.datasketches.memory.Memory; 33 | import org.apache.datasketches.memory.WritableMemory; 34 | import org.apache.datasketches.vector.MatrixFamily; 35 | import org.apache.datasketches.vector.matrix.Matrix; 36 | 37 | @SuppressWarnings("javadoc") 38 | public class FrequentDirectionsTest { 39 | 40 | @Test 41 | public void instantiateFD() { 42 | final int k = 32; 43 | final int d = 256; 44 | final FrequentDirections fd = FrequentDirections.newInstance(k, d); 45 | 46 | assertNotNull(fd); 47 | assertTrue(fd.isEmpty()); 48 | assertEquals(fd.getK(), k); 49 | assertEquals(fd.getD(), d); 50 | assertEquals(fd.getN(), 0); 51 | assertNull(fd.getResult()); 52 | 53 | // error conditions 54 | // d = 0 55 | try { 56 | FrequentDirections.newInstance(k, 0); 57 | fail(); 58 | } catch (final IllegalArgumentException e) { 59 | // expected 60 | } 61 | 62 | // k = -1 63 | try { 64 | FrequentDirections.newInstance(-1, d); 65 | fail(); 66 | } catch (final 
IllegalArgumentException e) { 67 | // expected 68 | } 69 | 70 | // d < 2k (not handled in reduceRank() 71 | try { 72 | FrequentDirections.newInstance(d, d); 73 | fail(); 74 | } catch (final IllegalArgumentException e) { 75 | // expected 76 | } 77 | } 78 | 79 | @Test 80 | public void checkSymmUpdate() { 81 | final int k = 4; 82 | final int d = 16; // should be > 2k 83 | final FrequentDirections fd = FrequentDirections.newInstance(k, d); 84 | fd.setSVDAlgo(SVDAlgo.SYM); // default, but force anyway 85 | 86 | runUpdateTest(fd); 87 | } 88 | 89 | @Test 90 | public void checkFullSVDUpdate() { 91 | final int k = 4; 92 | final int d = 16; // should be > 2k 93 | final FrequentDirections fd = FrequentDirections.newInstance(k, d); 94 | fd.setSVDAlgo(SVDAlgo.FULL); 95 | 96 | runUpdateTest(fd); 97 | } 98 | 99 | private static void runUpdateTest(final FrequentDirections fd) { 100 | final int k = fd.getK(); 101 | final int d = fd.getD(); 102 | 103 | // creates matrix with increasing values along diagonal 104 | final double[] input = new double[d]; 105 | for (int i = 0; i < (2 * k); ++i) { 106 | if (i > 0) { 107 | input[i - 1] = 0.0; 108 | } 109 | input[i] = i * 1.0; 110 | fd.update(input); 111 | } 112 | fd.update((double[]) null); // should be a no-op and not impact next lines 113 | assertEquals(fd.getNumRows(), 2 * k); 114 | assertEquals(fd.getN(), 2 * k); 115 | 116 | input[(2 * k) - 1] = 0.0; 117 | input[2 * k] = 2.0 * k; 118 | fd.update(input); // trigger reduceRank(), then add 1 more row 119 | assertEquals(fd.getNumRows(), k); 120 | } 121 | 122 | 123 | @Test 124 | public void updateWithTooFewDimensions() { 125 | final int k = 4; 126 | final int d = 16; // should be > 2k 127 | final FrequentDirections fd = FrequentDirections.newInstance(k, d); 128 | 129 | final double[] input = new double[d - 3]; 130 | try { 131 | fd.update(input); 132 | fail(); 133 | } catch (final IllegalArgumentException e) { 134 | // expected 135 | } 136 | } 137 | 138 | @Test 139 | public void 
mergeSketches() { 140 | final int k = 5; 141 | final int d = 12; // should be > 2k 142 | final int initialRows = 7; 143 | final FrequentDirections fd1 = FrequentDirections.newInstance(k, d); 144 | final FrequentDirections fd2 = FrequentDirections.newInstance(k, d); 145 | 146 | // two diagonal matrices 147 | final double[] input = new double[d]; 148 | for (int i = 0; i < initialRows; ++i) { 149 | if (i > 0) { 150 | input[i - 1] = 0.0; 151 | } 152 | //input[i] = (2 * k) - (i * 1.0); 153 | input[i] = i * 1.0; 154 | fd1.update(input); 155 | 156 | input[i] = (i * 1.0) - (2 * k); 157 | fd2.update(input); 158 | } 159 | 160 | // the next two lines are no-ops 161 | fd1.update((FrequentDirections) null); 162 | fd1.update(FrequentDirections.newInstance(k, d)); 163 | assertEquals(fd1.getNumRows(), initialRows); 164 | assertEquals(fd1.getN(), initialRows); 165 | 166 | assertEquals(fd2.getNumRows(), initialRows); 167 | assertEquals(fd2.getN(), initialRows); 168 | 169 | fd1.update(fd2); 170 | final int expectedRows = (((2 * initialRows) % k) + k) - 1; // assumes 2 * initialRows > k 171 | assertEquals(fd1.getNumRows(), expectedRows); 172 | assertEquals(fd1.getN(), 2 * initialRows); 173 | 174 | final Matrix result = fd1.getResult(false); 175 | assertNotNull(result); 176 | assertEquals(result.getNumRows(), 2 * k); 177 | 178 | println(fd1.toString(true, true, true)); 179 | } 180 | 181 | @Test 182 | public void checkCompensativeResultSymSVD() { 183 | final int k = 4; 184 | final int d = 10; // should be > 2k 185 | final FrequentDirections fd = FrequentDirections.newInstance(k, d); 186 | fd.setSVDAlgo(SVDAlgo.SYM); 187 | 188 | runCompensativeResultTest(fd); 189 | } 190 | 191 | @Test 192 | public void checkCompensativeResultFullSVD() { 193 | final int k = 4; 194 | final int d = 10; // should be > 2k 195 | final FrequentDirections fd = FrequentDirections.newInstance(k, d); 196 | fd.setSVDAlgo(SVDAlgo.FULL); 197 | 198 | runCompensativeResultTest(fd); 199 | } 200 | 201 | private static 
void runCompensativeResultTest(final FrequentDirections fd) { 202 | final int d = fd.getD(); 203 | final int k = fd.getK(); 204 | 205 | // diagonal matrix for easy checking 206 | final double[] input = new double[d]; 207 | for (int i = 0; i < (k + 1); ++i) { 208 | if (i > 0) { 209 | input[i - 1] = 0.0; 210 | } 211 | input[i] = (i + 1) * 1.0; 212 | fd.update(input); 213 | } 214 | 215 | Matrix m = fd.getResult(); 216 | for (int i = 0; i < (k + 1); ++i) { 217 | assertEquals(m.getElement(i,i), 1.0 * (i + 1), 1e-6); 218 | } 219 | 220 | // without compensation, but force rank reduction and check projection at the same time 221 | fd.forceReduceRank(); 222 | m = fd.getResult(); 223 | final Matrix p = fd.getProjectionMatrix(); 224 | double[] sv = fd.getSingularValues(false); 225 | for (int i = k; i > 1; --i) { 226 | final double val = Math.abs(m.getElement(k - i, i)); 227 | final double expected = Math.sqrt(((i + 1) * (i + 1)) - fd.getSvAdjustment()); 228 | assertEquals(val, expected, 1e-6); 229 | assertEquals(sv[k - i], expected, 1e-10); 230 | assertEquals(Math.abs(p.getElement(k - i, i)), 1.0, 1e-6); 231 | } 232 | assertEquals(m.getElement(k, 1), 0.0, 0.0); // might return -0.0 233 | assertEquals(p.getElement(k, 1), 0.0, 0.0); // might return -0.0 234 | 235 | // with compensation 236 | m = fd.getResult(true); 237 | sv = fd.getSingularValues(true); 238 | for (int i = k; i > 1; --i) { 239 | final double val = Math.abs(m.getElement(k - i, i)); 240 | assertEquals(val, i + 1.0, 1e-6); 241 | assertEquals(sv[k - i], i + 1.0, 1e-10); 242 | } 243 | assertEquals(m.getElement(k, 1), 0.0); 244 | } 245 | 246 | @Test 247 | public void mergeIncompatibleSketches() { 248 | final int k = 5; 249 | final int d = 12; // should be > 2k 250 | final FrequentDirections fd1 = FrequentDirections.newInstance(k, d); 251 | 252 | final double[] input = new double[d]; 253 | input[0] = 1.0; 254 | fd1.update(input); 255 | 256 | // merge in smaller k 257 | FrequentDirections fd2 = 
FrequentDirections.newInstance(k - 1, d); 258 | fd2.update(input); 259 | try { 260 | fd1.update(fd2); 261 | fail(); 262 | } catch (final IllegalArgumentException e) { 263 | // expected 264 | } 265 | 266 | // mismatch in d 267 | fd2 = FrequentDirections.newInstance(k, d - 1); 268 | fd2.update(new double[d - 1]); 269 | try { 270 | fd1.update(fd2); 271 | fail(); 272 | } catch (final IllegalArgumentException e) { 273 | // expected 274 | } 275 | } 276 | 277 | @Test 278 | public void checkSerialization() { 279 | final int k = 7; 280 | final int d = 20; 281 | final FrequentDirections fd = FrequentDirections.newInstance(k, d); 282 | 283 | byte[] sketchBytes = fd.toByteArray(); 284 | assertEquals(sketchBytes.length, 285 | MatrixFamily.FREQUENTDIRECTIONS.getMinPreLongs() * Long.BYTES); 286 | Memory mem = Memory.wrap(sketchBytes); 287 | FrequentDirections rebuilt = FrequentDirections.heapify(mem); 288 | assertTrue(rebuilt.isEmpty()); 289 | assertEquals(rebuilt.getD(), fd.getD()); 290 | assertEquals(rebuilt.getK(), fd.getK()); 291 | 292 | // creates matrix with increasing values along diagonal 293 | // k rows, so shouldn't compress yet 294 | final double[] input = new double[d]; 295 | for (int i = 0; i < k; ++i) { 296 | if (i > 0) { 297 | input[i - 1] = 0.0; 298 | } 299 | //input[i] = (2 * k) - (i * 1.0); 300 | input[i] = i * 1.0; 301 | fd.update(input); 302 | } 303 | sketchBytes = fd.toByteArray(); 304 | mem = Memory.wrap(sketchBytes); 305 | rebuilt = FrequentDirections.heapify(mem); 306 | assertEquals(rebuilt.getN(), fd.getN()); 307 | assertEquals(rebuilt.getD(), fd.getD()); 308 | assertEquals(rebuilt.getK(), fd.getK()); 309 | 310 | // add another k rows and serialize, compressing this time 311 | for (int i = k; i < ((2 * k) - 1); ++i) { 312 | input[i] = i * 1.0; 313 | fd.update(input); 314 | } 315 | assertEquals(fd.getNumRows(), (2 * k) - 1); 316 | sketchBytes = fd.toByteArray(); 317 | mem = Memory.wrap(sketchBytes); 318 | rebuilt = FrequentDirections.heapify(mem); 319 | 
assertEquals(rebuilt.getN(), fd.getN()); 320 | assertEquals(rebuilt.getNumRows(), fd.getNumRows()); 321 | 322 | println(PreambleUtil.preambleToString(mem)); 323 | } 324 | 325 | @Test 326 | public void checkCorruptedHeapify() { 327 | final int k = 50; 328 | final int d = 250; 329 | final FrequentDirections fd = FrequentDirections.newInstance(k, d); 330 | byte[] sketchBytes = fd.toByteArray(); 331 | WritableMemory mem = WritableMemory.wrap(sketchBytes); 332 | 333 | final FrequentDirections rebuilt = FrequentDirections.heapify(mem); 334 | assertTrue(rebuilt.isEmpty()); 335 | println(PreambleUtil.preambleToString(mem)); 336 | 337 | // corrupt the serialization version 338 | mem.putByte(PreambleUtil.SER_VER_BYTE, (byte) 0); 339 | try { 340 | FrequentDirections.heapify(mem); 341 | } catch (final IllegalArgumentException e) { 342 | // expected 343 | } 344 | 345 | // corrupt the family ID, after grabbing fresh bytes 346 | sketchBytes = fd.toByteArray(); 347 | mem = WritableMemory.wrap(sketchBytes); 348 | mem.putByte(PreambleUtil.FAMILY_BYTE, (byte) 0); 349 | try { 350 | FrequentDirections.heapify(mem); 351 | } catch (final IllegalArgumentException e) { 352 | // expected 353 | } 354 | } 355 | 356 | @Test 357 | public void checkInsufficientMemory() { 358 | // no capacity 359 | byte[] bytes = new byte[0]; 360 | Memory mem = Memory.wrap(bytes); 361 | try { 362 | FrequentDirections.heapify(mem); 363 | } catch (final IllegalArgumentException e) { 364 | // expected 365 | } 366 | 367 | // capacity smaller than prelongs size 368 | final FrequentDirections fd = FrequentDirections.newInstance(10, 50); 369 | bytes = fd.toByteArray(); 370 | bytes = Arrays.copyOf(bytes, bytes.length - 1); 371 | mem = Memory.wrap(bytes); 372 | try { 373 | FrequentDirections.heapify(mem); 374 | } catch (final IllegalArgumentException e) { 375 | // expected 376 | } 377 | } 378 | 379 | /** 380 | * println the message 381 | * @param msg the message 382 | */ 383 | private void println(final String msg) { 384 
| //System.out.println(msg); 385 | } 386 | } 387 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/vector/decomposition/MatrixOpsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.vector.decomposition; 21 | 22 | import static org.testng.Assert.assertEquals; 23 | import static org.testng.Assert.fail; 24 | 25 | import org.testng.annotations.Test; 26 | 27 | import org.apache.datasketches.vector.matrix.Matrix; 28 | import org.apache.datasketches.vector.matrix.MatrixBuilder; 29 | import org.apache.datasketches.vector.matrix.MatrixType; 30 | 31 | @SuppressWarnings("javadoc") 32 | public class MatrixOpsTest { 33 | 34 | @Test 35 | public void compareSVDAccuracy() { 36 | final int d = 10; 37 | final int k = 6; 38 | final Matrix input = generateIncreasingEye(d, 2 * k); 39 | 40 | final MatrixOps moFull = MatrixOps.newInstance(input, SVDAlgo.FULL, k); 41 | final MatrixOps moSym = MatrixOps.newInstance(input, SVDAlgo.SYM, k); 42 | final MatrixOps moSISVD = MatrixOps.newInstance(input, SVDAlgo.SISVD, k); 43 | moSISVD.setNumSISVDIter(50 * k); // intentionally run many extra iterations for tighter convergence 44 | 45 | // just singular values first 46 | moFull.svd(input, false); 47 | moSym.svd(input, false); 48 | moSISVD.svd(input, false); 49 | final double[] fullSv = moFull.getSingularValues(); 50 | compareSingularValues(fullSv, moSym.getSingularValues(), fullSv.length); 51 | compareSingularValues(fullSv, moSISVD.getSingularValues(), k); // SISVD only produces k values 52 | 53 | // now with vectors 54 | moFull.svd(input, true); 55 | moSym.svd(input, true); 56 | moSISVD.svd(input, true); 57 | // TODO: better comparison is vector-wise, testing that sign changes are consistent but that 58 | // requires non-zero elements 59 | final Matrix fullVt = moFull.getVt(); 60 | compareMatrixElementMagnitudes(fullVt, moSym.getVt(), (int) fullVt.getNumRows()); 61 | compareMatrixElementMagnitudes(fullVt, moSISVD.getVt(), k); // SISVD only produces k vectors 62 | 63 | // just to be sure 64 | compareMatrixElementMagnitudes(fullVt, moFull.getVt(input), (int) fullVt.getNumRows()); 65 | } 66 | 67 | @Test 68 | public void
checkInvalidMatrixSize() { 69 | final int d = 10; 70 | final int k = 6; 71 | final Matrix A = generateIncreasingEye(d, 2 * k); 72 | final MatrixOps mo = MatrixOps.newInstance(A, SVDAlgo.FULL, k); 73 | 74 | Matrix B = generateIncreasingEye(d, (2 * k) + 1); 75 | try { 76 | mo.svd(B, true); 77 | fail(); 78 | } catch (final IllegalArgumentException e) { 79 | // expected 80 | } 81 | 82 | B = generateIncreasingEye(d - 1, 2 * k); 83 | try { 84 | mo.svd(B, false); 85 | fail(); 86 | } catch (final IllegalArgumentException e) { 87 | // expected 88 | } 89 | 90 | } 91 | 92 | private static void compareSingularValues(final double[] A, final double[] B, final int n) { 93 | assertEquals(A.length, B.length); 94 | 95 | for (int i = 0; i < n; ++i) { 96 | assertEquals(A[i], B[i], 1e-6); 97 | } 98 | } 99 | 100 | 101 | private static void compareMatrixElementMagnitudes(final Matrix A, final Matrix B, final int n) { 102 | assertEquals(A.getNumColumns(), B.getNumColumns()); 103 | assertEquals(A.getNumRows(), B.getNumRows()); 104 | 105 | for (int i = 0; i < n; ++i) { 106 | for (int j = 0; j < A.getNumColumns(); ++j) { 107 | assertEquals(Math.abs(A.getElement(i, j)), Math.abs(B.getElement(i, j)), 1e-6); 108 | } 109 | } 110 | } 111 | 112 | /** 113 | * Creates a scaled I matrix, where the diagonal consists of increasing integers, 114 | * starting with 1.0. 
115 | * @param nRows number of rows 116 | * @param nCols number of columns 117 | * @return Matrix with the increasing diagonal populated 118 | */ 119 | private static Matrix generateIncreasingEye(final int nRows, final int nCols) { 120 | final Matrix m = new MatrixBuilder().setType(MatrixType.OJALGO).build(nRows, nCols); 121 | for (int i = 0; (i < nRows) && (i < nCols); ++i) { 122 | m.setElement(i, i, 1.0 + i); 123 | } 124 | return m; 125 | } 126 | 127 | } 128 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/vector/matrix/MatrixBuilderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | package org.apache.datasketches.vector.matrix; 21 | 22 | import static org.testng.Assert.assertEquals; 23 | import static org.testng.Assert.assertNotNull; 24 | 25 | import org.testng.annotations.Test; 26 | 27 | @SuppressWarnings("javadoc") 28 | public class MatrixBuilderTest { 29 | @Test 30 | public void checkBuild() { 31 | final MatrixBuilder builder = new MatrixBuilder(); 32 | assertEquals(builder.getBackingType(), MatrixType.OJALGO); // default type 33 | 34 | Matrix m = builder.build(128, 512); 35 | assertNotNull(m); 36 | 37 | m = builder.build(128, 512); 38 | assertNotNull(m); 39 | } 40 | 41 | @Test 42 | public void checkSetType() { 43 | final MatrixBuilder builder = new MatrixBuilder(); 44 | final MatrixType type = builder.getBackingType(); 45 | assertEquals(type, MatrixType.OJALGO); // default type 46 | assertEquals(type.getId(), MatrixType.OJALGO.getId()); 47 | assertEquals(type.getName(), MatrixType.OJALGO.getName()); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/vector/matrix/MatrixImplOjAlgoTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.vector.matrix; 21 | 22 | import static org.testng.Assert.assertEquals; 23 | import static org.testng.Assert.fail; 24 | 25 | import org.ojalgo.matrix.store.Primitive64Store; 26 | import org.testng.annotations.Test; 27 | 28 | import org.apache.datasketches.memory.Memory; 29 | import org.apache.datasketches.memory.WritableMemory; 30 | 31 | @SuppressWarnings("javadoc") 32 | public class MatrixImplOjAlgoTest { 33 | 34 | @Test 35 | public void checkInstantiation() { 36 | final int nRows = 10; 37 | final int nCols = 15; 38 | final Matrix m = MatrixImplOjAlgo.newInstance(nRows, nCols); 39 | assertEquals(m.getNumRows(), nRows); 40 | assertEquals(m.getNumColumns(), nCols); 41 | 42 | final Primitive64Store pds = (Primitive64Store) m.getRawObject(); 43 | assertEquals(pds.countRows(), nRows); 44 | assertEquals(pds.countColumns(), nCols); 45 | 46 | final Matrix wrapped = Matrix.wrap(pds); 47 | MatrixTest.checkMatrixEquality(wrapped, m); 48 | assertEquals(wrapped.getRawObject(), pds); 49 | } 50 | 51 | @Test 52 | public void updateAndQueryValues() { 53 | final int nRows = 5; 54 | final int nCols = 5; 55 | final Matrix m = generateIncreasingEye(nRows, nCols); // tests setElement() in method 56 | 57 | for (int i = 0; i < nRows; ++i) { 58 | for (int j = 0; j < nCols; ++j) { 59 | final double val = m.getElement(i, j); 60 | if (i == j) { 61 | assertEquals(val, i + 1.0); 62 | } else { 63 | assertEquals(val, 0.0); 64 | } 65 | } 66 | } 67 | } 68 | 69 | @Test 70 | public void checkStandardSerialization() { 71 | final int nRows = 3; 72 | final int nCols = 7; 73 | final Matrix m = generateIncreasingEye(nRows, nCols); 74 | 75 | final byte[] mtxBytes = m.toByteArray(); 76 | assertEquals(mtxBytes.length, m.getSizeBytes()); 77 | 78 | final Memory mem = Memory.wrap(mtxBytes); 79 | final Matrix tgt = 
MatrixImplOjAlgo.heapifyInstance(mem); 80 | MatrixTest.checkMatrixEquality(tgt, m); 81 | } 82 | 83 | @Test 84 | public void checkCompactSerialization() { 85 | final int nRows = 4; 86 | final int nCols = 7; 87 | final Matrix m = generateIncreasingEye(nRows, nCols); 88 | 89 | byte[] mtxBytes = m.toCompactByteArray(nRows - 1, 7); 90 | assertEquals(mtxBytes.length, m.getCompactSizeBytes(nRows - 1, 7)); 91 | 92 | Memory mem = Memory.wrap(mtxBytes); 93 | Matrix tgt = MatrixImplOjAlgo.heapifyInstance(mem); 94 | for (int c = 0; c < nCols; ++c) { 95 | for (int r = 0; r < (nRows - 1); ++r) { 96 | assertEquals(tgt.getElement(r, c), m.getElement(r, c)); // equal here 97 | } 98 | // assuming nRows - 1 so check only the last row as being 0 99 | assertEquals(tgt.getElement(nRows - 1, c), 0.0); 100 | } 101 | 102 | // test without compacting 103 | mtxBytes = m.toCompactByteArray(nRows, nCols); 104 | assertEquals(mtxBytes.length, m.getSizeBytes()); 105 | mem = Memory.wrap(mtxBytes); 106 | tgt = MatrixImplOjAlgo.heapifyInstance(mem); 107 | MatrixTest.checkMatrixEquality(tgt, m); 108 | } 109 | 110 | @Test 111 | public void matrixRowOperations() { 112 | final int nRows = 7; 113 | final int nCols = 5; 114 | final Matrix m = generateIncreasingEye(nRows, nCols); 115 | 116 | final int tgtCol = 2; 117 | final double[] v = m.getRow(tgtCol); // diagonal matrix, so this works ok 118 | for (int i = 0; i < v.length; ++i) { 119 | assertEquals(v[i], (i == tgtCol ? 
i + 1.0 : 0.0)); 120 | } 121 | 122 | assertEquals(m.getElement(6, tgtCol), 0.0); 123 | m.setRow(6, v); 124 | assertEquals(m.getElement(6, tgtCol), tgtCol + 1.0); 125 | } 126 | 127 | @Test 128 | public void matrixColumnOperations() { 129 | final int nRows = 9; 130 | final int nCols = 4; 131 | final Matrix m = generateIncreasingEye(nRows, nCols); 132 | 133 | final int tgtRow = 3; 134 | final double[] v = m.getColumn(tgtRow); // diagonal matrix, so this works ok 135 | for (int i = 0; i < v.length; ++i) { 136 | assertEquals(v[i], (i == tgtRow ? i + 1.0 : 0.0)); 137 | } 138 | 139 | assertEquals(m.getElement(tgtRow, 0), 0.0); 140 | m.setColumn(0, v); 141 | assertEquals(m.getElement(tgtRow, 0), tgtRow + 1.0); 142 | } 143 | 144 | @Test 145 | public void invalidRowColumnOperations() { 146 | final int nRows = 9; 147 | final int nCols = 4; 148 | final Matrix m = generateIncreasingEye(nRows, nCols); 149 | 150 | final double[] shortRow = new double[nCols - 2]; 151 | try { 152 | m.setRow(1, shortRow); 153 | fail(); 154 | } catch (final IllegalArgumentException e) { 155 | // expected 156 | } 157 | 158 | final double[] longColumn = new double[nRows + 2]; 159 | try { 160 | m.setColumn(1, longColumn); 161 | fail(); 162 | } catch (final IllegalArgumentException e) { 163 | // expected 164 | } 165 | } 166 | 167 | @Test 168 | public void invalidSerVer() { 169 | final int nRows = 3; 170 | final int nCols = 3; 171 | final Matrix m = generateIncreasingEye(nRows, nCols); 172 | final byte[] sketchBytes = m.toByteArray(); 173 | final WritableMemory mem = WritableMemory.wrap(sketchBytes); 174 | MatrixPreambleUtil.insertSerVer(mem.getArray(), mem.getCumulativeOffset(0L), 0); 175 | 176 | try { 177 | MatrixImplOjAlgo.heapifyInstance(mem); 178 | fail(); 179 | } catch (final IllegalArgumentException e) { 180 | // expected 181 | } 182 | } 183 | 184 | @Test 185 | public void invalidFamily() { 186 | final int nRows = 3; 187 | final int nCols = 3; 188 | final Matrix m = generateIncreasingEye(nRows, 
nCols); 189 | final byte[] sketchBytes = m.toByteArray(); 190 | final WritableMemory mem = WritableMemory.wrap(sketchBytes); 191 | MatrixPreambleUtil.insertFamilyID(mem.getArray(), mem.getCumulativeOffset(0L), 0); 192 | 193 | try { 194 | MatrixImplOjAlgo.heapifyInstance(mem); 195 | fail(); 196 | } catch (final IllegalArgumentException e) { 197 | // expected 198 | } 199 | } 200 | 201 | @Test 202 | public void insufficientMemoryCapacity() { 203 | final byte[] bytes = new byte[6]; 204 | final Memory mem = Memory.wrap(bytes); 205 | try { 206 | MatrixImplOjAlgo.heapifyInstance(mem); 207 | fail(); 208 | } catch (final IllegalArgumentException e) { 209 | // expected 210 | } 211 | } 212 | 213 | /** 214 | * Creates a scaled I matrix, where the diagonal consists of increasing integers, 215 | * starting with 1.0. 216 | * @param nRows number of rows 217 | * @param nCols number of columns 218 | * @return Matrix with the increasing diagonal populated 219 | */ 220 | private static Matrix generateIncreasingEye(final int nRows, final int nCols) { 221 | final Matrix m = MatrixImplOjAlgo.newInstance(nRows, nCols); 222 | for (int i = 0; (i < nRows) && (i < nCols); ++i) { 223 | m.setElement(i, i, 1.0 + i); 224 | } 225 | return m; 226 | } 227 | } 228 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/vector/matrix/MatrixTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.vector.matrix; 21 | 22 | import static org.testng.Assert.assertEquals; 23 | import static org.testng.Assert.assertNotNull; 24 | import static org.testng.Assert.assertNull; 25 | import static org.testng.Assert.assertTrue; 26 | import static org.testng.Assert.fail; 27 | 28 | import org.testng.annotations.Test; 29 | 30 | import org.apache.datasketches.memory.Memory; 31 | import org.apache.datasketches.memory.WritableMemory; 32 | import org.apache.datasketches.vector.MatrixFamily; 33 | 34 | @SuppressWarnings("javadoc") 35 | public class MatrixTest { 36 | 37 | @Test 38 | public void checkHeapify() { 39 | final Matrix m = Matrix.builder().setType(MatrixType.OJALGO).build(3, 3); 40 | final byte[] bytes = m.toByteArray(); 41 | final Memory mem = Memory.wrap(bytes); 42 | println(MatrixPreambleUtil.preambleToString(mem)); 43 | 44 | final Matrix tgt = Matrix.heapify(mem, MatrixType.OJALGO); 45 | assertTrue(tgt instanceof MatrixImplOjAlgo); 46 | checkMatrixEquality(m, tgt); 47 | } 48 | 49 | @Test 50 | public void checkWrap() { 51 | assertNull(Matrix.wrap(null)); 52 | 53 | final Matrix src = Matrix.builder().setType(MatrixType.OJALGO).build(3, 3); 54 | final Object obj = src.getRawObject(); 55 | final Matrix tgt = Matrix.wrap(obj); 56 | assertTrue(tgt instanceof MatrixImplOjAlgo); 57 | checkMatrixEquality(src, tgt); 58 | 59 | try { 60 | final Object notAMatrix = 1.0; 61 | Matrix.wrap(notAMatrix); 62 | fail(); 63 | } catch (final IllegalArgumentException e) { 64 | // expected 65 | } 66 | 
67 | assertNotNull(src.toString()); 68 | } 69 | 70 | @Test 71 | public void checkSize() { 72 | final int nRow = 7; 73 | final int nCol = 3; 74 | final Matrix m = Matrix.builder().build(nRow, nCol); 75 | 76 | int expectedSize = (MatrixFamily.MATRIX.getMinPreLongs() * Long.BYTES) 77 | + (nRow * nCol * Double.BYTES); 78 | assertEquals(m.getSizeBytes(), expectedSize); 79 | 80 | // this should redirect call to getSizeBytes() 81 | assertEquals(m.getCompactSizeBytes(nRow, nCol), expectedSize); 82 | 83 | // degenerate cases 84 | expectedSize = (MatrixFamily.MATRIX.getMinPreLongs() * Long.BYTES); 85 | assertEquals(m.getCompactSizeBytes(0, nCol), expectedSize); 86 | assertEquals(m.getCompactSizeBytes(nRow, 0), expectedSize); 87 | 88 | // matrix subsets 89 | expectedSize = (MatrixFamily.MATRIX.getMaxPreLongs() * Long.BYTES) 90 | + ((5 * 3) * Double.BYTES); 91 | assertEquals(m.getCompactSizeBytes(5, 3), expectedSize); 92 | 93 | expectedSize = (MatrixFamily.MATRIX.getMaxPreLongs() * Long.BYTES) 94 | + ((7 * 2) * Double.BYTES); 95 | assertEquals(m.getCompactSizeBytes(7, 2), expectedSize); 96 | 97 | expectedSize = (MatrixFamily.MATRIX.getMaxPreLongs() * Long.BYTES) 98 | + ((2 * 2) * Double.BYTES); 99 | assertEquals(m.getCompactSizeBytes(2, 2), expectedSize); 100 | } 101 | 102 | @Test 103 | public void invalidSerVer() { 104 | final int nRows = 50; 105 | final int nCols = 75; 106 | final MatrixBuilder mb = new MatrixBuilder(); 107 | final Matrix m = mb.build(nRows, nCols); 108 | final byte[] sketchBytes = m.toByteArray(); 109 | final WritableMemory mem = WritableMemory.wrap(sketchBytes); 110 | MatrixPreambleUtil.insertSerVer(mem.getArray(), mem.getCumulativeOffset(0L), 0); 111 | 112 | try { 113 | MatrixPreambleUtil.preambleToString(mem); 114 | fail(); 115 | } catch (final IllegalArgumentException e) { 116 | // expected 117 | } 118 | } 119 | 120 | @Test 121 | public void invalidFamily() { 122 | final int nRows = 3; 123 | final int nCols = 3; 124 | final MatrixBuilder mb = new 
MatrixBuilder(); 125 | final Matrix m = mb.build(nRows, nCols); 126 | final byte[] sketchBytes = m.toByteArray(); 127 | final WritableMemory mem = WritableMemory.wrap(sketchBytes); 128 | MatrixPreambleUtil.insertFamilyID(mem.getArray(), mem.getCumulativeOffset(0L), 0); 129 | 130 | try { 131 | MatrixPreambleUtil.preambleToString(mem); 132 | fail(); 133 | } catch (final IllegalArgumentException e) { 134 | // expected 135 | } 136 | } 137 | 138 | @Test 139 | public void checkInsufficientMemoryCapacity() { 140 | final byte[] bytes = new byte[6]; 141 | final Memory mem = Memory.wrap(bytes); 142 | try { 143 | MatrixPreambleUtil.preambleToString(mem); 144 | fail(); 145 | } catch (final IllegalArgumentException e) { 146 | // expected 147 | } 148 | } 149 | 150 | @Test 151 | public void checkCompactPreamble() { 152 | final int nRows = 4; 153 | final int nCols = 7; 154 | final MatrixBuilder mb = new MatrixBuilder(); 155 | final Matrix m = mb.build(nRows, nCols); 156 | 157 | final Memory mem = Memory.wrap(m.toCompactByteArray(nRows - 1, 7)); 158 | MatrixPreambleUtil.preambleToString(mem); 159 | } 160 | 161 | static void checkMatrixEquality(final Matrix m1, final Matrix m2) { 162 | assertEquals(m1.numRows_, m2.numRows_); 163 | assertEquals(m1.numCols_, m2.numCols_); 164 | 165 | for (int i = 0; i < m1.numRows_; ++i) { 166 | for (int j = 0; j < m1.numCols_; ++j) { 167 | assertEquals(m1.getElement(i, j), m2.getElement(i, j), 168 | "Mismatch at (" + i + ", " + j + ")"); 169 | } 170 | } 171 | } 172 | 173 | /** 174 | * println the message 175 | * @param msg the message 176 | */ 177 | static void println(final String msg) { 178 | //System.out.println(msg); 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /tools/FindBugsExcludeFilter.xml: -------------------------------------------------------------------------------- 1 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 
| 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 64 | 65 | 66 | 71 | 72 | 77 | 83 | 84 | 89 | 90 | 101 | 102 | 110 | -------------------------------------------------------------------------------- /tools/SketchesCheckstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 24 | 25 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 318 | 319 | 320 | 321 
| 322 | 323 | 325 | 326 | 327 | 328 | 329 | 331 | 332 | 333 | 334 | 335 | 336 | 338 | 339 | 340 | 341 | 342 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 363 | 365 | 367 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | --------------------------------------------------------------------------------